I'm burnt out — I can't get the multithreaded audio feature extractor to work :(
This commit is contained in:
95
mtafe_lab/audiopreprocessing.py
Normal file
95
mtafe_lab/audiopreprocessing.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import librosa
|
||||
import pickle
|
||||
import os
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def triggerlog():
    """Emit a single CRITICAL-level test record to verify logging is configured."""
    # Level is CRITICAL (despite the "info" text) so the record is visible
    # under any sensible log-level configuration.
    logger.critical("Testing: info")
|
||||
|
||||
def resample_load(input_path : Path, target_sr : int = 16000, mono_audio : bool = False) -> np.ndarray: # AI
    """Load an audio file and resample it to `target_sr`.

    Args:
        input_path (Path): pathlib.Path object to audio file
        target_sr (int, optional): Target Sample Rate to resample. Defaults to 16000.
        mono_audio (bool, optional): Load the audio in mono mode. Defaults to False.

    Returns:
        np.ndarray: Audio samples at `target_sr` (1-D when loaded mono).
    """
    logger.info(f"[resample_load] Loading audio {input_path}")
    # sr=None keeps the file's native sample rate, so we can tell whether a
    # resampling pass is needed at all.
    audio, orig_sr = librosa.load(input_path, sr=None, mono=mono_audio)

    # Already at the requested rate — return the samples untouched.
    if orig_sr == target_sr:
        return audio

    logger.info(f"[resample_load] Resampling to {target_sr}")
    return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
|
||||
|
||||
def chunk_audio(audio: np.ndarray, sr: int, chunk_length: float = 10.0, overlap: float = 2.0) -> tuple[list[np.ndarray], list[float], int]: # AI
    """
    Chunks audio into overlapping segments. Only pass in mono audio here.

    Args:
        audio: Loaded audio ndarray (one channel only)
        sr: Sample rate for the given audio file
        chunk_length: Length of each chunk in seconds
        overlap: Overlap between chunks in seconds

    Returns:
        List of audio chunks, list of chunk start positions in seconds, and
        the given sample rate. If the audio is shorter than chunk_length,
        the whole signal is returned as a single chunk at position 0.0.

    Raises:
        ValueError: If overlap is not strictly smaller than chunk_length.
    """
    # Same logger object as the module-level `logger`; bound locally so the
    # function is self-contained.
    log = logging.getLogger(__name__)
    log.info(f"[chunk_audio] Chunking audio ({len(audio) / sr}s)")

    # Chunk size and hop length in samples.
    chunk_size = int(chunk_length * sr)
    hop_length = int((chunk_length - overlap) * sr)
    # BUG FIX: overlap >= chunk_length previously made hop_length <= 0, so
    # range() either raised ValueError (step 0) or silently produced no
    # chunks; fail loudly with a clear message instead.
    if hop_length <= 0:
        raise ValueError(f"overlap ({overlap}) must be smaller than chunk_length ({chunk_length})")

    # Generate overlapping chunks and their start times.
    chunks = []
    positions = []
    for start in range(0, len(audio) - chunk_size + 1, hop_length):
        chunks.append(audio[start:start + chunk_size])
        positions.append(start / sr)

    if not chunks: # The full audio length is less than chunk_length
        chunks = [audio]
        positions = [0.0]
        # (stray "\r" removed from the original log message)
        log.info("[chunk_audio] Audio less than chunk_length. Returning original audio as chunk")
    else:
        log.info(f"[chunk_audio] Audio is split into {len(chunks)} chunks")

    return chunks, positions, sr
|
||||
|
||||
def load_preprocessed_audio(
        path: Path,
        desired_sr: int,
        mono: bool = False,
        chunk_length: float = 15.0,
        overlap: float = 2.0) -> list[tuple[np.ndarray, float, int]]:
    """Load, resample and chunk an audio file in one step.

    Args:
        path: pathlib.Path to the audio file.
        desired_sr: Target sample rate in Hz.
        mono: Downmix to mono before chunking. Defaults to False.
        chunk_length: Chunk length in seconds. Defaults to 15.0.
        overlap: Overlap between chunks in seconds. Defaults to 2.0.

    Returns:
        List of (chunk, position_seconds, channel_id) tuples, where
        channel_id is -1 for mono audio and the 0-based channel index for
        multichannel audio.
    """
    result = []
    # Mono audio comes back as a 1-D array; multichannel as a 2-D matrix
    # (one row per channel — see the enumerate() below).
    audio = resample_load(path, desired_sr, mono)

    if mono or (audio.ndim == 1):
        # Mono path (either forced, or the file itself is single-channel).
        chunks, positions, _ = chunk_audio(audio, desired_sr, chunk_length, overlap)
        assert len(chunks) == len(positions)
        # -1 in the channel slot marks mono audio.
        result.extend(zip(chunks, positions, [-1] * len(chunks)))
    else:
        # Stereo/multichannel: chunk each channel separately and tag tuples
        # with the channel index.
        for channel_id, channel_audio in enumerate(audio):
            chunks, positions, _ = chunk_audio(channel_audio, desired_sr, chunk_length, overlap)
            assert len(chunks) == len(positions)
            result.extend(zip(chunks, positions, [channel_id] * len(chunks)))

    # BUG FIX: use the module logger (consistent with the rest of this file)
    # instead of the root logger, so records carry this module's name.
    logger.info(f"[load_preprocessed_audio] Loaded audio {path} ({desired_sr}Hz, Chunk {chunk_length}s with overlap {overlap}s) MONO:{mono}")
    return result
|
||||
135
mtafe_lab/dataset.py
Normal file
135
mtafe_lab/dataset.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import platform
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
import multiprocessing
|
||||
import threading
|
||||
import time
|
||||
import concurrent.futures
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import audiopreprocessing
|
||||
import logging
|
||||
import queue
|
||||
|
||||
def serialize_dict_obj(path : Path, object : dict) -> int:
    """Serializes Python Dictionary object to a file via Pickle.

    Args:
        path (Path): Path to store the file
        object (dict): Dictionary object to serialize

    Returns:
        int: size in bytes written
    """
    # NOTE(review): pickle is unsafe to load from untrusted sources — fine
    # for a local cache, but never unpickle files you did not produce.
    with path.open("wb") as fp:
        pickle.dump(object, fp)
        # pickle.dump() leaves the file position at the end of the written
        # data, so tell() is already the byte count — the original's extra
        # seek(0, SEEK_END) was redundant.
        size = fp.tell()
    return size
|
||||
|
||||
logging.info("Reading local dataset directory structure...")

# Dataset roots: Linux mount points when running on Linux, Windows drive
# letters otherwise (any other OS keeps the Windows-style paths, exactly as
# the assign-then-override version behaved).
if platform.system() == 'Linux':
    ASMROnePath = Path('/mnt/Scratchpad/ASMROne')
    ASMRTwoPath = Path('/mnt/MyStuffz/ASMRTwo')
    ASMRThreePath = Path('/mnt/Windows11/ASMRThree')
else:
    ASMROnePath = Path("E:\\ASMROne")
    ASMRTwoPath = Path("D:\\ASMRTwo")
    ASMRThreePath = Path("C:\\ASMRThree")

# Per-subset aggregate statistics, filled in by the directory walks below.
size_one, size_two, size_three = 0, 0, 0
files_one, files_two, files_three = [], [], []
folders_one, folders_two, folders_three = [], [], []
|
||||
|
||||
def _scan_subset(subset_path: Path) -> tuple[int, list[Path], list[Path]]:
    """Walk one dataset root and return (total bytes, file paths, folder paths).

    NOTE(review): Path.walk() requires Python 3.12+ — confirm the project's
    minimum Python version.
    """
    total_size = 0
    file_paths: list[Path] = []
    folder_paths: list[Path] = []
    for root, dirs, files in subset_path.walk():  # root iterates through all folders
        # Record every directory except the dataset root itself.
        if root.absolute() != subset_path.absolute():
            folder_paths.append(root)
        for fname in files:
            file = root / fname
            assert file.is_file()
            file_paths.append(file)
            total_size += file.stat().st_size
    return total_size, file_paths, folder_paths


# Statistic calculation for the three subsets (previously three copy-pasted
# loops — consolidated into the helper above).
size_one, files_one, folders_one = _scan_subset(ASMROnePath)
size_two, files_two, folders_two = _scan_subset(ASMRTwoPath)
size_three, files_three, folders_three = _scan_subset(ASMRThreePath)
|
||||
|
||||
# All dataset roots, in subset order.
DataSubsetPaths = [ASMROnePath, ASMRTwoPath, ASMRThreePath]

# Collect ASMR Works (RJ ID, Paths): one entry per work directory across all
# subsets.
DLSiteWorksPaths = [
    work_path
    for subset_path in DataSubsetPaths
    for work_path in subset_path.iterdir()
]
|
||||
|
||||
# Map of upper-cased file extension -> coarse media type.
fileExt2fileType = {
    ".TXT": "Document",
    ".WAV": "Audio",
    ".MP3": "Audio",
    ".PNG": "Image",
    ".JPG": "Image",
    ".VTT": "Subtitle",
    ".PDF": "Document",
    ".FLAC": "Audio",
    ".MP4": "Video",
    ".LRC": "Subtitle",
    ".SRT": "Subtitle",
    ".JPEG": "Image",
    ".ASS": "Subtitle",
    "": "NO EXTENSION",
    ".M4A": "Audio",
    ".MKV": "Video"
}
# Per-extension statistics: Count, List of paths, ExtensionMass (total
# bytes), MediaType.
fileext_stat = {}
file_list = files_one + files_two + files_three
file_list_count = len(file_list)

for file in file_list:
    f_ext = file.suffix.upper()
    f_size = file.stat().st_size  # stat once per file (was called twice)
    if f_ext in fileext_stat:  # idiom: no .keys() needed for membership
        entry = fileext_stat[f_ext]
        entry['Count'] += 1
        entry['List'].append(file)
        entry['ExtensionMass'] += f_size
    else:
        # BUG FIX: an extension missing from fileExt2fileType used to raise
        # KeyError and abort the whole module import; classify it as
        # "Unknown" and warn instead.
        if f_ext not in fileExt2fileType:
            logging.warning(f"Unknown file extension {f_ext!r}; classified as Unknown")
        fileext_stat[f_ext] = {
            'Count': 1,
            'List': [file],
            # The total sum of sizes of the same file extension.
            'ExtensionMass': f_size,
            'MediaType': fileExt2fileType.get(f_ext, "Unknown"),
        }

# All audio file paths across the dataset.
audio_paths = [
    p
    for ext_stat in fileext_stat.values()
    if ext_stat['MediaType'] == "Audio"
    for p in ext_stat['List']
]
|
||||
|
||||
def random_audio_chunk(n : int, seed : int = 177013) -> list[Path]:
    """Returns a random selection of audio files.

    Args:
        n (int): Amount of files to return
        seed (int, optional): Seed for RNG. Defaults to 177013.

    Returns:
        list[Path]: List of n distinct, randomly selected audio paths
            (using Path object)

    Raises:
        ValueError: If n exceeds the number of available audio files.
    """
    # BUG FIX: use a private Random instance instead of reseeding the global
    # RNG, so calling this function no longer clobbers random-module state
    # for the rest of the program. Random(seed).sample() yields the exact
    # same selection as random.seed(seed); random.sample() did.
    rng = random.Random(seed)
    # sample() (unlike choices()) never repeats an element.
    return rng.sample(audio_paths, k=n)
|
||||
32
mtafe_lab/mtafe.py
Normal file
32
mtafe_lab/mtafe.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import logging
|
||||
logging.basicConfig(format="%(asctime)s/%(levelname)s: [%(module)s] %(message)s", level=logging.INFO)
|
||||
|
||||
import multiprocessing
|
||||
import multiprocessing.process
|
||||
import dataset
|
||||
import audiopreprocessing
|
||||
from pathlib import Path
|
||||
|
||||
def copy_worker(origin_queue, target_queue):
    # Worker entry point: takes ONE path from origin_queue, preprocesses it,
    # and puts the resulting chunk list on target_queue.
    p = origin_queue.get()
    logging.info(f"Processing: {p}")
    # Resample to 32 kHz, force mono (chunking uses the function's defaults).
    l = audiopreprocessing.load_preprocessed_audio(p, 32000, True)
    print("Preprocess complete, putting it into queue")
    # NOTE(review): the hang is almost certainly not in put() itself. put()
    # hands the (large) pickled chunk list to the queue's background feeder
    # thread; once the OS pipe buffer fills, the feeder blocks until a
    # consumer calls get(). The parent in __main__ join()s this process
    # BEFORE draining the queue, so both sides wait forever. Drain the queue
    # before join() in the parent (see multiprocessing docs, "Joining
    # processes that use queues") — confirm against the docs.
    target_queue.put(l) # Even on a small scale test, the process will always hang here
|
||||
|
||||
if __name__ == "__main__":
    audio_path_queue = multiprocessing.Queue()
    audio_queue = multiprocessing.Queue()

    # Queue up one random audio file for the worker.
    rand_paths = dataset.random_audio_chunk(1)
    for p in rand_paths:
        audio_path_queue.put(p)

    print("Files queued")

    processes = [multiprocessing.Process(target=copy_worker, args=(audio_path_queue, audio_queue)) for _ in range(1)]
    for p in processes: p.start()

    # BUG FIX: drain the result queue BEFORE join(). A child that has put a
    # large object on a multiprocessing.Queue cannot exit until its feeder
    # thread flushes the data through the pipe, and the pipe only drains
    # when the parent calls get(). join()-before-get therefore deadlocks —
    # this is the "put always hangs" symptom observed in copy_worker.
    results = [audio_queue.get() for _ in range(len(processes))]
    print(f"Collected {len(results)} preprocessed chunk lists")

    for p in processes: p.join()

    print("Joined")
|
||||
30
mtafe_lab/test_mp.py
Normal file
30
mtafe_lab/test_mp.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import logging
|
||||
logging.basicConfig(format="%(asctime)s/%(levelname)s: [%(module)s] %(message)s", level=logging.INFO)
|
||||
|
||||
import multiprocessing
|
||||
from dataset import random_audio_chunk
|
||||
import audiopreprocessing
|
||||
from time import sleep
|
||||
|
||||
# Shared queues: work items in, preprocessed results out. Workers receive
# them via Process args. NOTE(review): under the "spawn" start method each
# child re-imports this module and builds its own (unused) module-level
# copies — harmless here, but worth confirming.
origin_queue = multiprocessing.Queue()
target_queue = multiprocessing.Queue()
|
||||
|
||||
def worker(orig, targ):
    # Worker: take one audio path from `orig`, preprocess it (16 kHz, forced
    # mono, default chunking), and put the resulting chunk list on `targ`.
    p = orig.get()
    #out = "PROCESSED" + str(p.absolute())
    out = audiopreprocessing.load_preprocessed_audio(p, 16000, True) # This will cause put to hang
    # NOTE(review): put() is almost certainly not the bug. A large item is
    # flushed to the pipe by the queue's feeder thread, which blocks once the
    # pipe buffer is full until the parent get()s it — but __main__ join()s
    # before draining target_queue, so both sides wait forever. Drain the
    # queue before join() to fix (see multiprocessing programming
    # guidelines, "Joining processes that use queues") — confirm.
    targ.put(out) # This will hang the process
|
||||
|
||||
if __name__ == "__main__":
    # Number of files / worker processes for the smoke test.
    K = 2

    for p in random_audio_chunk(K):
        origin_queue.put(p)

    processes = [multiprocessing.Process(target=worker, args=(origin_queue, target_queue)) for _ in range(K)]
    for p in processes: p.start()

    # BUG FIX: drain target_queue BEFORE join(). A child that has put a
    # large object on a multiprocessing.Queue will not exit until its feeder
    # thread flushes the data through the pipe, and the pipe only drains
    # when the parent calls get(); join()-before-get therefore deadlocks.
    for _ in range(K): print(target_queue.get())

    for p in processes: p.join()

    logging.critical("Successfully terminated all threads")
|
||||
21
mtafe_lab/test_mtafe.py
Normal file
21
mtafe_lab/test_mtafe.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import logging
# Configure logging before importing modules that log at import time.
logging.basicConfig(format="%(asctime)s/%(levelname)s: [%(module)s] %(message)s", level=logging.INFO)

import mtafe
from dataset import random_audio_chunk

# Pick 2 random audio files from the dataset for a smoke test.
logging.info("Generating random audio path list")
rdpl = random_audio_chunk(2)

logging.info("Initializing MTAFE")
# NOTE(review): the mtafe.py in this commit defines neither
# initialize_parameters nor test_feeder — as committed, this script will
# raise AttributeError. Confirm the intended mtafe API.
mtafe.initialize_parameters(
    paudio_paths=rdpl,              # audio files to process
    pmax_audio_in_queue=4,          # backpressure limit on the audio queue
    paudio_feeder_threads=2,        # threads loading/chunking audio
    pfeature_extractor_threads=1,   # threads extracting features
    pdesired_sr=32000,              # target sample rate (Hz)
    pforce_mono=False,              # keep original channel layout
    pchunk_length=15,               # chunk length (s)
    pchunk_overlap=2                # chunk overlap (s)
)
mtafe.test_feeder()
|
||||
Reference in New Issue
Block a user