from dataset_files import MultiThreadedAudioFeatureExtractor
from pathlib import Path
from panns_inference import AudioTagging
import logging
import numpy as np
import queue
import concurrent.futures
import threading
import time
import audiopreprocessing
#import torch
#import gc


class mtafe_panns():
    __audio_queue: queue.Queue[                   # Queue of ...
        tuple[                                    # ... pairs of chunked audio and its path
            list[tuple[np.ndarray, float, int]],  # Chunked audio
            Path                                  # Path to the original audio
        ]
    ]                                             # Queue of chunked/resampled audio
    __audio_loader_threads: int                   # Number of audio feeder threads
    __feature_extractor_threads: int              # Number of feature extractor threads (if the method allows)
    __audio_paths_list: queue.Queue[Path]         # Queue of paths to the audio files
    __max_audio_in_queue: int                     # Maximum number of audio files in the queue
    __desired_sr: int
    __mono: bool
    __chunk_length: float
    __overlap: float
    __features: dict[Path, list[tuple[np.ndarray, float, int]]]  # This is a crime, I know
    __features_lock: threading.Lock
    __audio_loader_threadpool: list[concurrent.futures.Future]
    __feature_extractor_threadpool: list[concurrent.futures.Future]
    __at: AudioTagging
    __batch_size: int

    def __init__(self,
                 audio_paths: list[Path],
                 max_audio_in_queue: int = 16,
                 audio_feeder_threads: int = 8,
                 feature_extractor_threads: int = 8,
                 desired_sr: int = 32000,
                 force_mono: bool = False,
                 chunk_length: float = 15.0,
                 chunk_overlap: float = 2.0,
                 batch_size: int = 20
                 ):
        # Check that all the paths passed in are valid files and add them to the queue
        self.__audio_paths_list = queue.Queue()
        for p in audio_paths:
            if not p.is_file():
                raise FileNotFoundError(f"Path '{p.absolute()}' is NOT a valid file!")
            self.__audio_paths_list.put(p)
        logging.info(f"[MTAFE] [Constructor] Queued {self.__audio_paths_list.qsize()} files")

        # Set up private attributes
        ## Audio preprocessing parameters
        self.__desired_sr = desired_sr
        self.__mono = force_mono
        self.__chunk_length = chunk_length
        self.__overlap = chunk_overlap

        ## Extractor/feeder settings
        self.__max_audio_in_queue = max_audio_in_queue
        self.__audio_loader_threads = audio_feeder_threads
        self.__feature_extractor_threads = feature_extractor_threads

        ## Set up runtime state
        self.__audio_queue = queue.Queue(maxsize=max_audio_in_queue)
        self.__features = {}
        self.__features_lock = threading.Lock()
        self.__audio_loader_threadpool = []
        self.__feature_extractor_threadpool = []

        logging.info(f"[MTAFE] [Constructor] Extraction parameters: {desired_sr}Hz, Mono: {force_mono}, "
                     f"divide into {chunk_length}s chunks with {chunk_overlap}s of overlap")
        logging.info(f"[MTAFE] [Constructor] Using {audio_feeder_threads} threads for preprocessing audio and "
                     f"{feature_extractor_threads} threads for feature extraction. "
                     f"Max queue size of {max_audio_in_queue} files")
        logging.info(f"[MTAFE] [Constructor] Initializing PANNs")
        logging.info(f"[MTAFE] [Constructor] Inferencing with batch size {batch_size}")
        self.__at = AudioTagging(checkpoint_path=None, device='cuda')
        self.__batch_size = batch_size
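    # Pipeline overview (producer/consumer):
    #   __audio_feeder_thread (xN):      __audio_paths_list -> audiopreprocessing -> __audio_queue
    #   __feature_extractor_thread (xN): __audio_queue -> batched PANNs inference -> __features
    # The bounded __audio_queue is what caps memory use: feeders block on put()
    # once max_audio_in_queue preprocessed files are waiting for the extractors.
    # Assumed contract (defined in the external audiopreprocessing module, not
    # shown here): load_preprocessed_audio() returns a list of
    # (chunk_samples, time_offset_seconds, channel_id) tuples, matching the
    # type annotations above.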
    def __chunks(self, lst, n):  # Stolen straight from Stack Overflow
        """Yield successive n-sized chunks from lst."""
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    def __audio_inference_embedding(self, audio: list[tuple[np.ndarray, float, int]]) -> list[tuple[np.ndarray, float, int]]:
        audio_chunk_list = []
        timepos_list = []
        channel_id_list = []
        embedding_list = []

        # Split the (chunk, time position, channel) triples into parallel lists
        for audio_chunk, timepos, channel in audio:
            audio_chunk_list.append(audio_chunk)
            timepos_list.append(timepos)
            channel_id_list.append(channel)

        # Convert audio_chunk_list into a (num_chunks, samples) numpy array.
        # Assumes all chunks are equally long (i.e. the preprocessing pads the
        # final chunk); np.array() cannot stack ragged chunks into a 2D array.
        audio_chunk_list = np.array(audio_chunk_list)

        #logging.info("[MTAFE] [PANNs] Inferencing...")
        try:
            for i, batch in enumerate(self.__chunks(audio_chunk_list, self.__batch_size)):
                (clipwise_output, embedding) = self.__at.inference(batch)
                for vect in embedding:  # vect: np.ndarray
                    embedding_list.append(vect)
                logging.info(f"[MTAFE] [PANNs] Inferenced batch {i}")
            assert len(audio_chunk_list) == len(timepos_list) == len(channel_id_list) == len(embedding_list)
        except Exception as e:
            logging.critical("[MTAFE] [PANNs] ERROR! INFERENCE FAILED!!! OR LIST SIZE MISMATCH")
            logging.critical(e)
            embedding_list = [None for _ in audio_chunk_list]  # Clear embedding_list and fill it with None

        # (embedding, time position, channel id), matching the return annotation
        return list(zip(embedding_list, timepos_list, channel_id_list))

    def __audio_feeder_thread(self, thread_id):
        while True:
            # get_nowait() rather than an empty()/get() pair: with several
            # feeder threads, the last item can be taken between the two calls,
            # which would leave this thread blocked on get() forever
            try:
                new_audio_path = self.__audio_paths_list.get_nowait()
            except queue.Empty:
                break
            self.__audio_paths_list.task_done()
            logging.info(f"[MTAFE] [Audio Feeder {thread_id}] Preprocess: {new_audio_path.absolute()}")
            new_audio = audiopreprocessing.load_preprocessed_audio(
                new_audio_path,
                self.__desired_sr,
                self.__mono,
                self.__chunk_length,
                self.__overlap
            )
            self.__audio_queue.put((new_audio, new_audio_path))
            logging.info(f"[MTAFE] [Audio Feeder {thread_id}] Feed: {new_audio_path.absolute()}")
        logging.info(f"[MTAFE] [Audio Feeder {thread_id}] Thread finished!")

    def __check_all_audiofeed_thread_finished(self) -> bool:
        # done() rather than running(): a future that was submitted but has not
        # started yet is neither running nor done, and must not count as finished
        for ft in self.__audio_loader_threadpool:
            if not ft.done():
                return False
        return True

    def __check_all_featureextractor_thread_finished(self) -> bool:
        for ft in self.__feature_extractor_threadpool:
            if not ft.done():
                return False
        return True

    def __feature_extractor_thread(self, thread_id):
        while (not self.__check_all_audiofeed_thread_finished()) or (not self.__audio_queue.empty()):
            # Blocking get() with a timeout rather than an empty()/get() pair:
            # no busy-spinning while the queue is empty, and no risk of hanging
            # when another extractor grabs the last item first
            try:
                audio_to_process, audio_path = self.__audio_queue.get(timeout=1.0)
            except queue.Empty:
                continue
            self.__audio_queue.task_done()
            logging.info(f"[MTAFE] [Feature Extractor {thread_id}] Extracting: {audio_path}")
            features_to_add = self.__audio_inference_embedding(audio_to_process)
            logging.info(f"[MTAFE] [Feature Extractor {thread_id}] Extracted: {len(features_to_add)} features")
            with self.__features_lock:
                self.__features[audio_path] = features_to_add
            logging.info(f"[MTAFE] [Feature Extractor {thread_id}] Feature extraction complete for {audio_path} w/ {len(features_to_add)} features")
        logging.info(f"[MTAFE] [Feature Extractor {thread_id}] Thread finished!")
    def __count_running_threads(self) -> tuple[int, int]:
        running_extractors = 0
        running_feeders = 0
        for ft in self.__feature_extractor_threadpool:
            if ft.running():
                running_extractors += 1
        for ft in self.__audio_loader_threadpool:
            if ft.running():
                running_feeders += 1
        return (running_feeders, running_extractors)

    @property
    def features(self) -> dict[Path, list[tuple[np.ndarray, float, int]]]:
        return self.__features

    def extract(self):
        total_amount = self.__audio_paths_list.qsize()
        logging.info(f"[MTAFE] [Main] Starting feature extraction for {total_amount} file(s)")
        t_start = time.perf_counter()
        with concurrent.futures.ThreadPoolExecutor(max_workers=(self.__audio_loader_threads + self.__feature_extractor_threads)) as executor:
            for i in range(self.__audio_loader_threads):
                ld_ft = executor.submit(self.__audio_feeder_thread, i)
                self.__audio_loader_threadpool.append(ld_ft)
            for i in range(self.__feature_extractor_threads):
                ex_ft = executor.submit(self.__feature_extractor_thread, i)
                self.__feature_extractor_threadpool.append(ex_ft)

            # Report progress while either thread group is still working ("or",
            # not "and": the feeders finish well before the extractors have
            # drained the queue)
            while ((not self.__check_all_audiofeed_thread_finished())
                   or (not self.__check_all_featureextractor_thread_finished())):
                nfeeder, nextract = self.__count_running_threads()
                print(f"[MTAFE Progress] Processed {len(self.__features)}/{total_amount} "
                      f"(L:{self.__audio_queue.qsize()}/W:{self.__audio_paths_list.qsize()}, "
                      f"LD:{nfeeder}/EXT:{nextract})", end="\r")
                time.sleep(0.5)  # Don't busy-spin the progress loop
        t_stop = time.perf_counter()
        logging.info(f"[MTAFE] Processed {len(self.__features)}/{total_amount} (L:{self.__audio_queue.qsize()}/W:{self.__audio_paths_list.qsize()} COMPLETE)")
        delta_t = t_stop - t_start
        total_features = sum(len(feats) for feats in self.__features.values())
        logging.info(f"[MTAFE] Extraction complete. Took {delta_t:.2f} seconds. Added {total_features} vectors/embeddings")
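
# A minimal usage sketch, not part of the original module: the ./audio
# directory is hypothetical, and a CUDA-capable GPU is assumed since the
# class initializes AudioTagging with device='cuda'. Adjust paths, thread
# counts, and batch size to your data and hardware.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Any list[Path] of existing audio files works here
    wav_paths = sorted(Path("./audio").glob("*.wav"))

    extractor = mtafe_panns(
        audio_paths=wav_paths,
        max_audio_in_queue=8,         # At most 8 preprocessed files held in memory
        audio_feeder_threads=4,
        feature_extractor_threads=1,  # A single GPU usually wants one inference thread
        chunk_length=15.0,
        chunk_overlap=2.0,
        batch_size=20
    )
    extractor.extract()

    # features maps each Path to its (embedding, time_offset, channel_id) tuples
    for path, feats in extractor.features.items():
        print(f"{path.name}: {len(feats)} embeddings")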