I'm burnt out; I can't get the multithreaded audio feature extractor to work :(

2025-04-19 17:47:09 +02:00
parent b855b7e255
commit 37b6a3c5e7
9 changed files with 563 additions and 180 deletions
--- a/mtafe_lab/audiopreprocessing.py
+++ b/mtafe_lab/audiopreprocessing.py
@@ -0,0 +1,95 @@
+import librosa
+import pickle
+import os
+import numpy as np
+from pathlib import Path
+import logging
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+def triggerlog() -> None:
+    """Emit a fixed test message at CRITICAL level through the module logger."""
+    logger.log(logging.CRITICAL, "Testing: info")
+
+def resample_load(input_path : Path, target_sr : int = 16000, mono_audio : bool = False) -> np.ndarray: # AI
+    """Load an audio file and return its samples at `target_sr`.
+
+    The file is first read at its own sample rate and is only resampled
+    when that rate differs from `target_sr`.
+
+    Args:
+        input_path (Path): pathlib.Path object to audio file
+        target_sr (int, optional): Sample rate the returned audio should have. Defaults to 16000.
+        mono_audio (bool, optional): Passed to librosa.load as `mono`. Defaults to False.
+
+    Returns:
+        np.ndarray: The loaded (and, if needed, resampled) audio samples.
+    """
+    logger.info(f"[resample_load] Loading audio {input_path}")
+    # sr=None: keep the file's original sample rate so the comparison below decides on resampling
+    samples, native_sr = librosa.load(input_path, sr=None, mono=mono_audio)
+
+    # Nothing to do when the file already has the requested rate
+    if native_sr == target_sr:
+        return samples
+
+    logger.info(f"[resample_load] Resampling to {target_sr}")
+    return librosa.resample(samples, orig_sr=native_sr, target_sr=target_sr)
+
+def chunk_audio(audio : np.ndarray, sr: int, chunk_length: float = 10.0, overlap: float = 2.0) -> tuple[list[np.ndarray], list[float], int]: # AI
+    """
+    Chunks audio into overlapping segments. Only pass in mono audio here.
+
+    Consecutive chunks start `chunk_length - overlap` seconds apart. Only
+    full-length chunks are produced: samples after the last full chunk are
+    not returned. If the audio is shorter than one chunk, the whole audio is
+    returned as a single chunk at position 0.0.
+
+    Args:
+        audio: Loaded audio ndarray (one channel only)
+        sr: Sample rate for the given audio
+        chunk_length: Length of each chunk in seconds
+        overlap: Overlap between chunks in seconds (must be smaller than chunk_length)
+
+    Returns:
+        List of audio chunks, list of chunk start positions in seconds, and given sample rate
+
+    Raises:
+        ValueError: If the chunk size or the hop between chunk starts
+            (chunk_length - overlap) is not at least one sample long.
+    """
+    logger.info(f"[chunk_audio] Chunking audio ({len(audio) / sr}s)")
+    # Calculate chunk size and hop length in samples
+    chunk_size = int(chunk_length * sr)
+    hop_length = int((chunk_length - overlap) * sr)
+
+    # A zero hop makes range() fail with an unrelated-looking error, and a
+    # negative hop yields an empty range that would be misreported below as
+    # "audio shorter than chunk_length". Reject both explicitly.
+    if chunk_size <= 0:
+        raise ValueError(
+            f"chunk_length ({chunk_length}s) at sr={sr} gives a chunk of {chunk_size} samples; it must be at least 1"
+        )
+    if hop_length <= 0:
+        raise ValueError(
+            f"overlap ({overlap}s) must be smaller than chunk_length ({chunk_length}s) by at least one sample at sr={sr}"
+        )
+
+    # Start offsets (in samples) of every chunk that fits completely in the audio
+    starts = range(0, len(audio) - chunk_size + 1, hop_length)
+    chunks = [audio[start:start + chunk_size] for start in starts]
+    positions = [start / sr for start in starts]
+
+    if not chunks: # The full audio length is less than chunk_length
+        chunks = [audio]
+        positions = [0.0]
+        logger.info("[chunk_audio] Audio less than chunk_length. Returning original audio as chunk\r")
+    else:
+        logger.info(f"[chunk_audio] Audio is split into {len(chunks)} chunks")
+
+    return chunks, positions, sr
+
+def load_preprocessed_audio(
+    path: Path,
+    desired_sr: int,
+    mono: bool = False,
+    chunk_length: float = 15.0,
+    overlap: float = 2.0) -> list[tuple[np.ndarray, float, int]]:
+    """Load an audio file, resample it and split each channel into chunks.
+
+    Args:
+        path: pathlib.Path object to audio file
+        desired_sr: Sample rate the audio is resampled to before chunking
+        mono: Load the audio in mono mode
+        chunk_length: Length of each chunk in seconds
+        overlap: Overlap between chunks in seconds
+
+    Returns:
+        List of (chunk, position, channel_id) tuples, where position is the
+        value reported by chunk_audio for that chunk. channel_id is -1 when
+        the audio is handled as mono (mono requested, or the loaded array is
+        1D); otherwise it is the index of the channel the chunk was cut
+        from, with all chunks of channel 0 first, then channel 1, and so on.
+    """
+    # Load and resample audio
+    audio = resample_load(path, desired_sr, mono) # Stereo 2D matrix, Mono 1D array
+    if mono or (audio.ndim == 1):
+        # Mono (or the audio file loaded in itself is mono): -1 is the mono channel indicator
+        channels = [(-1, audio)]
+    else:
+        # Stereo/multichannel: iterating the 2D array yields one 1D array per channel
+        channels = enumerate(audio)
+
+    result = []
+    for channel_id, channel_audio in channels:
+        chunks, positions, _ = chunk_audio(channel_audio, desired_sr, chunk_length, overlap)
+        assert len(chunks) == len(positions)
+        # (ndarray_chunk1, pos1, channel_id): first audio chunk, position1, channel indicator
+        result.extend((chunk, position, channel_id) for chunk, position in zip(chunks, positions))
+    # Log through the module logger (not the root logger) like the rest of this module
+    logger.info(f"[load_preprocessed_audio] Loaded audio {path} ({desired_sr}Hz, Chunk {chunk_length}s with overlap {overlap}s) MONO:{mono}")
+    return result