processing framework

This commit is contained in:
2025-04-18 21:07:16 +02:00
parent 6fc6df87b2
commit b855b7e255
7 changed files with 8579 additions and 10682 deletions

View File

@@ -3,18 +3,31 @@ import pickle
import os
import numpy as np
from pathlib import Path
import logging
# Switch consulted by the `if DEBUG: print(...)` diagnostics that appear in this file.
DEBUG=True
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def triggerlog():
    """Emit one fixed CRITICAL-level record through the module logger."""
    logger.log(logging.CRITICAL, "Testing: info")
def resample_load(input_path: Path, target_sr: int = 16000, mono_audio: bool = False) -> np.ndarray:  # AI
    """Load an audio file and resample it to ``target_sr``.

    The file is decoded at its native sample rate and is only resampled when
    that rate differs from ``target_sr``. Nothing is written to disk.

    Args:
        input_path (Path): pathlib.Path object to audio file.
        target_sr (int, optional): Target sample rate to resample to.
            Defaults to 16000.
        mono_audio (bool, optional): Load the audio in mono mode.
            Defaults to False.

    Returns:
        np.ndarray: The audio samples at ``target_sr``, in whatever array
        layout ``librosa.load`` produces for the given ``mono_audio`` setting.
    """
    # sr=None keeps the file's native rate so it can be compared below.
    logger.info("[resample_load] Loading audio %s", input_path)
    audio, orig_sr = librosa.load(input_path, sr=None, mono=mono_audio)

    # Skip the resampling pass when the file is already at the target rate.
    if orig_sr != target_sr:
        logger.info("[resample_load] Resampling to %s", target_sr)
        audio = librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)

    return audio
@@ -24,7 +37,7 @@ def chunk_audio(audio : np.ndarray, sr: int, chunk_length: float = 10.0, overlap
Chunks audio file into overlapping segments. Only pass in mono audio here.
Args:
audio_file: Loaded audio ndarray
audio_file: Loaded audio ndarray (one channel only)
sr: Sample rate for the given audio file
chunk_length: Length of each chunk in seconds
overlap: Overlap between chunks in seconds
@@ -32,7 +45,7 @@ def chunk_audio(audio : np.ndarray, sr: int, chunk_length: float = 10.0, overlap
Returns:
List of audio chunks, list of chunk positions, and given sample rate
"""
if DEBUG: print("[chunk_audio] Chunking audio")
logger.info(f"[chunk_audio] Chunking audio ({len(audio) / sr}s)")
# Calculate chunk size and hop length in samples
chunk_size = int(chunk_length * sr)
hop_length = int((chunk_length - overlap) * sr)
@@ -46,10 +59,12 @@ def chunk_audio(audio : np.ndarray, sr: int, chunk_length: float = 10.0, overlap
chunks.append(chunk)
positions.append(i / sr)
k += 1
if DEBUG: print("[chunk_audio] Chunked", k, end="\r")
if k == 0: # The full audio length is less than chunk_length
chunks = [audio]
positions = [0.0]
logger.info(f"[chunk_audio] Audio less than chunk_length. Returning original audio as chunk\r")
else:
logger.info(f"[chunk_audio] Audio is split into {k} chunks")
return chunks, positions, sr