# Python package dependencies; install with: pip install -r <this file>
# One requirement per line. No versions are pinned, so pip resolves the
# latest release compatible with the running interpreter.
yamlargparse
torchaudio
soundfile
tqdm
scipy
numpy
einops
rotary_embedding_torch
torchinfo
packaging
# `typing` is part of the standard library from Python 3.5 onward; the PyPI
# backport is only needed on older interpreters, so restrict it with a marker.
typing; python_version < "3.5"
librosa
opencv-python
python_speech_features
scenedetect
torchvision
pydub
matplotlib