|
287a3bb…
|
leo
|
1 |
"""Tests for the audio extractor module.""" |
|
829e24a…
|
leo
|
2 |
|
|
287a3bb…
|
leo
|
3 |
import tempfile |
|
287a3bb…
|
leo
|
4 |
from pathlib import Path |
|
829e24a…
|
leo
|
5 |
from unittest.mock import MagicMock, patch |
|
287a3bb…
|
leo
|
6 |
|
|
287a3bb…
|
leo
|
7 |
import numpy as np |
|
287a3bb…
|
leo
|
8 |
|
|
287a3bb…
|
leo
|
9 |
from video_processor.extractors.audio_extractor import AudioExtractor |
|
287a3bb…
|
leo
|
10 |
|
|
829e24a…
|
leo
|
11 |
|
|
287a3bb…
|
leo
|
12 |
class TestAudioExtractor: |
|
287a3bb…
|
leo
|
13 |
"""Test suite for AudioExtractor class.""" |
|
829e24a…
|
leo
|
14 |
|
|
287a3bb…
|
leo
|
15 |
def test_init(self):
    """Check that the constructor stores the sample rate and mono flag."""
    # With no arguments the extractor is expected to report 16000 Hz, mono.
    default_extractor = AudioExtractor()
    assert default_extractor.sample_rate == 16000
    assert default_extractor.mono is True

    # Explicit keyword arguments must be reflected on the instance.
    custom_extractor = AudioExtractor(sample_rate=44100, mono=False)
    assert custom_extractor.sample_rate == 44100
    assert custom_extractor.mono is False
|
829e24a…
|
leo
|
26 |
|
|
829e24a…
|
leo
|
27 |
@patch("subprocess.run")
def test_extract_audio(self, mock_run):
    """Check output-path handling and the command passed to subprocess.run."""
    # subprocess.run is patched so no real process is started; the fake
    # result reports a zero return code.
    mock_run.return_value = MagicMock(returncode=0)

    with tempfile.TemporaryDirectory() as temp_dir:
        workdir = Path(temp_dir)

        # A placeholder input file; its bytes are never decoded because
        # subprocess.run is mocked.
        video_path = workdir / "test_video.mp4"
        video_path.write_bytes(b"dummy video content")

        extractor = AudioExtractor()

        # Without an explicit output path, the result should be the video
        # path with its suffix swapped for ".wav".
        default_result = extractor.extract_audio(video_path)
        assert default_result == video_path.with_suffix(".wav")

        # With an explicit output path, that path should be returned as-is.
        custom_output = workdir / "custom_audio.wav"
        custom_result = extractor.extract_audio(video_path, custom_output)
        assert custom_result == custom_output

        # Inspect the first positional argument of the most recent
        # subprocess.run call.
        mock_run.assert_called()
        command = mock_run.call_args[0][0]
        for expected_token in ("ffmpeg", "-i", str(video_path)):
            assert expected_token in command
|
829e24a…
|
leo
|
59 |
|
|
829e24a…
|
leo
|
60 |
@patch("soundfile.info")
def test_get_audio_properties(self, mock_sf_info):
    """Check that get_audio_properties reports the values from soundfile.info."""
    # soundfile.info is patched to return an object carrying known metadata.
    mock_sf_info.return_value = MagicMock(
        duration=10.5,
        samplerate=16000,
        channels=1,
        format="WAV",
        subtype="PCM_16",
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # A placeholder audio file; its bytes are never parsed because
        # soundfile.info is mocked.
        audio_path = Path(temp_dir) / "test_audio.wav"
        audio_path.write_bytes(b"dummy audio content")

        extractor = AudioExtractor()
        props = extractor.get_audio_properties(audio_path)

        # Each key of the returned mapping must match the mocked metadata,
        # and "path" must be the string form of the input path.
        expected_props = {
            "duration": 10.5,
            "sample_rate": 16000,
            "channels": 1,
            "format": "WAV",
            "subtype": "PCM_16",
            "path": str(audio_path),
        }
        for key, expected_value in expected_props.items():
            assert props[key] == expected_value
|
829e24a…
|
leo
|
89 |
|
|
287a3bb…
|
leo
|
90 |
def test_segment_audio(self):
    """Check segment counts and lengths produced by segment_audio."""
    # 16000 samples at 16000 Hz, i.e. one second of constant signal.
    sample_rate = 16000
    audio_data = np.ones(16000)

    extractor = AudioExtractor()

    # 500 ms windows with no overlap: expect two segments of 8000 samples.
    halves = extractor.segment_audio(
        audio_data,
        sample_rate,
        segment_length_ms=500,
        overlap_ms=0,
    )
    assert len(halves) == 2
    for index in (0, 1):
        assert len(halves[index]) == 8000

    # 600 ms windows with 100 ms overlap: only the segment count is checked.
    overlapped = extractor.segment_audio(
        audio_data,
        sample_rate,
        segment_length_ms=600,
        overlap_ms=100,
    )
    assert len(overlapped) == 2