PlanOpticon

planopticon / video_processor / providers / whisper_local.py

Blame History Raw 134 lines

1	`"""Local Whisper transcription provider — runs on-device with GPU acceleration."""`
2
3	`import logging`
4	`from pathlib import Path`
5	`from typing import Optional`
6
logger = logging.getLogger(__name__)

# Rough memory footprint (VRAM on a GPU, RAM on CPU) for each Whisper model
# size. Purely informational: it is looked up when logging the chosen model.
_MODEL_SIZES: dict[str, str] = {
    "tiny": "~1GB",
    "base": "~1GB",
    "small": "~2GB",
    "medium": "~5GB",
    "large": "~10GB",
    "turbo": "~6GB",
}
18
19
20	`class WhisperLocal:`
21	`"""`
22	`Local Whisper transcription using openai-whisper.`
23
24	`Uses MPS (Apple Silicon) or CUDA when available, falls back to CPU.`
25	`No file size limits — processes audio directly on device.`
26	`"""`
27
28	`def __init__(self, model_size: str = "large", device: Optional[str] = None):`
29	`"""`
30	`Initialize local Whisper.`
31
32	`Parameters`
33	`----------`
34	`model_size : str`
35	`Whisper model size: tiny, base, small, medium, large, turbo`
36	`device : str, optional`
37	`Force device: 'mps', 'cuda', 'cpu'. Auto-detects if None.`
38	`"""`
39	`self.model_size = model_size`
40	`self._model = None`
41
42	`if device:`
43	`self.device = device`
44	`else:`
45	`self.device = self._detect_device()`
46
47	`logger.info(`
48	`f"WhisperLocal: model={model_size} ({_MODEL_SIZES.get(model_size, '?')}), "`
49	`f"device={self.device}"`
50	`)`
51
52	`@staticmethod`
53	`def _detect_device() -> str:`
54	`"""Auto-detect the best available device."""`
55	`try:`
56	`import torch`
57
58	`if torch.cuda.is_available():`
59	`return "cuda"`
60	`if torch.backends.mps.is_available():`
61	`return "mps"`
62	`except ImportError:`
63	`pass`
64	`return "cpu"`
65
66	`def _load_model(self):`
67	`"""Lazy-load the Whisper model."""`
68	`if self._model is not None:`
69	`return`
70
71	`try:`
72	`import whisper`
73	`except ImportError:`
74	`raise ImportError("openai-whisper not installed. Run: pip install openai-whisper torch")`
75
76	`logger.info(f"Loading Whisper {self.model_size} model on {self.device}...")`
77	`self._model = whisper.load_model(self.model_size, device=self.device)`
78	`logger.info("Whisper model loaded")`
79
80	`def transcribe(`
81	`self,`
82	`audio_path: str \| Path,`
83	`language: Optional[str] = None,`
84	`) -> dict:`
85	`"""`
86	`Transcribe audio using local Whisper.`
87
88	`No file size limits. Runs entirely on device.`
89
90	`Returns dict compatible with ProviderManager transcription format.`
91	`"""`
92	`self._load_model()`
93	`audio_path = Path(audio_path)`
94
95	`logger.info(f"Transcribing {audio_path.name} with Whisper {self.model_size}...")`
96
97	`# fp16 only works reliably on CUDA; MPS produces NaN with large models`
98	`kwargs = {"fp16": self.device == "cuda"}`
99	`if language:`
100	`kwargs["language"] = language`
101
102	`result = self._model.transcribe(str(audio_path), **kwargs)`
103
104	`segments = [`
105	`{`
106	`"start": seg["start"],`
107	`"end": seg["end"],`
108	`"text": seg["text"].strip(),`
109	`}`
110	`for seg in result.get("segments", [])`
111	`]`
112
113	`duration = segments[-1]["end"] if segments else None`
114
115	`return {`
116	`"text": result.get("text", "").strip(),`
117	`"segments": segments,`
118	`"language": result.get("language", language),`
119	`"duration": duration,`
120	`"provider": "whisper-local",`
121	`"model": f"whisper-{self.model_size}",`
122	`}`
123
124	`@staticmethod`
125	`def is_available() -> bool:`
126	`"""Check if local Whisper is installed and usable."""`
127	`try:`
128	`import torch # noqa: F401`
129	`import whisper # noqa: F401`
130
131	`return True`
132	`except ImportError:`
133	`return False`
134

PlanOpticon

Keyboard Shortcuts