PlanOpticon

planopticon / video_processor / providers / ollama_provider.py

Blame History Raw 182 lines

1	`"""Ollama provider implementation using OpenAI-compatible API."""`
2
3	`import base64`
4	`import logging`
5	`import os`
6	`from pathlib import Path`
7	`from typing import Optional`
8
9	`import requests`
10	`from openai import OpenAI`
11
12	`from video_processor.providers.base import BaseProvider, ModelInfo, ProviderRegistry`
13
# Module-level logger; its name follows this module's import path.
logger = logging.getLogger(__name__)

# Model families treated as vision-capable. Entries are matched against the
# lower-cased model name with everything from the first ":" (the tag) removed.
_VISION_FAMILIES = {
    "bakllava",
    "deepseek-vl",
    "internvl2",
    "llama3.2-vision",
    "llava",
    "llava-llama3",
    "llava-phi3",
    "minicpm-v",
    "moondream",
}

# Server address used when neither an explicit host nor OLLAMA_HOST is given.
DEFAULT_HOST = "http://localhost:11434"
30
31
class OllamaProvider(BaseProvider):
    """Ollama local LLM provider via OpenAI-compatible API.

    Chat and image requests are sent through an ``OpenAI`` client pointed at
    ``<host>/v1``. Model discovery and the availability probe call the
    ``<host>/api/tags`` endpoint directly with ``requests``.
    """

    provider_name = "ollama"

    def __init__(self, host: Optional[str] = None):
        """Create a provider bound to one Ollama server.

        Args:
            host: Base URL of the server. When omitted or blank, the
                ``OLLAMA_HOST`` environment variable is used, then
                ``DEFAULT_HOST``.
        """
        self.host = self._resolve_host(host)
        self.client = OpenAI(
            base_url=f"{self.host}/v1",
            # Placeholder value; presumably the client only needs a non-empty
            # key and the local server ignores it — confirm if auth is added.
            api_key="ollama",
        )
        # Filled lazily by _get_models(); None means "not fetched yet".
        self._models_cache: Optional[list[ModelInfo]] = None

    @staticmethod
    def _resolve_host(host: Optional[str]) -> str:
        """Pick the server address and normalise it into a usable base URL.

        The first non-blank value among ``host``, ``$OLLAMA_HOST`` and
        ``DEFAULT_HOST`` wins. A missing scheme defaults to ``http://`` (so a
        value such as ``127.0.0.1:11434`` works), and trailing slashes are
        dropped so that appending ``/v1`` or ``/api/tags`` never produces
        ``//``.
        """
        candidates = (host, os.getenv("OLLAMA_HOST"), DEFAULT_HOST)
        resolved = next((c.strip() for c in candidates if c and c.strip()), DEFAULT_HOST)
        if "://" not in resolved:
            resolved = f"http://{resolved}"
        return resolved.rstrip("/")

    @staticmethod
    def is_available(host: Optional[str] = None) -> bool:
        """Check if an Ollama server is running and reachable.

        Args:
            host: Server address; resolved the same way as in ``__init__``.

        Returns:
            True only when ``GET <host>/api/tags`` answers with HTTP 200
            within 3 seconds. Any error while probing yields False.
        """
        base_url = OllamaProvider._resolve_host(host)
        try:
            resp = requests.get(f"{base_url}/api/tags", timeout=3)
        except Exception:
            # Deliberately broad: this is a best-effort probe and must never
            # raise, whatever the transport layer throws.
            return False
        return resp.status_code == 200

    @property
    def _default_model(self) -> str:
        """First listed model with the "chat" capability, else "llama3.2:latest"."""
        return next(
            (m.id for m in self._get_models() if "chat" in m.capabilities),
            "llama3.2:latest",
        )

    @property
    def _default_vision_model(self) -> Optional[str]:
        """First listed model with the "vision" capability, or None."""
        return next(
            (m.id for m in self._get_models() if "vision" in m.capabilities),
            None,
        )

    def _get_models(self) -> list[ModelInfo]:
        """Return the cached model list, fetching it when nothing is cached.

        ``list_models`` returns an empty list when the server cannot be
        reached, so an empty result is not treated as final: the next call
        queries again instead of pinning a transient failure for the
        lifetime of this instance.
        """
        if not self._models_cache:
            self._models_cache = self.list_models()
        return self._models_cache

    def _record_usage(self, response) -> None:
        """Store the token counts reported by ``response`` in ``_last_usage``.

        Missing usage data, or missing/None counters, are recorded as 0.
        """
        usage = response.usage
        if usage:
            prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
            completion_tokens = getattr(usage, "completion_tokens", 0) or 0
        else:
            prompt_tokens = completion_tokens = 0
        self._last_usage = {
            "input_tokens": prompt_tokens,
            "output_tokens": completion_tokens,
        }

    def chat(
        self,
        messages: list[dict],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        model: Optional[str] = None,
    ) -> str:
        """Run a chat completion and return the reply text.

        Args:
            messages: Chat messages, passed to the client unchanged.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.
            model: Model id; defaults to ``_default_model``.

        Returns:
            Content of the first choice, or "" when that content is empty.
        """
        model = model or self._default_model
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        self._record_usage(response)
        return response.choices[0].message.content or ""

    def analyze_image(
        self,
        image_bytes: bytes,
        prompt: str,
        max_tokens: int = 4096,
        model: Optional[str] = None,
    ) -> str:
        """Send an image plus a text prompt to a vision model.

        Args:
            image_bytes: Raw image data; sent base64-encoded in a data URL
                labelled ``image/jpeg``.
            prompt: Text instruction accompanying the image.
            max_tokens: Upper bound on generated tokens.
            model: Model id; defaults to ``_default_vision_model``.

        Returns:
            Content of the first choice, or "" when that content is empty.

        Raises:
            RuntimeError: If no model is given and no vision-capable model
                is found on the server.
        """
        model = model or self._default_vision_model
        if not model:
            raise RuntimeError(
                "No Ollama vision model available. Install a multimodal model: ollama pull llava"
            )
        b64 = base64.b64encode(image_bytes).decode()
        response = self.client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        self._record_usage(response)
        return response.choices[0].message.content or ""

    def transcribe_audio(
        self,
        audio_path: str | Path,
        language: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        """Unsupported for this provider.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Ollama does not support audio transcription. "
            "Use local Whisper (--transcription-model whisper-local:large) or OpenAI Whisper API."
        )

    def list_models(self) -> list[ModelInfo]:
        """List the models reported by ``GET <host>/api/tags``.

        Every model gets the "chat" capability. "vision" is added when the
        lower-cased name before the first ":" is in ``_VISION_FAMILIES`` or
        contains "vision". Entries without a name are skipped.

        Returns:
            Models sorted by id. On any failure a warning is logged and
            whatever was collected so far (possibly nothing) is returned.
        """
        models: list[ModelInfo] = []
        try:
            resp = requests.get(f"{self.host}/api/tags", timeout=5)
            resp.raise_for_status()
            # `or []` tolerates an explicit null under "models".
            for entry in resp.json().get("models") or []:
                name = entry.get("name", "")
                if not name:
                    # A nameless entry cannot be addressed as a model id.
                    continue
                caps = ["chat"]
                base_name = name.split(":")[0].lower()
                if base_name in _VISION_FAMILIES or "vision" in base_name:
                    caps.append("vision")
                models.append(
                    ModelInfo(
                        id=name,
                        provider="ollama",
                        display_name=name,
                        capabilities=caps,
                    )
                )
        except Exception as e:
            # Deliberately broad: discovery is best-effort and callers fall
            # back to defaults when the list is empty.
            logger.warning("Failed to list Ollama models: %s", e)
        return sorted(models, key=lambda m: m.id)
173
174
# Register the provider with the registry as a side effect of importing this
# module. env_var, model_prefixes and every default model are left empty.
# NOTE(review): presumably because a local server needs no API key and its
# models are discovered at runtime — confirm against ProviderRegistry.
ProviderRegistry.register(
    provider_class=OllamaProvider,
    name="ollama",
    model_prefixes=[],
    env_var="",
    default_models=dict.fromkeys(("chat", "vision", "audio"), ""),
)
182

PlanOpticon

Keyboard Shortcuts