PlanOpticon

planopticon / video_processor / providers / ollama_provider.py
Blame History Raw 182 lines
1
"""Ollama provider implementation using OpenAI-compatible API."""
2
3
import base64
4
import logging
5
import os
6
from pathlib import Path
7
from typing import Optional
8
9
import requests
10
from openai import OpenAI
11
12
from video_processor.providers.base import BaseProvider, ModelInfo, ProviderRegistry
13
14
logger = logging.getLogger(__name__)

# Model families (the base name before the ":tag" suffix) that are known
# to accept image input. Membership is checked case-insensitively against
# the base name reported by the Ollama server.
_VISION_FAMILIES = {
    "bakllava",
    "deepseek-vl",
    "internvl2",
    "llama3.2-vision",
    "llava",
    "llava-llama3",
    "llava-phi3",
    "minicpm-v",
    "moondream",
}

# Default Ollama server address, used when neither an explicit host nor
# the OLLAMA_HOST environment variable is provided.
DEFAULT_HOST = "http://localhost:11434"
30
31
32
class OllamaProvider(BaseProvider):
    """Ollama local LLM provider via OpenAI-compatible API.

    Chat and vision requests go through the server's ``/v1``
    OpenAI-compatible endpoint; model discovery uses the native
    ``/api/tags`` endpoint.
    """

    provider_name = "ollama"

    def __init__(self, host: Optional[str] = None):
        """Create a client for an Ollama server.

        Args:
            host: Base URL of the Ollama server. Falls back to the
                OLLAMA_HOST environment variable, then to DEFAULT_HOST.
        """
        self.host = host or os.getenv("OLLAMA_HOST", DEFAULT_HOST)
        # Ollama ignores the API key, but the OpenAI client requires a
        # non-empty value.
        self.client = OpenAI(
            base_url=f"{self.host}/v1",
            api_key="ollama",
        )
        self._models_cache: Optional[list[ModelInfo]] = None
        # Token counts of the most recent request, updated by chat() and
        # analyze_image(). Initialized here so the attribute exists even
        # before the first request is made.
        self._last_usage: dict = {"input_tokens": 0, "output_tokens": 0}

    @staticmethod
    def is_available(host: Optional[str] = None) -> bool:
        """Check if an Ollama server is running and reachable."""
        host = host or os.getenv("OLLAMA_HOST", DEFAULT_HOST)
        try:
            resp = requests.get(f"{host}/api/tags", timeout=3)
            return resp.status_code == 200
        except Exception:
            # Deliberate best-effort probe: any connection/timeout error
            # just means "not available".
            return False

    @staticmethod
    def _extract_usage(response) -> dict:
        """Return input/output token counts from an OpenAI-style response.

        A missing usage block, or missing/None token fields, count as zero.
        """
        usage = getattr(response, "usage", None)
        if not usage:
            return {"input_tokens": 0, "output_tokens": 0}
        return {
            "input_tokens": getattr(usage, "prompt_tokens", 0) or 0,
            "output_tokens": getattr(usage, "completion_tokens", 0) or 0,
        }

    @property
    def _default_model(self) -> str:
        """First chat-capable model on the server, or a static fallback."""
        for m in self._get_models():
            if "chat" in m.capabilities:
                return m.id
        return "llama3.2:latest"

    @property
    def _default_vision_model(self) -> Optional[str]:
        """First vision-capable model on the server, or None if absent."""
        for m in self._get_models():
            if "vision" in m.capabilities:
                return m.id
        return None

    def _get_models(self) -> list[ModelInfo]:
        """Return the server's model list, querying it at most once."""
        if self._models_cache is None:
            self._models_cache = self.list_models()
        return self._models_cache

    def chat(
        self,
        messages: list[dict],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        model: Optional[str] = None,
    ) -> str:
        """Run a chat completion and return the assistant's text.

        Args:
            messages: OpenAI-style message dicts.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            model: Model id; defaults to the first chat-capable model.

        Returns:
            The response text ("" if the model returned no content).
        """
        model = model or self._default_model
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        self._last_usage = self._extract_usage(response)
        return response.choices[0].message.content or ""

    def analyze_image(
        self,
        image_bytes: bytes,
        prompt: str,
        max_tokens: int = 4096,
        model: Optional[str] = None,
    ) -> str:
        """Describe an image with a vision-capable model.

        Args:
            image_bytes: Raw image data (sent as a JPEG data URL).
            prompt: Instruction for the model.
            max_tokens: Maximum tokens to generate.
            model: Model id; defaults to the first vision-capable model.

        Returns:
            The response text ("" if the model returned no content).

        Raises:
            RuntimeError: If no vision-capable model is installed.
        """
        model = model or self._default_vision_model
        if not model:
            raise RuntimeError(
                "No Ollama vision model available. Install a multimodal model: ollama pull llava"
            )
        b64 = base64.b64encode(image_bytes).decode()
        response = self.client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        self._last_usage = self._extract_usage(response)
        return response.choices[0].message.content or ""

    def transcribe_audio(
        self,
        audio_path: str | Path,
        language: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        """Unsupported: Ollama has no transcription endpoint.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Ollama does not support audio transcription. "
            "Use local Whisper (--transcription-model whisper-local:large) or OpenAI Whisper API."
        )

    def list_models(self) -> list[ModelInfo]:
        """List models installed on the server, sorted by id.

        Vision capability is inferred from the model's base name (the part
        before the ":tag"). Returns an empty list if the server cannot be
        reached — callers treat that the same as "no models installed".
        """
        models = []
        try:
            resp = requests.get(f"{self.host}/api/tags", timeout=5)
            resp.raise_for_status()
            data = resp.json()
            for m in data.get("models", []):
                name = m.get("name", "")
                caps = ["chat"]
                base_name = name.split(":")[0].lower()
                if base_name in _VISION_FAMILIES or "vision" in base_name:
                    caps.append("vision")
                models.append(
                    ModelInfo(
                        id=name,
                        provider="ollama",
                        display_name=name,
                        capabilities=caps,
                    )
                )
        except Exception as e:
            # Best-effort: an unreachable server yields an empty list.
            logger.warning("Failed to list Ollama models: %s", e)
        return sorted(models, key=lambda m: m.id)
173
174
175
# Register the provider under the "ollama" name so the registry can build
# it on demand. A local Ollama server needs no API key (empty env_var),
# and model prefixes/defaults are discovered at runtime from the server,
# so they are left blank here.
ProviderRegistry.register(
    provider_class=OllamaProvider,
    name="ollama",
    model_prefixes=[],
    env_var="",
    default_models={"chat": "", "vision": "", "audio": ""},
)
182

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button