planopticon / video_processor / providers / ollama_provider.py
"""Ollama provider implementation using the OpenAI-compatible API."""

import base64
import logging
import os
from pathlib import Path
from typing import Optional

import requests
from openai import OpenAI

from video_processor.providers.base import BaseProvider, ModelInfo, ProviderRegistry

logger = logging.getLogger(__name__)

# Known vision-capable model families (base name before the colon/tag)
_VISION_FAMILIES = {
    "llava",
    "llava-llama3",
    "llava-phi3",
    "llama3.2-vision",
    "moondream",
    "bakllava",
    "minicpm-v",
    "deepseek-vl",
    "internvl2",
}
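# Models outside this set are still flagged as vision-capable when "vision"
# appears in their base name (see list_models); everything else is chat-only.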

DEFAULT_HOST = "http://localhost:11434"


class OllamaProvider(BaseProvider):
    """Ollama local LLM provider via the OpenAI-compatible API."""

    provider_name = "ollama"

    def __init__(self, host: Optional[str] = None):
        self.host = host or os.getenv("OLLAMA_HOST", DEFAULT_HOST)
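        # Ollama's OpenAI-compatible endpoint ignores the API key, but the
        # openai client requires a non-empty value, so a placeholder is used.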
        self.client = OpenAI(
            base_url=f"{self.host}/v1",
            api_key="ollama",
        )
        self._models_cache: Optional[list[ModelInfo]] = None

    @staticmethod
    def is_available(host: Optional[str] = None) -> bool:
        """Check if an Ollama server is running and reachable."""
        host = host or os.getenv("OLLAMA_HOST", DEFAULT_HOST)
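        # /api/tags lists installed models; here it doubles as a liveness probe.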
        try:
            resp = requests.get(f"{host}/api/tags", timeout=3)
            return resp.status_code == 200
        except Exception:
            return False

    @property
    def _default_model(self) -> str:
        models = self._get_models()
        for m in models:
            if "chat" in m.capabilities:
                return m.id
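        # Fall back to a common default; calls will fail if it isn't pulled.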
        return "llama3.2:latest"

    @property
    def _default_vision_model(self) -> Optional[str]:
        models = self._get_models()
        for m in models:
            if "vision" in m.capabilities:
                return m.id
        return None

    def _get_models(self) -> list[ModelInfo]:
        if self._models_cache is None:
            self._models_cache = self.list_models()
        return self._models_cache

    def chat(
        self,
        messages: list[dict],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        model: Optional[str] = None,
    ) -> str:
        model = model or self._default_model
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
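        # response.usage can be None, so guard before reading token counts.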
        self._last_usage = {
            "input_tokens": (getattr(response.usage, "prompt_tokens", 0) or 0)
            if response.usage
            else 0,
            "output_tokens": (getattr(response.usage, "completion_tokens", 0) or 0)
            if response.usage
            else 0,
        }
        return response.choices[0].message.content or ""

    def analyze_image(
        self,
        image_bytes: bytes,
        prompt: str,
        max_tokens: int = 4096,
        model: Optional[str] = None,
    ) -> str:
        model = model or self._default_vision_model
        if not model:
            raise RuntimeError(
                "No Ollama vision model available. Install a multimodal model: ollama pull llava"
            )
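        # The frame is assumed to be JPEG; it is sent as a base64 data URL,
        # which the OpenAI-compatible image_url content type accepts.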
        b64 = base64.b64encode(image_bytes).decode()
        response = self.client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        self._last_usage = {
            "input_tokens": (getattr(response.usage, "prompt_tokens", 0) or 0)
            if response.usage
            else 0,
            "output_tokens": (getattr(response.usage, "completion_tokens", 0) or 0)
            if response.usage
            else 0,
        }
        return response.choices[0].message.content or ""

    def transcribe_audio(
        self,
        audio_path: str | Path,
        language: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        raise NotImplementedError(
            "Ollama does not support audio transcription. "
            "Use local Whisper (--transcription-model whisper-local:large) or OpenAI Whisper API."
        )

    def list_models(self) -> list[ModelInfo]:
        models = []
        try:
            resp = requests.get(f"{self.host}/api/tags", timeout=5)
            resp.raise_for_status()
            data = resp.json()
            for m in data.get("models", []):
                name = m.get("name", "")
                caps = ["chat"]
                base_name = name.split(":")[0].lower()
                if base_name in _VISION_FAMILIES or "vision" in base_name:
                    caps.append("vision")
                models.append(
                    ModelInfo(
                        id=name,
                        provider="ollama",
                        display_name=name,
                        capabilities=caps,
                    )
                )
        except Exception as e:
            logger.warning(f"Failed to list Ollama models: {e}")
        return sorted(models, key=lambda m: m.id)
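

# No API key is needed and defaults are resolved dynamically from the running
# server, so env_var, model_prefixes, and default_models are left empty.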
ProviderRegistry.register(
    name="ollama",
    provider_class=OllamaProvider,
    env_var="",
    model_prefixes=[],
    default_models={"chat": "", "vision": "", "audio": ""},
)
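

if __name__ == "__main__":
    # Minimal smoke-test sketch (an illustrative addition, not part of the
    # provider API): assumes a local Ollama server is running with at least
    # one chat-capable model pulled.
    if OllamaProvider.is_available():
        provider = OllamaProvider()
        for info in provider.list_models():
            print(info.id, info.capabilities)
        print(provider.chat([{"role": "user", "content": "Say hi in one word."}]))
    else:
        print("No Ollama server reachable at", os.getenv("OLLAMA_HOST", DEFAULT_HOST))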
