PlanOpticon

planopticon / video_processor / providers / huggingface_provider.py
Source Blame History 187 lines
0981a08… noreply 1 """Hugging Face Inference API provider implementation."""
0981a08… noreply 2
0981a08… noreply 3 import base64
0981a08… noreply 4 import logging
0981a08… noreply 5 import os
0981a08… noreply 6 from pathlib import Path
0981a08… noreply 7 from typing import Optional
0981a08… noreply 8
0981a08… noreply 9 from dotenv import load_dotenv
0981a08… noreply 10
0981a08… noreply 11 from video_processor.providers.base import BaseProvider, ModelInfo, ProviderRegistry
0981a08… noreply 12
0981a08… noreply 13 load_dotenv()
0981a08… noreply 14 logger = logging.getLogger(__name__)
0981a08… noreply 15
# Curated list of popular HF Inference models.
# Each row is (model id, display name, capabilities); every entry is
# registered under the "huggingface" provider.
_HF_MODELS = [
    ModelInfo(
        id=model_id,
        provider="huggingface",
        display_name=label,
        capabilities=list(caps),
    )
    for model_id, label, caps in (
        ("meta-llama/Llama-3.1-70B-Instruct", "Llama 3.1 70B Instruct", ("chat",)),
        ("meta-llama/Llama-3.1-8B-Instruct", "Llama 3.1 8B Instruct", ("chat",)),
        ("mistralai/Mixtral-8x7B-Instruct-v0.1", "Mixtral 8x7B Instruct", ("chat",)),
        ("microsoft/Phi-3-mini-4k-instruct", "Phi-3 Mini 4K Instruct", ("chat",)),
        ("llava-hf/llava-v1.6-mistral-7b-hf", "LLaVA v1.6 Mistral 7B", ("chat", "vision")),
        ("openai/whisper-large-v3", "Whisper Large v3", ("audio",)),
    )
]
0981a08… noreply 55
0981a08… noreply 56
class HuggingFaceProvider(BaseProvider):
    """Hugging Face Inference API provider using huggingface_hub.

    Wraps ``huggingface_hub.InferenceClient`` for chat, image analysis and
    audio transcription. After each call, ``self._last_usage`` holds a dict
    with ``input_tokens`` and ``output_tokens`` for that call.
    """

    provider_name = "huggingface"

    def __init__(self, token: Optional[str] = None):
        """Create the inference client.

        Args:
            token: Hugging Face access token. When omitted (or empty), the
                ``HF_TOKEN`` environment variable is used.

        Raises:
            ImportError: If ``huggingface_hub`` is not installed.
            ValueError: If no token was passed and ``HF_TOKEN`` is unset.
        """
        try:
            from huggingface_hub import InferenceClient
        except ImportError as exc:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError(
                "huggingface_hub package not installed. Install with: pip install huggingface_hub"
            ) from exc

        self._token = token or os.getenv("HF_TOKEN")
        if not self._token:
            raise ValueError("HF_TOKEN not set")

        self._client = InferenceClient(token=self._token)
        self._last_usage = {}

    @staticmethod
    def _hf_model_id(model: Optional[str], default: str) -> str:
        """Return the model id to send, with any leading ``hf/`` removed."""
        return (model or default).removeprefix("hf/")

    @staticmethod
    def _sniff_image_mime(image_bytes: bytes) -> str:
        """Guess the image MIME type from its leading magic bytes.

        Falls back to ``image/jpeg`` (the previously hard-coded value) when
        the signature is not one of the recognised formats.
        """
        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if image_bytes.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"

    def _store_usage(self, response) -> None:
        """Record token counts from ``response.usage`` into ``_last_usage``.

        Missing usage data, or missing/None counters, are stored as 0.
        """
        usage = getattr(response, "usage", None)
        self._last_usage = {
            "input_tokens": (getattr(usage, "prompt_tokens", 0) or 0) if usage else 0,
            "output_tokens": (getattr(usage, "completion_tokens", 0) or 0) if usage else 0,
        }

    @staticmethod
    def _reply_text(response) -> str:
        """Return the first choice's message content, or ``""`` if absent."""
        choices = getattr(response, "choices", None)
        if not choices:
            # An empty choice list would otherwise surface as an IndexError.
            logging.getLogger(__name__).warning(
                "Hugging Face chat completion returned no choices"
            )
            return ""
        return choices[0].message.content or ""

    def chat(
        self,
        messages: list[dict],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        model: Optional[str] = None,
    ) -> str:
        """Run a chat completion and return the reply text.

        Args:
            messages: Chat messages, passed through to ``chat_completion``.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.
            model: Model id, optionally prefixed with ``hf/``. Defaults to
                ``meta-llama/Llama-3.1-70B-Instruct``.

        Returns:
            The first choice's message content, or ``""`` when it is empty.
        """
        model = self._hf_model_id(model, "meta-llama/Llama-3.1-70B-Instruct")

        response = self._client.chat_completion(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )

        self._store_usage(response)
        return self._reply_text(response)

    def analyze_image(
        self,
        image_bytes: bytes,
        prompt: str,
        max_tokens: int = 4096,
        model: Optional[str] = None,
    ) -> str:
        """Send an image plus a text prompt to a vision model.

        The image is embedded as a base64 ``data:`` URL whose MIME type is
        sniffed from the bytes (JPEG when the format is not recognised).

        Args:
            image_bytes: Raw encoded image data.
            prompt: Text instruction accompanying the image.
            max_tokens: Upper bound on generated tokens.
            model: Model id, optionally prefixed with ``hf/``. Defaults to
                ``llava-hf/llava-v1.6-mistral-7b-hf``.

        Returns:
            The first choice's message content, or ``""`` when it is empty.
        """
        model = self._hf_model_id(model, "llava-hf/llava-v1.6-mistral-7b-hf")

        b64 = base64.b64encode(image_bytes).decode()
        mime = self._sniff_image_mime(image_bytes)

        response = self._client.chat_completion(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime};base64,{b64}"},
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )

        self._store_usage(response)
        return self._reply_text(response)

    def transcribe_audio(
        self,
        audio_path: str | Path,
        language: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        """Transcribe an audio file via automatic speech recognition.

        Args:
            audio_path: Path to the audio file; it is read fully into memory.
            language: Echoed back in the result only; it is not sent with
                the recognition request.
            model: Model id, optionally prefixed with ``hf/``. Defaults to
                ``openai/whisper-large-v3``.

        Returns:
            A dict with ``text``, ``segments`` (always empty), ``language``,
            ``duration`` (always ``None``), ``provider`` and ``model``.
        """
        model = self._hf_model_id(model, "openai/whisper-large-v3")

        audio_bytes = Path(audio_path).read_bytes()

        result = self._client.automatic_speech_recognition(
            audio=audio_bytes,
            model=model,
        )

        # Fall back to the string form when the result has no .text attribute.
        text = result.text if hasattr(result, "text") else str(result)

        # No token usage is taken from the recognition result.
        self._last_usage = {
            "input_tokens": 0,
            "output_tokens": 0,
        }

        return {
            "text": text,
            "segments": [],
            "language": language,
            "duration": None,
            "provider": "huggingface",
            "model": model,
        }

    def list_models(self) -> list[ModelInfo]:
        """Return a fresh list copy of the curated ``_HF_MODELS`` catalogue."""
        return list(_HF_MODELS)
0981a08… noreply 175
0981a08… noreply 176
# Register HuggingFaceProvider with the registry, declaring its token
# environment variable, the "hf/" model prefix and the default model for
# each capability.
ProviderRegistry.register(
    name="huggingface",
    provider_class=HuggingFaceProvider,
    env_var="HF_TOKEN",
    model_prefixes=["hf/"],
    default_models=dict(
        chat="meta-llama/Llama-3.1-70B-Instruct",
        vision="llava-hf/llava-v1.6-mistral-7b-hf",
        audio="openai/whisper-large-v3",
    ),
)

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button