PlanOpticon

planopticon / video_processor / providers / huggingface_provider.py
Blame History Raw 188 lines
1
"""Hugging Face Inference API provider implementation."""
2
3
import base64
4
import logging
5
import os
6
from pathlib import Path
7
from typing import Optional
8
9
from dotenv import load_dotenv
10
11
from video_processor.providers.base import BaseProvider, ModelInfo, ProviderRegistry
12
13
load_dotenv()
14
logger = logging.getLogger(__name__)
15
16
# Curated list of popular HF Inference models, kept as (id, display name,
# capabilities) tuples and expanded into ModelInfo records in one pass.
_HF_MODELS = [
    ModelInfo(id=model_id, provider="huggingface", display_name=label, capabilities=caps)
    for model_id, label, caps in (
        ("meta-llama/Llama-3.1-70B-Instruct", "Llama 3.1 70B Instruct", ["chat"]),
        ("meta-llama/Llama-3.1-8B-Instruct", "Llama 3.1 8B Instruct", ["chat"]),
        ("mistralai/Mixtral-8x7B-Instruct-v0.1", "Mixtral 8x7B Instruct", ["chat"]),
        ("microsoft/Phi-3-mini-4k-instruct", "Phi-3 Mini 4K Instruct", ["chat"]),
        ("llava-hf/llava-v1.6-mistral-7b-hf", "LLaVA v1.6 Mistral 7B", ["chat", "vision"]),
        ("openai/whisper-large-v3", "Whisper Large v3", ["audio"]),
    )
]
55
56
57
class HuggingFaceProvider(BaseProvider):
    """Hugging Face Inference API provider using huggingface_hub."""

    provider_name = "huggingface"

    def __init__(self, token: Optional[str] = None):
        """Create a client for the Hugging Face Inference API.

        Args:
            token: HF API token; falls back to the ``HF_TOKEN`` env var.

        Raises:
            ImportError: if ``huggingface_hub`` is not installed.
            ValueError: if no token is supplied or found in the environment.
        """
        try:
            from huggingface_hub import InferenceClient
        except ImportError as err:
            # Chain the original exception so the real import failure stays visible.
            raise ImportError(
                "huggingface_hub package not installed. Install with: pip install huggingface_hub"
            ) from err

        self._token = token or os.getenv("HF_TOKEN")
        if not self._token:
            raise ValueError("HF_TOKEN not set")

        self._client = InferenceClient(token=self._token)
        # Token counts from the most recent API call (see _record_usage).
        self._last_usage: dict = {}

    @staticmethod
    def _resolve_model(model: Optional[str], default: str) -> str:
        """Fall back to *default* and strip the registry's ``hf/`` routing prefix."""
        return (model or default).removeprefix("hf/")

    def _record_usage(self, response) -> None:
        """Cache prompt/completion token counts from a chat response, if reported."""
        usage = getattr(response, "usage", None)
        self._last_usage = {
            "input_tokens": getattr(usage, "prompt_tokens", 0) if usage else 0,
            "output_tokens": getattr(usage, "completion_tokens", 0) if usage else 0,
        }

    def chat(
        self,
        messages: list[dict],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        model: Optional[str] = None,
    ) -> str:
        """Run a chat completion and return the assistant's reply text.

        Args:
            messages: OpenAI-style message dicts (``role``/``content``).
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.
            model: HF model id (optionally ``hf/``-prefixed); defaults to
                Llama 3.1 70B Instruct.
        """
        model = self._resolve_model(model, "meta-llama/Llama-3.1-70B-Instruct")

        response = self._client.chat_completion(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )

        self._record_usage(response)
        # Content may be None for an empty completion; normalize to "".
        return response.choices[0].message.content or ""

    def analyze_image(
        self,
        image_bytes: bytes,
        prompt: str,
        max_tokens: int = 4096,
        model: Optional[str] = None,
    ) -> str:
        """Describe an image with a vision-capable chat model.

        Args:
            image_bytes: Raw image bytes (sent as a base64 JPEG data URL).
            prompt: Text instruction accompanying the image.
            max_tokens: Maximum number of tokens to generate.
            model: HF model id (optionally ``hf/``-prefixed); defaults to
                LLaVA v1.6 Mistral 7B.
        """
        model = self._resolve_model(model, "llava-hf/llava-v1.6-mistral-7b-hf")

        b64 = base64.b64encode(image_bytes).decode()

        response = self._client.chat_completion(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )

        self._record_usage(response)
        return response.choices[0].message.content or ""

    def transcribe_audio(
        self,
        audio_path: str | Path,
        language: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        """Transcribe an audio file via HF automatic speech recognition.

        Args:
            audio_path: Path to the audio file to transcribe.
            language: Language hint; echoed back in the result dict
                (not forwarded to the API by this implementation).
            model: ASR model id (optionally ``hf/``-prefixed); defaults to
                Whisper Large v3.

        Returns:
            A dict with ``text`` plus placeholder ``segments``/``duration``
            fields so the shape matches other providers' results.
        """
        model = self._resolve_model(model, "openai/whisper-large-v3")

        audio_bytes = Path(audio_path).read_bytes()

        result = self._client.automatic_speech_recognition(
            audio=audio_bytes,
            model=model,
        )

        # The client may return an object with .text or a plain value; normalize.
        text = result.text if hasattr(result, "text") else str(result)

        # The ASR endpoint does not report token usage.
        self._last_usage = {
            "input_tokens": 0,
            "output_tokens": 0,
        }

        return {
            "text": text,
            "segments": [],
            "language": language,
            "duration": None,
            "provider": "huggingface",
            "model": model,
        }

    def list_models(self) -> list[ModelInfo]:
        """Return a copy of the curated model catalog for this provider."""
        return list(_HF_MODELS)
175
176
177
# Register this provider with the shared registry at import time so "hf/"-prefixed
# model ids (and the per-capability defaults below) resolve to HuggingFaceProvider.
ProviderRegistry.register(
    name="huggingface",
    provider_class=HuggingFaceProvider,
    env_var="HF_TOKEN",
    model_prefixes=["hf/"],
    default_models={
        "chat": "meta-llama/Llama-3.1-70B-Instruct",
        "vision": "llava-hf/llava-v1.6-mistral-7b-hf",
        "audio": "openai/whisper-large-v3",
    },
)
188

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button