PlanOpticon

planopticon / video_processor / providers / vertex_provider.py

Blame History Raw 227 lines

1	`"""Google Vertex AI provider implementation."""`
2
3	`import logging`
4	`import os`
5	`from pathlib import Path`
6	`from typing import Optional`
7
8	`from dotenv import load_dotenv`
9
10	`from video_processor.providers.base import BaseProvider, ModelInfo, ProviderRegistry`
11
# Module import side effects: run python-dotenv's loader, then create the
# module-level logger.
load_dotenv()
logger = logging.getLogger(__name__)

# Curated list of models available on Vertex AI.
# Every entry is registered under the "vertex" provider and advertises the
# same chat / vision / audio capabilities; only the id and label differ.
_VERTEX_MODELS = [
    ModelInfo(
        id=model_id,
        provider="vertex",
        display_name=label,
        # A fresh list per entry so no two ModelInfo objects share state.
        capabilities=["chat", "vision", "audio"],
    )
    for model_id, label in (
        ("gemini-2.0-flash", "Gemini 2.0 Flash"),
        ("gemini-2.0-pro", "Gemini 2.0 Pro"),
        ("gemini-1.5-pro", "Gemini 1.5 Pro"),
        ("gemini-1.5-flash", "Gemini 1.5 Flash"),
    )
]
42
43
class VertexProvider(BaseProvider):
    """Google Vertex AI provider using google-genai SDK with Vertex config.

    The provider wraps a ``genai.Client`` created with ``vertexai=True`` and
    exposes chat, image analysis and audio transcription. After every model
    call the token counts reported by the response are stored in
    ``self._last_usage`` as ``{"input_tokens": int, "output_tokens": int}``.
    """

    provider_name = "vertex"

    # Model used whenever a caller does not name one explicitly.
    _DEFAULT_MODEL = "gemini-2.0-flash"
    # Routing prefix callers may put in front of a model id ("vertex/<id>").
    _MODEL_PREFIX = "vertex/"
    # File suffix -> MIME type sent along with inline audio bytes.
    _AUDIO_MIME_TYPES = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".m4a": "audio/mp4",
        ".flac": "audio/flac",
        ".ogg": "audio/ogg",
        ".webm": "audio/webm",
    }

    def __init__(
        self,
        project: Optional[str] = None,
        location: Optional[str] = None,
    ):
        """Create a Vertex-backed genai client.

        Args:
            project: Google Cloud project id. Falls back to the
                ``GOOGLE_CLOUD_PROJECT`` environment variable.
            location: Vertex region. Falls back to ``GOOGLE_CLOUD_REGION``
                and then to ``"us-central1"``.

        Raises:
            ImportError: If the ``google-genai`` SDK cannot be imported.
            ValueError: If no project id is given or found in the environment.
        """
        try:
            from google import genai
            from google.genai import types  # noqa: F401
        except ImportError as err:
            # The module imported above ships in the ``google-genai``
            # distribution, so that is the package the hint must name.
            raise ImportError(
                "google-genai package not installed. "
                "Install with: pip install google-genai"
            ) from err

        self._genai = genai
        self._project = project or os.getenv("GOOGLE_CLOUD_PROJECT")
        self._location = location or os.getenv("GOOGLE_CLOUD_REGION", "us-central1")

        if not self._project:
            raise ValueError("GOOGLE_CLOUD_PROJECT not set")

        self.client = genai.Client(
            vertexai=True,
            project=self._project,
            location=self._location,
        )
        self._last_usage: dict = {}

    @classmethod
    def _resolve_model(cls, model: Optional[str]) -> str:
        """Return the bare model id: apply the default, drop a ``vertex/`` prefix."""
        return (model or cls._DEFAULT_MODEL).removeprefix(cls._MODEL_PREFIX)

    def _record_usage(self, response) -> None:
        """Store the response's token counts in ``self._last_usage``.

        Missing metadata and unset (``None``) counters are both recorded as 0,
        so consumers can always sum the values.
        """
        usage = getattr(response, "usage_metadata", None)
        self._last_usage = {
            "input_tokens": getattr(usage, "prompt_token_count", None) or 0,
            "output_tokens": getattr(usage, "candidates_token_count", None) or 0,
        }

    @staticmethod
    def _sniff_image_mime(image_bytes: bytes) -> str:
        """Guess the image MIME type from magic bytes, defaulting to JPEG."""
        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
            return "image/webp"
        # Anything unrecognised keeps the label this provider always used.
        return "image/jpeg"

    def chat(
        self,
        messages: list[dict],
        max_tokens: int = 4096,
        temperature: float = 0.7,
        model: Optional[str] = None,
    ) -> str:
        """Send a chat conversation and return the model's text reply.

        Args:
            messages: Dicts with ``"role"`` and ``"content"`` keys. ``"user"``
                messages are sent with the user role, ``"system"`` messages
                become the request's system instruction, and every other role
                is sent with the model role.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.
            model: Model id, optionally prefixed with ``vertex/``. Defaults to
                ``gemini-2.0-flash``.

        Returns:
            The response text, or ``""`` when the response carries no text.
        """
        from google.genai import types

        model = self._resolve_model(model)

        system_texts = []
        contents = []
        for msg in messages:
            if msg["role"] == "system":
                # System prompts are instructions, not prior model output;
                # they travel in the config rather than in the turn list.
                system_texts.append(msg["content"])
                continue
            role = "user" if msg["role"] == "user" else "model"
            contents.append(
                types.Content(
                    role=role,
                    parts=[types.Part.from_text(text=msg["content"])],
                )
            )

        system_instruction = "\n\n".join(system_texts) if system_texts else None
        if system_instruction is not None and not contents:
            # A request needs at least one turn: when the caller supplied
            # only system text, send it as the user turn instead.
            contents.append(
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=system_instruction)],
                )
            )
            system_instruction = None

        response = self.client.models.generate_content(
            model=model,
            contents=contents,
            config=types.GenerateContentConfig(
                max_output_tokens=max_tokens,
                temperature=temperature,
                system_instruction=system_instruction,
            ),
        )
        self._record_usage(response)
        return response.text or ""

    def analyze_image(
        self,
        image_bytes: bytes,
        prompt: str,
        max_tokens: int = 4096,
        model: Optional[str] = None,
    ) -> str:
        """Ask the model a question about a single image.

        Args:
            image_bytes: Encoded image data. PNG and WebP are detected from
                their magic bytes; anything else is sent as ``image/jpeg``.
            prompt: Text instruction sent after the image.
            max_tokens: Upper bound on generated tokens.
            model: Model id, optionally prefixed with ``vertex/``. Defaults to
                ``gemini-2.0-flash``.

        Returns:
            The response text, or ``""`` when the response carries no text.
        """
        from google.genai import types

        model = self._resolve_model(model)

        response = self.client.models.generate_content(
            model=model,
            contents=[
                types.Part.from_bytes(
                    data=image_bytes,
                    mime_type=self._sniff_image_mime(image_bytes),
                ),
                prompt,
            ],
            config=types.GenerateContentConfig(
                max_output_tokens=max_tokens,
            ),
        )
        self._record_usage(response)
        return response.text or ""

    def transcribe_audio(
        self,
        audio_path: str | Path,
        language: Optional[str] = None,
        model: Optional[str] = None,
    ) -> dict:
        """Transcribe an audio file by prompting the model for JSON output.

        The whole file is read into memory and sent inline with the request.
        NOTE(review): very large files may exceed the service's inline
        request size limit - confirm against callers.

        Args:
            audio_path: Path to the audio file. The MIME type is derived from
                the suffix; unknown suffixes are sent as ``audio/wav``.
            language: Optional language hint added to the prompt.
            model: Model id, optionally prefixed with ``vertex/``. Defaults to
                ``gemini-2.0-flash``.

        Returns:
            A dict with keys ``text``, ``segments``, ``language``,
            ``duration`` (always ``None``), ``provider`` and ``model``.
        """
        import json

        from google.genai import types

        model = self._resolve_model(model)

        audio_path = Path(audio_path)
        mime_type = self._AUDIO_MIME_TYPES.get(audio_path.suffix.lower(), "audio/wav")
        audio_bytes = audio_path.read_bytes()

        lang_hint = f" The audio is in {language}." if language else ""
        prompt = (
            f"Transcribe this audio accurately.{lang_hint} "
            "Return a JSON object with keys: "
            '"text" (full transcript), '
            '"segments" (array of {start, end, text} objects with timestamps in seconds).'
        )

        response = self.client.models.generate_content(
            model=model,
            contents=[
                types.Part.from_bytes(data=audio_bytes, mime_type=mime_type),
                prompt,
            ],
            config=types.GenerateContentConfig(
                max_output_tokens=8192,
                response_mime_type="application/json",
            ),
        )

        raw_text = response.text
        try:
            data = json.loads(raw_text)
        except (json.JSONDecodeError, TypeError):
            # Invalid JSON, or no text at all (None) in the response.
            data = None

        if isinstance(data, str):
            # Valid JSON, but just a bare string: treat it as the transcript.
            data = {"text": data, "segments": []}
        elif not isinstance(data, dict):
            # Lists, numbers, null or unparseable output: keep the raw text
            # instead of failing on ``.get`` below.
            data = {"text": raw_text or "", "segments": []}

        self._record_usage(response)

        return {
            # ``or`` also covers keys present with a null value.
            "text": data.get("text") or "",
            "segments": data.get("segments") or [],
            "language": language,
            "duration": None,
            "provider": "vertex",
            "model": model,
        }

    def list_models(self) -> list[ModelInfo]:
        """Return a copy of the curated Vertex model list."""
        return list(_VERTEX_MODELS)
214
215
# Register the provider at import time: it is selected via the "vertex/"
# model prefix, keyed on the GOOGLE_CLOUD_PROJECT environment variable, and
# uses the same default model for every task.
ProviderRegistry.register(
    name="vertex",
    provider_class=VertexProvider,
    env_var="GOOGLE_CLOUD_PROJECT",
    model_prefixes=["vertex/"],
    default_models={
        task: "gemini-2.0-flash" for task in ("chat", "vision", "audio")
    },
)
227

PlanOpticon

Keyboard Shortcuts