|
287a3bb…
|
leo
|
1 |
"""Usage tracking and cost estimation for API calls.""" |
|
287a3bb…
|
leo
|
2 |
|
|
287a3bb…
|
leo
|
3 |
import time |
|
287a3bb…
|
leo
|
4 |
from dataclasses import dataclass, field |
|
287a3bb…
|
leo
|
5 |
from typing import Optional |
|
287a3bb…
|
leo
|
6 |
|
|
287a3bb…
|
leo
|
7 |
# Cost per million tokens (USD) — updated Feb 2025
# NOTE(review): the date above predates at least one model ID listed below
# ("...-20250929"); confirm the prices are still current.
# Token-priced models carry "input"/"output" rates; audio models carry a
# "per_minute" rate instead.
_MODEL_PRICING = {
    # Anthropic
    "claude-sonnet-4-5-20250929": {"input": 3.00, "output": 15.00},
    # NOTE(review): the provider's published ID for this model may be
    # "claude-3-5-haiku-20241022" — verify against what callers pass in.
    "claude-haiku-3-5-20241022": {"input": 0.80, "output": 4.00},
    # OpenAI
    "gpt-4o": {"input": 2.50, "output": 10.00},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4.1": {"input": 2.00, "output": 8.00},
    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
    # Google Gemini
    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
    "gemini-2.5-pro": {"input": 1.25, "output": 10.00},
    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
    # Whisper
    "whisper-1": {"per_minute": 0.006},
}


@dataclass
class ModelUsage:
    """Accumulated usage for a single model.

    Counters start at zero and are meant to be incremented by the caller;
    ``total_tokens`` and ``estimated_cost`` are derived from them on access.
    """

    provider: str = ""
    model: str = ""
    calls: int = 0
    input_tokens: int = 0
    output_tokens: int = 0
    audio_minutes: float = 0.0

    @property
    def total_tokens(self) -> int:
        """Return the sum of input and output tokens."""
        return self.input_tokens + self.output_tokens

    @property
    def estimated_cost(self) -> float:
        """Return the estimated cost in USD, or 0.0 when no price is known.

        Models priced per minute are billed on ``audio_minutes``; all others
        are billed on tokens at the table's per-million rates.
        """
        pricing = self._lookup_pricing()
        if not pricing:
            return 0.0
        if "per_minute" in pricing:
            return self.audio_minutes * pricing["per_minute"]
        return (
            self.input_tokens * pricing.get("input", 0) / 1_000_000
            + self.output_tokens * pricing.get("output", 0) / 1_000_000
        )

    def _lookup_pricing(self) -> Optional[dict]:
        """Find the pricing row for ``self.model``, or None if nothing fits."""
        name = self.model
        if not name:
            # "" is a substring of every key, so a substring search would
            # silently bill an unnamed model at the first row's price.
            return None

        exact = _MODEL_PRICING.get(name)
        if exact:
            return exact

        # A dated/suffixed ID such as "gpt-4o-mini-2024-07-18" contains both
        # "gpt-4o" and "gpt-4o-mini"; the longest contained key is the most
        # specific one and therefore the right price.
        contained = [key for key in _MODEL_PRICING if key in name]
        if contained:
            return _MODEL_PRICING[max(contained, key=len)]

        # Abbreviated names (e.g. "gpt-4") are ambiguous; keep the
        # first-in-table choice for them.
        for key, row in _MODEL_PRICING.items():
            if name in key:
                return row
        return None
|
287a3bb…
|
leo
|
59 |
|
|
287a3bb…
|
leo
|
60 |
|
|
287a3bb…
|
leo
|
61 |
@dataclass
class StepTiming:
    """Start/end timestamps for one named pipeline step."""

    name: str
    # 0.0 doubles as "not set" for both timestamps (see ``duration``).
    start_time: float = 0.0
    end_time: float = 0.0

    @property
    def duration(self) -> float:
        """Return ``end_time - start_time``, or 0.0 if either one is unset."""
        if not self.start_time or not self.end_time:
            return 0.0
        elapsed = self.end_time - self.start_time
        return elapsed
|
287a3bb…
|
leo
|
74 |
|
|
287a3bb…
|
leo
|
75 |
|
|
287a3bb…
|
leo
|
76 |
@dataclass
class UsageTracker:
    """Tracks API usage, costs, and timing across a pipeline run.

    Usage is accumulated per ``"<provider>/<model>"`` key via ``record``;
    steps are timed with ``start_step`` / ``end_step``; ``format_summary``
    renders everything as text.
    """

    # Per-model accumulators (ModelUsage), keyed by "<provider>/<model>".
    _models: dict = field(default_factory=dict)
    # Finished StepTiming records, in completion order.
    _steps: list = field(default_factory=list)
    # The step currently being timed, or None when no step is open.
    _current_step: Optional[StepTiming] = field(default=None)
    # time.time() at construction; the reference point for total_duration.
    _start_time: float = field(default_factory=time.time)

    def record(
        self,
        provider: str,
        model: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
        audio_minutes: float = 0.0,
    ) -> None:
        """Record usage for an API call.

        Each call counts as one API call for ``provider``/``model`` and adds
        the given token and audio amounts to that model's running totals.
        """
        key = f"{provider}/{model}"
        if key not in self._models:
            self._models[key] = ModelUsage(provider=provider, model=model)
        usage = self._models[key]
        usage.calls += 1
        usage.input_tokens += input_tokens
        usage.output_tokens += output_tokens
        usage.audio_minutes += audio_minutes

    def start_step(self, name: str) -> None:
        """Start timing a pipeline step, closing any step still open."""
        self.end_step()
        self._current_step = StepTiming(name=name, start_time=time.time())

    def end_step(self) -> None:
        """End timing the current step; a no-op when no step is open."""
        if self._current_step:
            self._current_step.end_time = time.time()
            self._steps.append(self._current_step)
            self._current_step = None

    @property
    def total_api_calls(self) -> int:
        """Total number of recorded calls across all models."""
        return sum(u.calls for u in self._models.values())

    @property
    def total_input_tokens(self) -> int:
        """Total input tokens across all models."""
        return sum(u.input_tokens for u in self._models.values())

    @property
    def total_output_tokens(self) -> int:
        """Total output tokens across all models."""
        return sum(u.output_tokens for u in self._models.values())

    @property
    def total_tokens(self) -> int:
        """Total input plus output tokens across all models."""
        return self.total_input_tokens + self.total_output_tokens

    @property
    def total_cost(self) -> float:
        """Sum of each model's estimated cost."""
        return sum(u.estimated_cost for u in self._models.values())

    @property
    def total_duration(self) -> float:
        """Seconds elapsed between tracker creation and now."""
        return time.time() - self._start_time

    def format_summary(self) -> str:
        """Format a human-readable summary for CLI output.

        Returns a multi-line string: a banner, the total time, one row per
        completed step (if any), and a per-model usage table (if any usage
        was recorded).
        """
        rule = "=" * 60
        lines = ["", rule, " PROCESSING SUMMARY", rule]

        # Timing
        total = self.total_duration
        lines.append(f"\n Total time: {_fmt_duration(total)}")
        lines.extend(self._timing_lines(total))

        # API usage
        lines.extend(self._usage_lines())

        lines.append(rule)
        return "\n".join(lines)

    def _timing_lines(self, total: float) -> list:
        """Build one row per completed step: name, duration, bar, percent."""
        if not self._steps:
            return []
        bar_width = 20
        name_width = max(len(step.name) for step in self._steps)
        rows = [""]
        for step in self._steps:
            pct = (step.duration / total * 100) if total > 0 else 0
            # Scale the percentage onto the bar's own width and clamp it, so
            # the bar is always exactly bar_width cells and 100% fills it.
            filled = min(bar_width, max(0, int(pct * bar_width / 100)))
            bar = "█" * filled + "░" * (bar_width - filled)
            rows.append(
                f" {step.name:<{name_width}} {_fmt_duration(step.duration):>8} "
                f"{bar} {pct:4.1f}%"
            )
        return rows

    def _usage_lines(self) -> list:
        """Build the call/token totals and the per-model usage table."""
        if not self._models:
            return []
        rows = [
            f"\n API Calls: {self.total_api_calls}",
            f" Tokens: {self.total_tokens:,} "
            f"({self.total_input_tokens:,} in / {self.total_output_tokens:,} out)",
            "",
            f" {'Model':<35} {'Calls':>6} {'In Tok':>8} {'Out Tok':>8} {'Cost':>8}",
            f" {'-' * 35} {'-' * 6} {'-' * 8} {'-' * 8} {'-' * 8}",
        ]
        for key in sorted(self._models):
            u = self._models[key]
            cost = u.estimated_cost
            cost_str = f"${cost:.4f}" if cost > 0 else "free"
            if u.audio_minutes > 0:
                # Audio rows show minutes in the input column and no output.
                rows.append(
                    f" {key:<35} {u.calls:>6} {u.audio_minutes:>7.1f}m {'-':>8} {cost_str:>8}"
                )
            else:
                rows.append(
                    f" {key:<35} {u.calls:>6} "
                    f"{u.input_tokens:>8,} {u.output_tokens:>8,} {cost_str:>8}"
                )
        # NOTE(review): assumed to belong to the usage section (shown only
        # when usage was recorded) — confirm against the intended layout.
        rows.append(f"\n Estimated total cost: ${self.total_cost:.4f}")
        return rows
|
287a3bb…
|
leo
|
191 |
|
|
287a3bb…
|
leo
|
192 |
|
|
287a3bb…
|
leo
|
193 |
def _fmt_duration(seconds: float) -> str: |
|
287a3bb…
|
leo
|
194 |
"""Format seconds as human-readable duration.""" |
|
287a3bb…
|
leo
|
195 |
if seconds < 60: |
|
287a3bb…
|
leo
|
196 |
return f"{seconds:.1f}s" |
|
287a3bb…
|
leo
|
197 |
m = int(seconds // 60) |
|
287a3bb…
|
leo
|
198 |
s = seconds % 60 |
|
287a3bb…
|
leo
|
199 |
if m < 60: |
|
287a3bb…
|
leo
|
200 |
return f"{m}m {s:.0f}s" |
|
287a3bb…
|
leo
|
201 |
h = m // 60 |
|
287a3bb…
|
leo
|
202 |
m = m % 60 |
|
287a3bb…
|
leo
|
203 |
return f"{h}h {m}m {s:.0f}s" |