|
1
|
"""Usage tracking and cost estimation for API calls.""" |
|
2
|
|
|
3
|
import time |
|
4
|
from dataclasses import dataclass, field |
|
5
|
from typing import Optional |
|
6
|
|
|
7
|
# Cost per million tokens (USD) — updated Feb 2025
# Values are dollars per 1M tokens, split by "input" / "output" direction.
# Audio models are the exception: they price by "per_minute" of audio (USD).
# NOTE(review): prices drift over time — verify against provider price pages
# before relying on estimates for billing.
_MODEL_PRICING = {
    # Anthropic
    "claude-sonnet-4-5-20250929": {"input": 3.00, "output": 15.00},
    "claude-haiku-3-5-20241022": {"input": 0.80, "output": 4.00},
    # OpenAI
    "gpt-4o": {"input": 2.50, "output": 10.00},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4.1": {"input": 2.00, "output": 8.00},
    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
    # Google Gemini
    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
    "gemini-2.5-pro": {"input": 1.25, "output": 10.00},
    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
    # Whisper (priced per minute of audio, not per token)
    "whisper-1": {"per_minute": 0.006},
}
|
25
|
|
|
26
|
|
|
27
|
@dataclass
class ModelUsage:
    """Accumulated usage for a single model.

    Token counts and call counts are accumulated by the caller (see
    ``UsageTracker.record``); cost is derived lazily from ``_MODEL_PRICING``.
    """

    provider: str = ""  # e.g. "openai", "anthropic"
    model: str = ""  # model identifier, ideally a key of _MODEL_PRICING
    calls: int = 0  # number of API calls recorded
    input_tokens: int = 0
    output_tokens: int = 0
    audio_minutes: float = 0.0  # only used by per-minute-priced models

    @property
    def total_tokens(self) -> int:
        """Combined input + output token count."""
        return self.input_tokens + self.output_tokens

    @property
    def estimated_cost(self) -> float:
        """Estimated USD cost for the accumulated usage.

        Looks up ``self.model`` in ``_MODEL_PRICING``; if no exact entry
        exists, falls back to a substring match so dated/aliased names
        (e.g. "gpt-4o-2024-08-06") still resolve. Returns 0.0 when the
        model is unknown or unset.
        """
        pricing = _MODEL_PRICING.get(self.model)
        # Guard on a non-empty model: "" is a substring of every key, so an
        # unset model would otherwise match an arbitrary pricing entry.
        if not pricing and self.model:
            # Prefer the LONGEST matching key so "gpt-4.1-mini-2025-xx"
            # resolves to "gpt-4.1-mini", not its cheaper prefix "gpt-4.1".
            matches = [
                (key, p)
                for key, p in _MODEL_PRICING.items()
                if key in self.model or self.model in key
            ]
            if matches:
                pricing = max(matches, key=lambda kp: len(kp[0]))[1]
        if not pricing:
            return 0.0
        if "per_minute" in pricing:
            return self.audio_minutes * pricing["per_minute"]
        return (
            self.input_tokens * pricing.get("input", 0) / 1_000_000
            + self.output_tokens * pricing.get("output", 0) / 1_000_000
        )
|
59
|
|
|
60
|
|
|
61
|
@dataclass
class StepTiming:
    """Timing record for one pipeline step."""

    name: str
    start_time: float = 0.0  # epoch seconds; 0.0 means "not started"
    end_time: float = 0.0  # epoch seconds; 0.0 means "not finished"

    @property
    def duration(self) -> float:
        """Elapsed seconds, or 0.0 while either timestamp is unset."""
        if not (self.start_time and self.end_time):
            return 0.0
        return self.end_time - self.start_time
|
74
|
|
|
75
|
|
|
76
|
@dataclass
class UsageTracker:
    """Tracks API usage, costs, and timing across a pipeline run."""

    # "provider/model" -> ModelUsage accumulator
    _models: dict = field(default_factory=dict)
    # Completed StepTiming records, in completion order
    _steps: list = field(default_factory=list)
    # Step currently being timed, if any
    _current_step: Optional[StepTiming] = field(default=None)
    # Wall-clock start of the whole run
    _start_time: float = field(default_factory=time.time)

    def record(
        self,
        provider: str,
        model: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
        audio_minutes: float = 0.0,
    ) -> None:
        """Record usage for an API call, accumulating per provider/model."""
        key = f"{provider}/{model}"
        if key not in self._models:
            self._models[key] = ModelUsage(provider=provider, model=model)
        usage = self._models[key]
        usage.calls += 1
        usage.input_tokens += input_tokens
        usage.output_tokens += output_tokens
        usage.audio_minutes += audio_minutes

    def start_step(self, name: str) -> None:
        """Start timing a pipeline step.

        Any step still open is closed first, so callers may chain
        start_step() calls without pairing each with end_step().
        """
        if self._current_step:
            self._current_step.end_time = time.time()
            self._steps.append(self._current_step)
        self._current_step = StepTiming(name=name, start_time=time.time())

    def end_step(self) -> None:
        """End timing the current step (no-op when no step is open)."""
        if self._current_step:
            self._current_step.end_time = time.time()
            self._steps.append(self._current_step)
            self._current_step = None

    @property
    def total_api_calls(self) -> int:
        """Total recorded API calls across all models."""
        return sum(u.calls for u in self._models.values())

    @property
    def total_input_tokens(self) -> int:
        """Total input tokens across all models."""
        return sum(u.input_tokens for u in self._models.values())

    @property
    def total_output_tokens(self) -> int:
        """Total output tokens across all models."""
        return sum(u.output_tokens for u in self._models.values())

    @property
    def total_tokens(self) -> int:
        """Total input + output tokens across all models."""
        return self.total_input_tokens + self.total_output_tokens

    @property
    def total_cost(self) -> float:
        """Estimated USD cost summed over all models."""
        return sum(u.estimated_cost for u in self._models.values())

    @property
    def total_duration(self) -> float:
        """Wall-clock seconds elapsed since this tracker was created."""
        return time.time() - self._start_time

    def format_summary(self) -> str:
        """Format a human-readable summary for CLI output.

        Includes total wall time, a per-step breakdown with a fixed
        20-character bar chart, and a per-model usage/cost table.
        """
        lines = []
        lines.append("")
        lines.append("=" * 60)
        lines.append(" PROCESSING SUMMARY")
        lines.append("=" * 60)

        # Timing
        total = self.total_duration
        lines.append(f"\n Total time: {_fmt_duration(total)}")
        if self._steps:
            lines.append("")
            max_name = max(len(s.name) for s in self._steps)
            for step in self._steps:
                pct = (step.duration / total * 100) if total > 0 else 0
                # One bar char per 3%, clamped to the bar width: a step over
                # 60% of total time would otherwise yield bar_len > 20 and a
                # negative filler count, producing a ragged over-long bar.
                bar_len = min(int(pct / 3), 20)
                bar = "█" * bar_len + "░" * (20 - bar_len)
                lines.append(
                    f" {step.name:<{max_name}} {_fmt_duration(step.duration):>8} "
                    f"{bar} {pct:4.1f}%"
                )

        # API usage
        if self._models:
            lines.append(f"\n API Calls: {self.total_api_calls}")
            lines.append(
                f" Tokens: {self.total_tokens:,} "
                f"({self.total_input_tokens:,} in / {self.total_output_tokens:,} out)"
            )
            lines.append("")
            lines.append(f" {'Model':<35} {'Calls':>6} {'In Tok':>8} {'Out Tok':>8} {'Cost':>8}")
            lines.append(f" {'-' * 35} {'-' * 6} {'-' * 8} {'-' * 8} {'-' * 8}")
            for key in sorted(self._models.keys()):
                u = self._models[key]
                cost_str = f"${u.estimated_cost:.4f}" if u.estimated_cost > 0 else "free"
                if u.audio_minutes > 0:
                    # Audio models show minutes in place of token counts.
                    lines.append(
                        f" {key:<35} {u.calls:>6} {u.audio_minutes:>7.1f}m {'-':>8} {cost_str:>8}"
                    )
                else:
                    lines.append(
                        f" {key:<35} {u.calls:>6} "
                        f"{u.input_tokens:>8,} {u.output_tokens:>8,} {cost_str:>8}"
                    )

            lines.append(f"\n Estimated total cost: ${self.total_cost:.4f}")

        lines.append("=" * 60)
        return "\n".join(lines)
|
191
|
|
|
192
|
|
|
193
|
def _fmt_duration(seconds: float) -> str: |
|
194
|
"""Format seconds as human-readable duration.""" |
|
195
|
if seconds < 60: |
|
196
|
return f"{seconds:.1f}s" |
|
197
|
m = int(seconds // 60) |
|
198
|
s = seconds % 60 |
|
199
|
if m < 60: |
|
200
|
return f"{m}m {s:.0f}s" |
|
201
|
h = m // 60 |
|
202
|
m = m % 60 |
|
203
|
return f"{h}h {m}m {s:.0f}s" |
|
204
|
|