|
287a3bb…
|
leo
|
1 |
"""Caching system for API responses to reduce API calls and costs.""" |
|
829e24a…
|
leo
|
2 |
|
|
829e24a…
|
leo
|
3 |
import hashlib |
|
287a3bb…
|
leo
|
4 |
import json |
|
287a3bb…
|
leo
|
5 |
import logging |
|
287a3bb…
|
leo
|
6 |
import os |
|
287a3bb…
|
leo
|
7 |
import time |
|
287a3bb…
|
leo
|
8 |
from pathlib import Path |
|
287a3bb…
|
leo
|
9 |
from typing import Any, Dict, Optional, Union |
|
287a3bb…
|
leo
|
10 |
|
|
287a3bb…
|
leo
|
11 |
logger = logging.getLogger(__name__)


class ApiCache:
    """Disk-based API response cache.

    Every entry lives in its own JSON file named ``<md5(key)>.json`` inside
    ``cache_dir / namespace``. The file holds a ``{"timestamp", "value"}``
    object; the timestamp (seconds since the epoch, from ``time.time()``) is
    compared against ``ttl`` on every read.
    """

    # Upper bounds (in seconds) of the age buckets reported by get_stats(),
    # in ascending order. Anything above the last bound counts as "older".
    _AGE_BUCKETS = (("1h", 3600), ("6h", 21600), ("24h", 86400))

    def __init__(
        self,
        cache_dir: Union[str, Path],
        namespace: str = "default",
        ttl: int = 86400,  # 24 hours in seconds
    ):
        """
        Initialize API cache.

        Parameters
        ----------
        cache_dir : str or Path
            Directory for cache files
        namespace : str
            Cache namespace for organizing cache files
        ttl : int
            Time-to-live for cache entries in seconds
        """
        self.cache_dir = Path(cache_dir)
        self.namespace = namespace
        self.ttl = ttl

        # Ensure namespace directory exists
        self.namespace_dir = self.cache_dir / namespace
        self.namespace_dir.mkdir(parents=True, exist_ok=True)

        logger.debug("Initialized API cache in %s", self.namespace_dir)

    def get_cache_path(self, key: str) -> Path:
        """
        Get path to cache file for key.

        Parameters
        ----------
        key : str
            Cache key

        Returns
        -------
        Path
            Path to cache file
        """
        # Hash the key to ensure a valid, fixed-length filename. MD5 is used
        # purely as a filename digest; it is kept as-is because changing the
        # hash would orphan every cache file already on disk.
        hashed_key = hashlib.md5(key.encode()).hexdigest()
        return self.namespace_dir / f"{hashed_key}.json"

    def get(self, key: str) -> Optional[Any]:
        """
        Get value from cache.

        Parameters
        ----------
        key : str
            Cache key

        Returns
        -------
        object or None
            Cached value if available and not expired, None otherwise.
            Unreadable or malformed cache files are logged and reported as
            a miss; this method does not raise for them.
        """
        cache_path = self.get_cache_path(key)

        # EAFP: open directly rather than exists()+open(), so an entry that
        # is removed between the two calls is simply a miss.
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                cache_data = json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError, RecursionError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Error reading cache: %s", e)
            return None

        if not isinstance(cache_data, dict):
            logger.warning(
                "Error reading cache: malformed entry in %s", cache_path
            )
            return None

        # A missing timestamp is treated as epoch 0, i.e. expired for any
        # realistic ttl.
        timestamp = cache_data.get("timestamp", 0)
        if not isinstance(timestamp, (int, float)):
            logger.warning(
                "Error reading cache: invalid timestamp in %s", cache_path
            )
            return None

        if time.time() - timestamp > self.ttl:
            logger.debug("Cache entry expired for %s", key)
            return None

        logger.debug("Cache hit for %s", key)
        return cache_data.get("value")

    def set(self, key: str, value: Any) -> bool:
        """
        Set value in cache.

        The entry is serialized first and written to a temporary file that
        is then moved over the final path, so a failed write never damages
        an entry that is already stored under the same key.

        Parameters
        ----------
        key : str
            Cache key
        value : object
            Value to cache (must be JSON serializable)

        Returns
        -------
        bool
            True if successful, False otherwise
        """
        cache_path = self.get_cache_path(key)
        # The ".tmp" suffix keeps in-flight files out of the "*.json" globs
        # used by clear() and get_stats(); the random part keeps concurrent
        # writers from sharing a temp file.
        tmp_path = cache_path.with_name(
            f"{cache_path.name}.{os.urandom(8).hex()}.tmp"
        )

        try:
            # Serialize before touching the filesystem so that a
            # non-serializable value cannot leave a partial file behind.
            payload = json.dumps(
                {"timestamp": time.time(), "value": value},
                ensure_ascii=False,
            )
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(payload)
            os.replace(tmp_path, cache_path)
        except (OSError, TypeError, ValueError, RecursionError) as e:
            logger.warning("Error writing to cache: %s", e)
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: the temp file may never have been
                # created, and the failure has already been reported above.
                pass
            return False

        logger.debug("Cached value for %s", key)
        return True

    def invalidate(self, key: str) -> bool:
        """
        Invalidate cache entry.

        Parameters
        ----------
        key : str
            Cache key

        Returns
        -------
        bool
            True if entry was removed, False otherwise
        """
        cache_path = self.get_cache_path(key)

        try:
            os.remove(cache_path)
        except FileNotFoundError:
            # Nothing cached under this key (or it was removed concurrently).
            return False
        except OSError as e:
            logger.warning("Error invalidating cache: %s", e)
            return False

        logger.debug("Invalidated cache for %s", key)
        return True

    def clear(self, older_than: Optional[int] = None) -> int:
        """
        Clear all cache entries or entries older than specified time.

        Age is measured from each file's modification time, not from the
        timestamp stored inside the entry.

        Parameters
        ----------
        older_than : int, optional
            Clear entries older than this many seconds

        Returns
        -------
        int
            Number of entries cleared
        """
        count = 0
        now = time.time()

        for cache_file in self.namespace_dir.glob("*.json"):
            try:
                # Check file age if criteria provided
                if older_than is not None:
                    file_age = now - os.path.getmtime(cache_file)
                    if file_age <= older_than:
                        continue

                os.remove(cache_file)
                count += 1
            except FileNotFoundError:
                # Removed by someone else after the glob: already gone, so
                # it is neither an error nor counted as cleared by us.
                continue
            except OSError as e:
                logger.warning(
                    "Error clearing cache file %s: %s", cache_file, e
                )

        logger.info("Cleared %s cache entries from %s", count, self.namespace)
        return count

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns
        -------
        dict
            Cache statistics with keys ``namespace``, ``entry_count``,
            ``total_size_bytes`` and ``age_distribution`` (entry counts in
            the buckets ``1h``, ``6h``, ``24h`` and ``older``, based on file
            modification time).
        """
        now = time.time()
        age_distribution = {"1h": 0, "6h": 0, "24h": 0, "older": 0}
        entry_count = 0
        total_size = 0

        for cache_file in self.namespace_dir.glob("*.json"):
            try:
                # One stat() call yields both size and mtime.
                info = cache_file.stat()
            except OSError:
                # The file vanished (or became unreadable) after the glob;
                # leave it out of the statistics instead of failing.
                continue

            entry_count += 1
            total_size += info.st_size

            file_age = now - info.st_mtime
            for label, limit in self._AGE_BUCKETS:
                if file_age <= limit:
                    age_distribution[label] += 1
                    break
            else:
                age_distribution["older"] += 1

        return {
            "namespace": self.namespace,
            "entry_count": entry_count,
            "total_size_bytes": total_size,
            "age_distribution": age_distribution,
        }