PlanOpticon

feat(extractor): add max_memory_mb safety valve for frame extraction

lmata 2026-03-07 22:15 trunk
Commit 4c7f23b4492fda26ce53c15d2bb30ec6b54383365dd8bb328c69c9f2811160aa
--- video_processor/extractors/frame_extractor.py
+++ video_processor/extractors/frame_extractor.py
@@ -1,9 +1,11 @@
1 1
"""Frame extraction module for video processing."""
2 2
3 3
import functools
4 4
import logging
5
+import sys
6
+import tempfile
5 7
from pathlib import Path
6 8
from typing import List, Optional, Tuple, Union
7 9
8 10
import cv2
9 11
import numpy as np
@@ -183,10 +185,11 @@
183 185
sampling_rate: float = 1.0,
184 186
change_threshold: float = 0.15,
185 187
periodic_capture_seconds: float = 30.0,
186 188
max_frames: Optional[int] = None,
187 189
resize_to: Optional[Tuple[int, int]] = None,
190
+ max_memory_mb: int = 1024,
188 191
) -> List[np.ndarray]:
189 192
"""
190 193
Extract frames from video based on visual change detection + periodic capture.
191 194
192 195
Two capture strategies work together:
@@ -209,10 +212,15 @@
209 212
Capture a frame every N seconds regardless of change (0 to disable)
210 213
max_frames : int, optional
211 214
Maximum number of frames to extract
212 215
resize_to : tuple of (width, height), optional
213 216
Resize frames to this dimension
217
+ max_memory_mb : int
218
+ Approximate memory limit in MB for held frames. When approaching this
219
+ limit, frames are flushed to disk early and only paths are retained
220
+ internally. The returned list still contains numpy arrays (reloaded
221
+ from the temp files at the end). Default 1024 MB.
214 222
215 223
Returns
216 224
-------
217 225
list
218 226
List of extracted frames as numpy arrays
@@ -247,10 +255,16 @@
247 255
extracted_frames = []
248 256
prev_frame = None
249 257
frame_idx = 0
250 258
last_capture_frame = -periodic_interval # allow first periodic capture immediately
251 259
260
+ # Memory safety valve
261
+ max_memory_bytes = max_memory_mb * 1024 * 1024
262
+ approx_memory_used = 0
263
+ _flush_dir = None # lazily created temp dir for flushed frames
264
+ _flushed_paths: List[Path] = [] # paths of frames flushed to disk
265
+
252 266
pbar = tqdm(
253 267
total=frame_count,
254 268
desc="Extracting frames",
255 269
unit="frame",
256 270
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
@@ -288,13 +302,31 @@
288 302
should_capture = True
289 303
reason = "periodic"
290 304
291 305
if should_capture:
292 306
extracted_frames.append(frame)
307
+ approx_memory_used += sys.getsizeof(frame) + (
308
+ frame.nbytes if hasattr(frame, "nbytes") else 0
309
+ )
293 310
prev_frame = frame
294 311
last_capture_frame = frame_idx
295 312
logger.debug(f"Frame {frame_idx} extracted ({reason})")
313
+
314
+ # Memory safety valve: flush frames to disk when approaching limit
315
+ if approx_memory_used >= max_memory_bytes * 0.9:
316
+ if _flush_dir is None:
317
+ _flush_dir = tempfile.mkdtemp(prefix="planopticon_frames_")
318
+ logger.info(
319
+ f"Memory limit ~{max_memory_mb}MB approaching, "
320
+ f"flushing frames to {_flush_dir}"
321
+ )
322
+ for fi, f in enumerate(extracted_frames):
323
+ flush_path = Path(_flush_dir) / f"flush_{len(_flushed_paths) + fi:06d}.jpg"
324
+ cv2.imwrite(str(flush_path), f)
325
+ _flushed_paths.append(flush_path)
326
+ extracted_frames.clear()
327
+ approx_memory_used = 0
296 328
297 329
pbar.set_postfix(extracted=len(extracted_frames))
298 330
299 331
# Check if we've reached the maximum
300 332
if max_frames is not None and len(extracted_frames) >= max_frames:
@@ -306,10 +338,27 @@
306 338
frame_idx += 1
307 339
pbar.update(frame_interval)
308 340
309 341
pbar.close()
310 342
cap.release()
343
+
344
+ # If frames were flushed to disk, reload them
345
+ if _flushed_paths:
346
+ reloaded = []
347
+ for fp in _flushed_paths:
348
+ img = cv2.imread(str(fp))
349
+ if img is not None:
350
+ reloaded.append(img)
351
+ reloaded.extend(extracted_frames)
352
+ extracted_frames = reloaded
353
+ logger.info(f"Reloaded {len(_flushed_paths)} flushed frames from disk")
354
+ # Clean up temp files
355
+ import shutil
356
+
357
+ if _flush_dir:
358
+ shutil.rmtree(_flush_dir, ignore_errors=True)
359
+
311 360
logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
312 361
return extracted_frames
313 362
314 363
315 364
def func_gpu(*args, **kwargs):
316 365
--- video_processor/extractors/frame_extractor.py
+++ video_processor/extractors/frame_extractor.py
@@ -1,9 +1,11 @@
1 """Frame extraction module for video processing."""
2
3 import functools
4 import logging
 
 
5 from pathlib import Path
6 from typing import List, Optional, Tuple, Union
7
8 import cv2
9 import numpy as np
@@ -183,10 +185,11 @@
183 sampling_rate: float = 1.0,
184 change_threshold: float = 0.15,
185 periodic_capture_seconds: float = 30.0,
186 max_frames: Optional[int] = None,
187 resize_to: Optional[Tuple[int, int]] = None,
 
188 ) -> List[np.ndarray]:
189 """
190 Extract frames from video based on visual change detection + periodic capture.
191
192 Two capture strategies work together:
@@ -209,10 +212,15 @@
209 Capture a frame every N seconds regardless of change (0 to disable)
210 max_frames : int, optional
211 Maximum number of frames to extract
212 resize_to : tuple of (width, height), optional
213 Resize frames to this dimension
 
 
 
 
 
214
215 Returns
216 -------
217 list
218 List of extracted frames as numpy arrays
@@ -247,10 +255,16 @@
247 extracted_frames = []
248 prev_frame = None
249 frame_idx = 0
250 last_capture_frame = -periodic_interval # allow first periodic capture immediately
251
 
 
 
 
 
 
252 pbar = tqdm(
253 total=frame_count,
254 desc="Extracting frames",
255 unit="frame",
256 bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
@@ -288,13 +302,31 @@
288 should_capture = True
289 reason = "periodic"
290
291 if should_capture:
292 extracted_frames.append(frame)
 
 
 
293 prev_frame = frame
294 last_capture_frame = frame_idx
295 logger.debug(f"Frame {frame_idx} extracted ({reason})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
297 pbar.set_postfix(extracted=len(extracted_frames))
298
299 # Check if we've reached the maximum
300 if max_frames is not None and len(extracted_frames) >= max_frames:
@@ -306,10 +338,27 @@
306 frame_idx += 1
307 pbar.update(frame_interval)
308
309 pbar.close()
310 cap.release()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311 logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
312 return extracted_frames
313
314
315 def func_gpu(*args, **kwargs):
316
--- video_processor/extractors/frame_extractor.py
+++ video_processor/extractors/frame_extractor.py
@@ -1,9 +1,11 @@
1 """Frame extraction module for video processing."""
2
3 import functools
4 import logging
5 import sys
6 import tempfile
7 from pathlib import Path
8 from typing import List, Optional, Tuple, Union
9
10 import cv2
11 import numpy as np
@@ -183,10 +185,11 @@
185 sampling_rate: float = 1.0,
186 change_threshold: float = 0.15,
187 periodic_capture_seconds: float = 30.0,
188 max_frames: Optional[int] = None,
189 resize_to: Optional[Tuple[int, int]] = None,
190 max_memory_mb: int = 1024,
191 ) -> List[np.ndarray]:
192 """
193 Extract frames from video based on visual change detection + periodic capture.
194
195 Two capture strategies work together:
@@ -209,10 +212,15 @@
212 Capture a frame every N seconds regardless of change (0 to disable)
213 max_frames : int, optional
214 Maximum number of frames to extract
215 resize_to : tuple of (width, height), optional
216 Resize frames to this dimension
217 max_memory_mb : int
218 Approximate memory limit in MB for held frames. When approaching this
219 limit, frames are flushed to disk early and only paths are retained
220 internally. The returned list still contains numpy arrays (reloaded
221 from the temp files at the end). Default 1024 MB.
222
223 Returns
224 -------
225 list
226 List of extracted frames as numpy arrays
@@ -247,10 +255,16 @@
255 extracted_frames = []
256 prev_frame = None
257 frame_idx = 0
258 last_capture_frame = -periodic_interval # allow first periodic capture immediately
259
260 # Memory safety valve
261 max_memory_bytes = max_memory_mb * 1024 * 1024
262 approx_memory_used = 0
263 _flush_dir = None # lazily created temp dir for flushed frames
264 _flushed_paths: List[Path] = [] # paths of frames flushed to disk
265
266 pbar = tqdm(
267 total=frame_count,
268 desc="Extracting frames",
269 unit="frame",
270 bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
@@ -288,13 +302,31 @@
302 should_capture = True
303 reason = "periodic"
304
305 if should_capture:
306 extracted_frames.append(frame)
307 approx_memory_used += sys.getsizeof(frame) + (
308 frame.nbytes if hasattr(frame, "nbytes") else 0
309 )
310 prev_frame = frame
311 last_capture_frame = frame_idx
312 logger.debug(f"Frame {frame_idx} extracted ({reason})")
313
314 # Memory safety valve: flush frames to disk when approaching limit
315 if approx_memory_used >= max_memory_bytes * 0.9:
316 if _flush_dir is None:
317 _flush_dir = tempfile.mkdtemp(prefix="planopticon_frames_")
318 logger.info(
319 f"Memory limit ~{max_memory_mb}MB approaching, "
320 f"flushing frames to {_flush_dir}"
321 )
322 for fi, f in enumerate(extracted_frames):
323 flush_path = Path(_flush_dir) / f"flush_{len(_flushed_paths) + fi:06d}.jpg"
324 cv2.imwrite(str(flush_path), f)
325 _flushed_paths.append(flush_path)
326 extracted_frames.clear()
327 approx_memory_used = 0
328
329 pbar.set_postfix(extracted=len(extracted_frames))
330
331 # Check if we've reached the maximum
332 if max_frames is not None and len(extracted_frames) >= max_frames:
@@ -306,10 +338,27 @@
338 frame_idx += 1
339 pbar.update(frame_interval)
340
341 pbar.close()
342 cap.release()
343
344 # If frames were flushed to disk, reload them
345 if _flushed_paths:
346 reloaded = []
347 for fp in _flushed_paths:
348 img = cv2.imread(str(fp))
349 if img is not None:
350 reloaded.append(img)
351 reloaded.extend(extracted_frames)
352 extracted_frames = reloaded
353 logger.info(f"Reloaded {len(_flushed_paths)} flushed frames from disk")
354 # Clean up temp files
355 import shutil
356
357 if _flush_dir:
358 shutil.rmtree(_flush_dir, ignore_errors=True)
359
360 logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
361 return extracted_frames
362
363
364 def func_gpu(*args, **kwargs):
365

Keyboard Shortcuts

Open search: /
Next entry (timeline): j
Previous entry (timeline): k
Open focused entry: Enter
Show this help: ?
Toggle theme: top nav button