PlanOpticon

feat(extractor): add max_memory_mb safety valve for frame extraction

lmata 2026-03-07 22:15 trunk

Commit 4c7f23b4492fda26ce53c15d2bb30ec6b54383365dd8bb328c69c9f2811160aa

Parent 21f18d4a8d62489…

1 file changed +49

~ video_processor/extractors/frame_extractor.py

M video_processor/extractors/frame_extractor.py

+49

		--- video_processor/extractors/frame_extractor.py
		+++ video_processor/extractors/frame_extractor.py
		@@ -1,9 +1,11 @@
1	1	"""Frame extraction module for video processing."""
2	2
3	3	import functools
4	4	import logging
	5	+import sys
	6	+import tempfile
5	7	from pathlib import Path
6	8	from typing import List, Optional, Tuple, Union
7	9
8	10	import cv2
9	11	import numpy as np
		@@ -183,10 +185,11 @@
183	185	sampling_rate: float = 1.0,
184	186	change_threshold: float = 0.15,
185	187	periodic_capture_seconds: float = 30.0,
186	188	max_frames: Optional[int] = None,
187	189	resize_to: Optional[Tuple[int, int]] = None,
	190	+ max_memory_mb: int = 1024,
188	191	) -> List[np.ndarray]:
189	192	"""
190	193	Extract frames from video based on visual change detection + periodic capture.
191	194
192	195	Two capture strategies work together:
		@@ -209,10 +212,15 @@
209	212	Capture a frame every N seconds regardless of change (0 to disable)
210	213	max_frames : int, optional
211	214	Maximum number of frames to extract
212	215	resize_to : tuple of (width, height), optional
213	216	Resize frames to this dimension
	217	+ max_memory_mb : int
	218	+ Approximate memory limit in MB for held frames. When approaching this
	219	+ limit, frames are flushed to disk early and only paths are retained
	220	+ internally. The returned list still contains numpy arrays (reloaded
	221	+ from the temp files at the end). Default 1024 MB.
214	222
215	223	Returns
216	224	-------
217	225	list
218	226	List of extracted frames as numpy arrays
		@@ -247,10 +255,16 @@
247	255	extracted_frames = []
248	256	prev_frame = None
249	257	frame_idx = 0
250	258	last_capture_frame = -periodic_interval # allow first periodic capture immediately
251	259
	260	+ # Memory safety valve
	261	+ max_memory_bytes = max_memory_mb * 1024 * 1024
	262	+ approx_memory_used = 0
	263	+ _flush_dir = None # lazily created temp dir for flushed frames
	264	+ _flushed_paths: List[Path] = [] # paths of frames flushed to disk
	265	+
252	266	pbar = tqdm(
253	267	total=frame_count,
254	268	desc="Extracting frames",
255	269	unit="frame",
256	270	bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
		@@ -288,13 +302,31 @@
288	302	should_capture = True
289	303	reason = "periodic"
290	304
291	305	if should_capture:
292	306	extracted_frames.append(frame)
	307	+ approx_memory_used += sys.getsizeof(frame) + (
	308	+ frame.nbytes if hasattr(frame, "nbytes") else 0
	309	+ )
293	310	prev_frame = frame
294	311	last_capture_frame = frame_idx
295	312	logger.debug(f"Frame {frame_idx} extracted ({reason})")
	313	+
	314	+ # Memory safety valve: flush frames to disk when approaching limit
	315	+ if approx_memory_used >= max_memory_bytes * 0.9:
	316	+ if _flush_dir is None:
	317	+ _flush_dir = tempfile.mkdtemp(prefix="planopticon_frames_")
	318	+ logger.info(
	319	+ f"Memory limit ~{max_memory_mb}MB approaching, "
	320	+ f"flushing frames to {_flush_dir}"
	321	+ )
	322	+ for fi, f in enumerate(extracted_frames):
	323	+ flush_path = Path(_flush_dir) / f"flush_{len(_flushed_paths) + fi:06d}.jpg"
	324	+ cv2.imwrite(str(flush_path), f)
	325	+ _flushed_paths.append(flush_path)
	326	+ extracted_frames.clear()
	327	+ approx_memory_used = 0
296	328
297	329	pbar.set_postfix(extracted=len(extracted_frames))
298	330
299	331	# Check if we've reached the maximum
300	332	if max_frames is not None and len(extracted_frames) >= max_frames:
		@@ -306,10 +338,27 @@
306	338	frame_idx += 1
307	339	pbar.update(frame_interval)
308	340
309	341	pbar.close()
310	342	cap.release()
	343	+
	344	+ # If frames were flushed to disk, reload them
	345	+ if _flushed_paths:
	346	+ reloaded = []
	347	+ for fp in _flushed_paths:
	348	+ img = cv2.imread(str(fp))
	349	+ if img is not None:
	350	+ reloaded.append(img)
	351	+ reloaded.extend(extracted_frames)
	352	+ extracted_frames = reloaded
	353	+ logger.info(f"Reloaded {len(_flushed_paths)} flushed frames from disk")
	354	+ # Clean up temp files
	355	+ import shutil
	356	+
	357	+ if _flush_dir:
	358	+ shutil.rmtree(_flush_dir, ignore_errors=True)
	359	+
311	360	logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
312	361	return extracted_frames
313	362
314	363
315	364	def func_gpu(*args, **kwargs):
316	365

	--- video_processor/extractors/frame_extractor.py
	+++ video_processor/extractors/frame_extractor.py
	@@ -1,9 +1,11 @@
1	"""Frame extraction module for video processing."""
2
3	import functools
4	import logging


5	from pathlib import Path
6	from typing import List, Optional, Tuple, Union
7
8	import cv2
9	import numpy as np
	@@ -183,10 +185,11 @@
183	sampling_rate: float = 1.0,
184	change_threshold: float = 0.15,
185	periodic_capture_seconds: float = 30.0,
186	max_frames: Optional[int] = None,
187	resize_to: Optional[Tuple[int, int]] = None,

188	) -> List[np.ndarray]:
189	"""
190	Extract frames from video based on visual change detection + periodic capture.
191
192	Two capture strategies work together:
	@@ -209,10 +212,15 @@
209	Capture a frame every N seconds regardless of change (0 to disable)
210	max_frames : int, optional
211	Maximum number of frames to extract
212	resize_to : tuple of (width, height), optional
213	Resize frames to this dimension





214
215	Returns
216	-------
217	list
218	List of extracted frames as numpy arrays
	@@ -247,10 +255,16 @@
247	extracted_frames = []
248	prev_frame = None
249	frame_idx = 0
250	last_capture_frame = -periodic_interval # allow first periodic capture immediately
251






252	pbar = tqdm(
253	total=frame_count,
254	desc="Extracting frames",
255	unit="frame",
256	bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
	@@ -288,13 +302,31 @@
288	should_capture = True
289	reason = "periodic"
290
291	if should_capture:
292	extracted_frames.append(frame)



293	prev_frame = frame
294	last_capture_frame = frame_idx
295	logger.debug(f"Frame {frame_idx} extracted ({reason})")















296
297	pbar.set_postfix(extracted=len(extracted_frames))
298
299	# Check if we've reached the maximum
300	if max_frames is not None and len(extracted_frames) >= max_frames:
	@@ -306,10 +338,27 @@
306	frame_idx += 1
307	pbar.update(frame_interval)
308
309	pbar.close()
310	cap.release()

















311	logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
312	return extracted_frames
313
314
315	def func_gpu(*args, **kwargs):
316

	--- video_processor/extractors/frame_extractor.py
	+++ video_processor/extractors/frame_extractor.py
	@@ -1,9 +1,11 @@
1	"""Frame extraction module for video processing."""
2
3	import functools
4	import logging
5	import sys
6	import tempfile
7	from pathlib import Path
8	from typing import List, Optional, Tuple, Union
9
10	import cv2
11	import numpy as np
	@@ -183,10 +185,11 @@
185	sampling_rate: float = 1.0,
186	change_threshold: float = 0.15,
187	periodic_capture_seconds: float = 30.0,
188	max_frames: Optional[int] = None,
189	resize_to: Optional[Tuple[int, int]] = None,
190	max_memory_mb: int = 1024,
191	) -> List[np.ndarray]:
192	"""
193	Extract frames from video based on visual change detection + periodic capture.
194
195	Two capture strategies work together:
	@@ -209,10 +212,15 @@
212	Capture a frame every N seconds regardless of change (0 to disable)
213	max_frames : int, optional
214	Maximum number of frames to extract
215	resize_to : tuple of (width, height), optional
216	Resize frames to this dimension
217	max_memory_mb : int
218	Approximate memory limit in MB for held frames. When approaching this
219	limit, frames are flushed to disk early and only paths are retained
220	internally. The returned list still contains numpy arrays (reloaded
221	from the temp files at the end). Default 1024 MB.
222
223	Returns
224	-------
225	list
226	List of extracted frames as numpy arrays
	@@ -247,10 +255,16 @@
255	extracted_frames = []
256	prev_frame = None
257	frame_idx = 0
258	last_capture_frame = -periodic_interval # allow first periodic capture immediately
259
260	# Memory safety valve
261	max_memory_bytes = max_memory_mb * 1024 * 1024
262	approx_memory_used = 0
263	_flush_dir = None # lazily created temp dir for flushed frames
264	_flushed_paths: List[Path] = [] # paths of frames flushed to disk
265
266	pbar = tqdm(
267	total=frame_count,
268	desc="Extracting frames",
269	unit="frame",
270	bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
	@@ -288,13 +302,31 @@
302	should_capture = True
303	reason = "periodic"
304
305	if should_capture:
306	extracted_frames.append(frame)
307	approx_memory_used += sys.getsizeof(frame) + (
308	frame.nbytes if hasattr(frame, "nbytes") else 0
309	)
310	prev_frame = frame
311	last_capture_frame = frame_idx
312	logger.debug(f"Frame {frame_idx} extracted ({reason})")
313
314	# Memory safety valve: flush frames to disk when approaching limit
315	if approx_memory_used >= max_memory_bytes * 0.9:
316	if _flush_dir is None:
317	_flush_dir = tempfile.mkdtemp(prefix="planopticon_frames_")
318	logger.info(
319	f"Memory limit ~{max_memory_mb}MB approaching, "
320	f"flushing frames to {_flush_dir}"
321	)
322	for fi, f in enumerate(extracted_frames):
323	flush_path = Path(_flush_dir) / f"flush_{len(_flushed_paths) + fi:06d}.jpg"
324	cv2.imwrite(str(flush_path), f)
325	_flushed_paths.append(flush_path)
326	extracted_frames.clear()
327	approx_memory_used = 0
328
329	pbar.set_postfix(extracted=len(extracted_frames))
330
331	# Check if we've reached the maximum
332	if max_frames is not None and len(extracted_frames) >= max_frames:
	@@ -306,10 +338,27 @@
338	frame_idx += 1
339	pbar.update(frame_interval)
340
341	pbar.close()
342	cap.release()
343
344	# If frames were flushed to disk, reload them
345	if _flushed_paths:
346	reloaded = []
347	for fp in _flushed_paths:
348	img = cv2.imread(str(fp))
349	if img is not None:
350	reloaded.append(img)
351	reloaded.extend(extracted_frames)
352	extracted_frames = reloaded
353	logger.info(f"Reloaded {len(_flushed_paths)} flushed frames from disk")
354	# Clean up temp files
355	import shutil
356
357	if _flush_dir:
358	shutil.rmtree(_flush_dir, ignore_errors=True)
359
360	logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
361	return extracted_frames
362
363
364	def func_gpu(*args, **kwargs):
365

PlanOpticon

Keyboard Shortcuts