PlanOpticon

planopticon / video_processor / extractors / frame_extractor.py
Source Blame History 401 lines
287a3bb… leo 1 """Frame extraction module for video processing."""
829e24a… leo 2
import functools
import logging
import shutil
import sys
import tempfile
from pathlib import Path
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
from tqdm import tqdm
287a3bb… leo 13
287a3bb… leo 14 logger = logging.getLogger(__name__)
287a3bb… leo 15
287a3bb… leo 16 # Haar cascade for face detection — ships with OpenCV
287a3bb… leo 17 _FACE_CASCADE_PATH = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
287a3bb… leo 18 _FACE_CASCADE = None
287a3bb… leo 19
287a3bb… leo 20
def _get_face_cascade() -> cv2.CascadeClassifier:
    """Return the shared Haar-cascade face classifier, creating it on first use."""
    global _FACE_CASCADE
    if _FACE_CASCADE is not None:
        return _FACE_CASCADE
    # First call: build the classifier from the cascade XML bundled with OpenCV.
    _FACE_CASCADE = cv2.CascadeClassifier(_FACE_CASCADE_PATH)
    return _FACE_CASCADE
287a3bb… leo 27
287a3bb… leo 28
def detect_faces(frame: np.ndarray) -> List[Tuple[int, int, int, int]]:
    """Detect faces in a frame using Haar cascade. Returns list of (x, y, w, h)."""
    # Haar detection works on single-channel images; convert BGR input first.
    if frame.ndim == 3:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    else:
        gray = frame
    detections = _get_face_cascade().detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
    )
    # detectMultiScale returns an empty tuple when nothing is found;
    # normalize that to a plain empty list for callers.
    if len(detections) == 0:
        return []
    return list(detections)
287a3bb… leo 35
287a3bb… leo 36
def is_people_frame(
    frame: np.ndarray,
    face_area_threshold: float = 0.03,
    min_face_size: int = 90,
) -> bool:
    """
    Determine if a frame is primarily showing people (webcam/video conference).

    Heuristics:
    1. Face detection — if significant faces occupy enough frame area
    2. Black bar detection — video conferences often have thick black bars
    3. Small faces with black bars — profile pictures in conference UI

    Faces smaller than min_face_size are ignored (sidebar thumbnails in screen shares).

    Parameters
    ----------
    frame : np.ndarray
        BGR image frame
    face_area_threshold : float
        Minimum ratio of total face area to frame area to classify as people frame
    min_face_size : int
        Minimum face width/height in pixels to count as a significant face

    Returns
    -------
    bool
        True if frame is primarily people/webcam content
    """
    height, width = frame.shape[:2]
    frame_area = height * width

    all_faces = detect_faces(frame)

    # Keep only webcam-sized faces; tiny ones are sidebar thumbnails.
    significant_faces = []
    for (x, y, fw, fh) in all_faces:
        if fw >= min_face_size:
            significant_faces.append((x, y, fw, fh))

    if significant_faces:
        total_face_area = sum(fw * fh for (_, _, fw, fh) in significant_faces)
        face_ratio = total_face_area / frame_area

        # Either several significant faces or a large combined face area
        # is enough to call it a people frame.
        if len(significant_faces) >= 2 or face_ratio >= face_area_threshold:
            logger.debug(
                f"People frame: {len(significant_faces)} significant faces, "
                f"face_ratio={face_ratio:.3f}"
            )
            return True

    # Fallback heuristic: video-conference layouts show large black borders.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if frame.ndim == 3 else frame
    black_ratio = np.sum(gray < 15) / frame_area

    if black_ratio > 0.25 and all_faces:
        # Thick black bars plus any detected face (even a small one) looks
        # like conference UI, e.g. a profile picture on a black background.
        logger.debug(f"People frame: black_ratio={black_ratio:.2f} with {len(all_faces)} faces")
        return True

    return False
287a3bb… leo 98
287a3bb… leo 99
def filter_people_frames(
    frames: List[np.ndarray],
    face_area_threshold: float = 0.03,
) -> Tuple[List[np.ndarray], int]:
    """
    Filter out frames that primarily show people/webcam views.

    Returns (filtered_frames, num_removed).
    """
    kept: List[np.ndarray] = []
    dropped = 0
    for frame in tqdm(frames, desc="Filtering people frames", unit="frame"):
        if is_people_frame(frame, face_area_threshold):
            dropped += 1
            continue
        kept.append(frame)

    if dropped:
        logger.info(f"Filtered out {dropped}/{len(frames)} people/webcam frames")
    return kept, dropped
287a3bb… leo 120
829e24a… leo 121
def is_gpu_available() -> bool:
    """Check if GPU acceleration is available for OpenCV."""
    try:
        # Non-zero device count means OpenCV was built with CUDA and
        # at least one CUDA device is visible.
        return cv2.cuda.getCudaEnabledDeviceCount() > 0
    except Exception:
        # Builds without the cuda module (or broken drivers) raise here;
        # treat that as "no GPU".
        return False
287a3bb… leo 130
829e24a… leo 131
def gpu_accelerated(func):
    """Decorator that routes calls to a GPU implementation when one is usable.

    The wrapper accepts an extra ``disable_gpu`` keyword (consumed here and
    never forwarded): when it is truthy, or when no CUDA device is available,
    the original CPU ``func`` runs; otherwise the call is delegated to the
    module-level ``func_gpu``.

    NOTE(review): the GPU path is hard-wired to ``func_gpu`` regardless of
    which function was decorated — fine while ``extract_frames`` is the only
    user, but not a general-purpose decorator.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Consume the control flag exactly once so neither implementation
        # receives an unexpected keyword argument.  (Previously the flag was
        # read with .get and popped in two separate branches.)
        disable_gpu = kwargs.pop("disable_gpu", None)
        if is_gpu_available() and not disable_gpu:
            return func_gpu(*args, **kwargs)
        return func(*args, **kwargs)

    return wrapper
829e24a… leo 146
287a3bb… leo 147
def calculate_frame_difference(prev_frame: np.ndarray, curr_frame: np.ndarray) -> float:
    """
    Calculate the difference between two frames.

    Parameters
    ----------
    prev_frame : np.ndarray
        Previous frame
    curr_frame : np.ndarray
        Current frame

    Returns
    -------
    float
        Difference score between 0 and 1
    """

    def _to_gray(img: np.ndarray) -> np.ndarray:
        # 3-channel input is assumed BGR (OpenCV convention); grayscale
        # input passes through unchanged.
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img

    diff = cv2.absdiff(_to_gray(prev_frame), _to_gray(curr_frame))
    # Mean absolute pixel difference, scaled from [0, 255] down to [0, 1].
    return np.mean(diff) / 255.0
829e24a… leo 180
287a3bb… leo 181
@gpu_accelerated
def extract_frames(
    video_path: Union[str, Path],
    sampling_rate: float = 1.0,
    change_threshold: float = 0.15,
    periodic_capture_seconds: float = 30.0,
    max_frames: Optional[int] = None,
    resize_to: Optional[Tuple[int, int]] = None,
    max_memory_mb: int = 1024,
) -> List[np.ndarray]:
    """
    Extract frames from video based on visual change detection + periodic capture.

    Two capture strategies work together:
    1. Change detection: capture when visual difference exceeds threshold
       (catches transitions like webcam ↔ screen share)
    2. Periodic capture: capture every N seconds regardless of change
       (catches slow-evolving content like document scrolling)

    The downstream people filter removes any webcam frames captured periodically.

    Parameters
    ----------
    video_path : str or Path
        Path to video file
    sampling_rate : float
        Frame sampling rate (1.0 = every frame)
    change_threshold : float
        Threshold for detecting significant visual changes
    periodic_capture_seconds : float
        Capture a frame every N seconds regardless of change (0 to disable)
    max_frames : int, optional
        Maximum number of frames to extract. Frames already flushed to disk
        count toward this limit.
    resize_to : tuple of (width, height), optional
        Resize frames to this dimension
    max_memory_mb : int
        Approximate memory limit in MB for held frames. When approaching this
        limit, frames are flushed to disk early (as JPEG, so flushed frames
        are re-encoded lossily) and only paths are retained internally. The
        returned list still contains numpy arrays (reloaded from the temp
        files at the end). Default 1024 MB.

    Returns
    -------
    list
        List of extracted frames as numpy arrays

    Raises
    ------
    FileNotFoundError
        If video_path does not exist
    ValueError
        If the video cannot be opened or sampling_rate is not positive
    """
    video_path = Path(video_path)
    if not video_path.exists():
        raise FileNotFoundError(f"Video file not found: {video_path}")

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {video_path}")

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if sampling_rate <= 0:
        raise ValueError("Sampling rate must be positive")

    # Every `frame_interval`-th frame is decoded; the rest are grabbed (skipped).
    frame_interval = max(1, int(1 / sampling_rate))

    # Periodic capture interval in frames (0 = disabled)
    periodic_interval = int(periodic_capture_seconds * fps) if periodic_capture_seconds > 0 else 0

    logger.info(
        f"Video: {video_path.name}, FPS: {fps:.0f}, Frames: {frame_count}, "
        f"Sample interval: {frame_interval}, "
        f"Periodic capture: every {periodic_capture_seconds:.0f}s"
    )

    extracted_frames: List[np.ndarray] = []
    prev_frame = None
    frame_idx = 0
    last_capture_frame = -periodic_interval  # allow first periodic capture immediately

    # Memory safety valve: flush held frames to disk at ~90% of the budget.
    flush_threshold = max_memory_mb * 1024 * 1024 * 0.9
    approx_memory_used = 0
    _flush_dir: Optional[str] = None  # lazily created temp dir for flushed frames
    _flushed_paths: List[Path] = []  # paths of flushed frames, in capture order

    def _total_captured() -> int:
        # Frames held in memory plus frames already flushed to disk.
        # (Counting only len(extracted_frames) would reset the max_frames
        # accounting after every flush — previous bug.)
        return len(_flushed_paths) + len(extracted_frames)

    def _flush_frames_to_disk() -> None:
        # Write all in-memory frames to JPEG files and drop them from RAM.
        nonlocal _flush_dir, approx_memory_used
        if _flush_dir is None:
            _flush_dir = tempfile.mkdtemp(prefix="planopticon_frames_")
            logger.info(
                f"Memory limit ~{max_memory_mb}MB approaching, "
                f"flushing frames to {_flush_dir}"
            )
        for frame_to_flush in extracted_frames:
            # len(_flushed_paths) alone yields consecutive, strictly
            # increasing file indices across multiple flushes.
            flush_path = Path(_flush_dir) / f"flush_{len(_flushed_paths):06d}.jpg"
            cv2.imwrite(str(flush_path), frame_to_flush)
            _flushed_paths.append(flush_path)
        extracted_frames.clear()
        approx_memory_used = 0

    pbar = tqdm(
        total=frame_count,
        desc="Extracting frames",
        unit="frame",
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
    )

    try:
        while cap.isOpened():
            # Process frame only if it's a sampling point
            if frame_idx % frame_interval == 0:
                success, frame = cap.read()
                if not success:
                    break

                # Resize if specified
                if resize_to is not None:
                    frame = cv2.resize(frame, resize_to)

                should_capture = False
                reason = ""

                if prev_frame is None:
                    # First frame always gets extracted
                    should_capture = True
                    reason = "first"
                else:
                    # Change detection
                    diff = calculate_frame_difference(prev_frame, frame)
                    if diff > change_threshold:
                        should_capture = True
                        reason = f"change={diff:.3f}"
                    elif (
                        periodic_interval > 0
                        and (frame_idx - last_capture_frame) >= periodic_interval
                    ):
                        # Periodic capture — even if change is small
                        should_capture = True
                        reason = "periodic"

                if should_capture:
                    extracted_frames.append(frame)
                    # The pixel buffer (nbytes) dominates the footprint; the
                    # old sys.getsizeof(frame) + frame.nbytes double-counted
                    # it because ndarray.__sizeof__ already includes the data.
                    approx_memory_used += getattr(frame, "nbytes", sys.getsizeof(frame))
                    prev_frame = frame
                    last_capture_frame = frame_idx
                    logger.debug(f"Frame {frame_idx} extracted ({reason})")

                    if approx_memory_used >= flush_threshold:
                        _flush_frames_to_disk()

                pbar.set_postfix(extracted=_total_captured())

                # Check if we've reached the maximum (flushed frames included)
                if max_frames is not None and _total_captured() >= max_frames:
                    break
            else:
                # Skip frame but advance the decoder position cheaply
                cap.grab()

            frame_idx += 1
            # Each loop iteration consumes exactly one frame (read or grab),
            # so advance the bar by 1 — update(frame_interval) overshot.
            pbar.update(1)
    finally:
        # Release resources even if a callback/OpenCV call raised mid-loop.
        pbar.close()
        cap.release()

    # If frames were flushed to disk, reload them ahead of the in-memory tail
    # so the returned list stays in capture order.
    if _flushed_paths:
        reloaded: List[np.ndarray] = []
        for flushed_path in _flushed_paths:
            img = cv2.imread(str(flushed_path))
            if img is not None:
                reloaded.append(img)
        reloaded.extend(extracted_frames)
        extracted_frames = reloaded
        logger.info(f"Reloaded {len(_flushed_paths)} flushed frames from disk")

    if _flush_dir:
        # Best-effort cleanup of the temporary flush directory.
        shutil.rmtree(_flush_dir, ignore_errors=True)

    logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
    return extracted_frames
829e24a… leo 362
287a3bb… leo 363
def func_gpu(*args, **kwargs):
    """GPU-accelerated version of extract_frames."""
    # CUDA path is still a stub.  Delegate to the undecorated CPU
    # implementation so we do not re-enter the gpu_accelerated wrapper
    # (which would dispatch right back here).
    logger.info("GPU acceleration not yet implemented, falling back to CPU")
    cpu_impl = extract_frames.__wrapped__
    return cpu_impl(*args, **kwargs)
287a3bb… leo 370
829e24a… leo 371
def save_frames(
    frames: List[np.ndarray], output_dir: Union[str, Path], base_filename: str = "frame"
) -> List[Path]:
    """
    Save extracted frames to disk.

    Parameters
    ----------
    frames : list
        List of frames to save
    output_dir : str or Path
        Directory to save frames in
    base_filename : str
        Base name for frame files

    Returns
    -------
    list
        List of paths to saved frame files
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    saved_paths: List[Path] = []
    for index, frame in enumerate(frames):
        # Zero-padded index keeps lexical and chronological order aligned.
        destination = target_dir / f"{base_filename}_{index:04d}.jpg"
        cv2.imwrite(str(destination), frame)
        saved_paths.append(destination)

    return saved_paths

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button