PlanOpticon

planopticon / video_processor / extractors / frame_extractor.py
Source Blame History 401 lines
287a3bb… leo 1 """Frame extraction module for video processing."""
829e24a… leo 2
import functools
import logging
import shutil
import sys
import tempfile
from pathlib import Path
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
from tqdm import tqdm
287a3bb… leo 13
287a3bb… leo 14 logger = logging.getLogger(__name__)
287a3bb… leo 15
287a3bb… leo 16 # Haar cascade for face detection — ships with OpenCV
287a3bb… leo 17 _FACE_CASCADE_PATH = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
287a3bb… leo 18 _FACE_CASCADE = None
287a3bb… leo 19
287a3bb… leo 20
def _get_face_cascade() -> cv2.CascadeClassifier:
    """Return the shared Haar-cascade face classifier, creating it on first use."""
    global _FACE_CASCADE
    if _FACE_CASCADE is not None:
        return _FACE_CASCADE
    # First call: build the classifier from the cascade XML bundled with OpenCV.
    _FACE_CASCADE = cv2.CascadeClassifier(_FACE_CASCADE_PATH)
    return _FACE_CASCADE
287a3bb… leo 27
287a3bb… leo 28
def detect_faces(frame: np.ndarray) -> List[Tuple[int, int, int, int]]:
    """Detect faces in a frame using Haar cascade. Returns list of (x, y, w, h)."""
    # Haar detection works on single-channel images; convert BGR input first.
    if frame.ndim == 3:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    else:
        gray = frame
    detections = _get_face_cascade().detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
    )
    # detectMultiScale returns an empty tuple when nothing is found;
    # normalize that to a plain empty list for callers.
    if len(detections) == 0:
        return []
    return list(detections)
287a3bb… leo 35
287a3bb… leo 36
def is_people_frame(
    frame: np.ndarray,
    face_area_threshold: float = 0.03,
    min_face_size: int = 90,
) -> bool:
    """
    Determine if a frame is primarily showing people (webcam/video conference).

    Heuristics:
    1. Face detection — if significant faces occupy enough frame area
    2. Black bar detection — video conferences often have thick black bars
    3. Small faces with black bars — profile pictures in conference UI

    Faces smaller than min_face_size are ignored (sidebar thumbnails in screen shares).

    Parameters
    ----------
    frame : np.ndarray
        BGR image frame
    face_area_threshold : float
        Minimum ratio of total face area to frame area to classify as people frame
    min_face_size : int
        Minimum face width/height in pixels to count as a significant face

    Returns
    -------
    bool
        True if frame is primarily people/webcam content
    """
    height, width = frame.shape[:2]
    frame_area = height * width

    all_faces = detect_faces(frame)

    # Keep only webcam-sized faces; tiny ones are sidebar thumbnails.
    significant_faces = []
    for (x, y, fw, fh) in all_faces:
        if fw >= min_face_size:
            significant_faces.append((x, y, fw, fh))

    if significant_faces:
        total_face_area = sum(fw * fh for (_, _, fw, fh) in significant_faces)
        face_ratio = total_face_area / frame_area

        # Either several significant faces or a large combined face area
        # is enough to call it a people frame.
        if len(significant_faces) >= 2 or face_ratio >= face_area_threshold:
            logger.debug(
                f"People frame: {len(significant_faces)} significant faces, "
                f"face_ratio={face_ratio:.3f}"
            )
            return True

    # Fallback heuristic: video-conference layouts show large black borders.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if frame.ndim == 3 else frame
    black_ratio = np.sum(gray < 15) / frame_area

    if black_ratio > 0.25 and all_faces:
        # Thick black bars plus any detected face (even a small one) looks
        # like conference UI, e.g. a profile picture on a black background.
        logger.debug(f"People frame: black_ratio={black_ratio:.2f} with {len(all_faces)} faces")
        return True

    return False
287a3bb… leo 98
287a3bb… leo 99
def filter_people_frames(
    frames: List[np.ndarray],
    face_area_threshold: float = 0.03,
) -> Tuple[List[np.ndarray], int]:
    """
    Filter out frames that primarily show people/webcam views.

    Returns (filtered_frames, num_removed).
    """
    kept: List[np.ndarray] = []
    dropped = 0
    for frame in tqdm(frames, desc="Filtering people frames", unit="frame"):
        if is_people_frame(frame, face_area_threshold):
            dropped += 1
            continue
        kept.append(frame)

    if dropped:
        logger.info(f"Filtered out {dropped}/{len(frames)} people/webcam frames")
    return kept, dropped
287a3bb… leo 120
829e24a… leo 121
def is_gpu_available() -> bool:
    """Check if GPU acceleration is available for OpenCV."""
    try:
        # Non-zero device count means OpenCV was built with CUDA and
        # at least one CUDA device is visible.
        return cv2.cuda.getCudaEnabledDeviceCount() > 0
    except Exception:
        # Builds without the cuda module (or broken drivers) raise here;
        # treat that as "no GPU".
        return False
287a3bb… leo 130
829e24a… leo 131
def gpu_accelerated(func):
    """Decorator that routes calls to a GPU implementation when one is usable.

    The wrapper accepts an extra ``disable_gpu`` keyword (consumed here and
    never forwarded): when it is truthy, or when no CUDA device is available,
    the original CPU ``func`` runs; otherwise the call is delegated to the
    module-level ``func_gpu``.

    NOTE(review): the GPU path is hard-wired to ``func_gpu`` regardless of
    which function was decorated — fine while ``extract_frames`` is the only
    user, but not a general-purpose decorator.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Consume the control flag exactly once so neither implementation
        # receives an unexpected keyword argument.  (Previously the flag was
        # read with .get and popped in two separate branches.)
        disable_gpu = kwargs.pop("disable_gpu", None)
        if is_gpu_available() and not disable_gpu:
            return func_gpu(*args, **kwargs)
        return func(*args, **kwargs)

    return wrapper
829e24a… leo 146
287a3bb… leo 147
def calculate_frame_difference(prev_frame: np.ndarray, curr_frame: np.ndarray) -> float:
    """
    Calculate the difference between two frames.

    Parameters
    ----------
    prev_frame : np.ndarray
        Previous frame
    curr_frame : np.ndarray
        Current frame

    Returns
    -------
    float
        Difference score between 0 and 1
    """

    def _to_gray(img: np.ndarray) -> np.ndarray:
        # 3-channel input is assumed BGR (OpenCV convention); grayscale
        # input passes through unchanged.
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img

    diff = cv2.absdiff(_to_gray(prev_frame), _to_gray(curr_frame))
    # Mean absolute pixel difference, scaled from [0, 255] down to [0, 1].
    return np.mean(diff) / 255.0
829e24a… leo 180
287a3bb… leo 181
@gpu_accelerated
def extract_frames(
    video_path: Union[str, Path],
    sampling_rate: float = 1.0,
    change_threshold: float = 0.15,
    periodic_capture_seconds: float = 30.0,
    max_frames: Optional[int] = None,
    resize_to: Optional[Tuple[int, int]] = None,
    max_memory_mb: int = 1024,
) -> List[np.ndarray]:
    """
    Extract frames from video based on visual change detection + periodic capture.

    Two capture strategies work together:
    1. Change detection: capture when visual difference exceeds threshold
       (catches transitions like webcam ↔ screen share)
    2. Periodic capture: capture every N seconds regardless of change
       (catches slow-evolving content like document scrolling)

    The downstream people filter removes any webcam frames captured periodically.

    Parameters
    ----------
    video_path : str or Path
        Path to video file
    sampling_rate : float
        Frame sampling rate (1.0 = every frame)
    change_threshold : float
        Threshold for detecting significant visual changes
    periodic_capture_seconds : float
        Capture a frame every N seconds regardless of change (0 to disable)
    max_frames : int, optional
        Maximum number of frames to extract. Frames already flushed to disk
        count toward this limit.
    resize_to : tuple of (width, height), optional
        Resize frames to this dimension
    max_memory_mb : int
        Approximate memory limit in MB for held frames. When approaching this
        limit, frames are flushed to disk early (as JPEG, so flushed frames
        are re-encoded lossily) and only paths are retained internally. The
        returned list still contains numpy arrays (reloaded from the temp
        files at the end). Default 1024 MB.

    Returns
    -------
    list
        List of extracted frames as numpy arrays

    Raises
    ------
    FileNotFoundError
        If video_path does not exist
    ValueError
        If the video cannot be opened or sampling_rate is not positive
    """
    video_path = Path(video_path)
    if not video_path.exists():
        raise FileNotFoundError(f"Video file not found: {video_path}")

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Could not open video file: {video_path}")

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if sampling_rate <= 0:
        raise ValueError("Sampling rate must be positive")

    # Every `frame_interval`-th frame is decoded; the rest are grabbed (skipped).
    frame_interval = max(1, int(1 / sampling_rate))

    # Periodic capture interval in frames (0 = disabled)
    periodic_interval = int(periodic_capture_seconds * fps) if periodic_capture_seconds > 0 else 0

    logger.info(
        f"Video: {video_path.name}, FPS: {fps:.0f}, Frames: {frame_count}, "
        f"Sample interval: {frame_interval}, "
        f"Periodic capture: every {periodic_capture_seconds:.0f}s"
    )

    extracted_frames: List[np.ndarray] = []
    prev_frame = None
    frame_idx = 0
    last_capture_frame = -periodic_interval  # allow first periodic capture immediately

    # Memory safety valve: flush held frames to disk at ~90% of the budget.
    flush_threshold = max_memory_mb * 1024 * 1024 * 0.9
    approx_memory_used = 0
    _flush_dir: Optional[str] = None  # lazily created temp dir for flushed frames
    _flushed_paths: List[Path] = []  # paths of flushed frames, in capture order

    def _total_captured() -> int:
        # Frames held in memory plus frames already flushed to disk.
        # (Counting only len(extracted_frames) would reset the max_frames
        # accounting after every flush — previous bug.)
        return len(_flushed_paths) + len(extracted_frames)

    def _flush_frames_to_disk() -> None:
        # Write all in-memory frames to JPEG files and drop them from RAM.
        nonlocal _flush_dir, approx_memory_used
        if _flush_dir is None:
            _flush_dir = tempfile.mkdtemp(prefix="planopticon_frames_")
            logger.info(
                f"Memory limit ~{max_memory_mb}MB approaching, "
                f"flushing frames to {_flush_dir}"
            )
        for frame_to_flush in extracted_frames:
            # len(_flushed_paths) alone yields consecutive, strictly
            # increasing file indices across multiple flushes.
            flush_path = Path(_flush_dir) / f"flush_{len(_flushed_paths):06d}.jpg"
            cv2.imwrite(str(flush_path), frame_to_flush)
            _flushed_paths.append(flush_path)
        extracted_frames.clear()
        approx_memory_used = 0

    pbar = tqdm(
        total=frame_count,
        desc="Extracting frames",
        unit="frame",
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
    )

    try:
        while cap.isOpened():
            # Process frame only if it's a sampling point
            if frame_idx % frame_interval == 0:
                success, frame = cap.read()
                if not success:
                    break

                # Resize if specified
                if resize_to is not None:
                    frame = cv2.resize(frame, resize_to)

                should_capture = False
                reason = ""

                if prev_frame is None:
                    # First frame always gets extracted
                    should_capture = True
                    reason = "first"
                else:
                    # Change detection
                    diff = calculate_frame_difference(prev_frame, frame)
                    if diff > change_threshold:
                        should_capture = True
                        reason = f"change={diff:.3f}"
                    elif (
                        periodic_interval > 0
                        and (frame_idx - last_capture_frame) >= periodic_interval
                    ):
                        # Periodic capture — even if change is small
                        should_capture = True
                        reason = "periodic"

                if should_capture:
                    extracted_frames.append(frame)
                    # The pixel buffer (nbytes) dominates the footprint; the
                    # old sys.getsizeof(frame) + frame.nbytes double-counted
                    # it because ndarray.__sizeof__ already includes the data.
                    approx_memory_used += getattr(frame, "nbytes", sys.getsizeof(frame))
                    prev_frame = frame
                    last_capture_frame = frame_idx
                    logger.debug(f"Frame {frame_idx} extracted ({reason})")

                    if approx_memory_used >= flush_threshold:
                        _flush_frames_to_disk()

                pbar.set_postfix(extracted=_total_captured())

                # Check if we've reached the maximum (flushed frames included)
                if max_frames is not None and _total_captured() >= max_frames:
                    break
            else:
                # Skip frame but advance the decoder position cheaply
                cap.grab()

            frame_idx += 1
            # Each loop iteration consumes exactly one frame (read or grab),
            # so advance the bar by 1 — update(frame_interval) overshot.
            pbar.update(1)
    finally:
        # Release resources even if a callback/OpenCV call raised mid-loop.
        pbar.close()
        cap.release()

    # If frames were flushed to disk, reload them ahead of the in-memory tail
    # so the returned list stays in capture order.
    if _flushed_paths:
        reloaded: List[np.ndarray] = []
        for flushed_path in _flushed_paths:
            img = cv2.imread(str(flushed_path))
            if img is not None:
                reloaded.append(img)
        reloaded.extend(extracted_frames)
        extracted_frames = reloaded
        logger.info(f"Reloaded {len(_flushed_paths)} flushed frames from disk")

    if _flush_dir:
        # Best-effort cleanup of the temporary flush directory.
        shutil.rmtree(_flush_dir, ignore_errors=True)

    logger.info(f"Extracted {len(extracted_frames)} frames from {frame_count} total frames")
    return extracted_frames
829e24a… leo 362
287a3bb… leo 363
def func_gpu(*args, **kwargs):
    """GPU-accelerated version of extract_frames."""
    # CUDA path is still a stub.  Delegate to the undecorated CPU
    # implementation so we do not re-enter the gpu_accelerated wrapper
    # (which would dispatch right back here).
    logger.info("GPU acceleration not yet implemented, falling back to CPU")
    cpu_impl = extract_frames.__wrapped__
    return cpu_impl(*args, **kwargs)
287a3bb… leo 370
829e24a… leo 371
def save_frames(
    frames: List[np.ndarray], output_dir: Union[str, Path], base_filename: str = "frame"
) -> List[Path]:
    """
    Save extracted frames to disk.

    Parameters
    ----------
    frames : list
        List of frames to save
    output_dir : str or Path
        Directory to save frames in
    base_filename : str
        Base name for frame files

    Returns
    -------
    list
        List of paths to saved frame files
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    saved_paths: List[Path] = []
    for index, frame in enumerate(frames):
        # Zero-padded index keeps lexical and chronological order aligned.
        destination = target_dir / f"{base_filename}_{index:04d}.jpg"
        cv2.imwrite(str(destination), frame)
        saved_paths.append(destination)

    return saved_paths

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button