"""Text extraction module for frames and diagrams."""

import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import cv2
import numpy as np

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
class TextExtractor:
    """Extract text from images, frames, and diagrams."""

    def __init__(self, tesseract_path: Optional[str] = None):
        """
        Initialize text extractor.

        Parameters
        ----------
        tesseract_path : str, optional
            Path to tesseract executable for local OCR
        """
        self.tesseract_path = tesseract_path

        # Local OCR is enabled only when a tesseract path was supplied AND
        # pytesseract is importable; otherwise we fall back to the API stub.
        self.use_local_ocr = False
        if tesseract_path:
            try:
                import pytesseract

                pytesseract.pytesseract.tesseract_cmd = tesseract_path
                self.use_local_ocr = True
            except ImportError:
                logger.warning("pytesseract not installed, local OCR unavailable")

    def preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """
        Preprocess image for better text extraction.

        Parameters
        ----------
        image : np.ndarray
            Input image (BGR color or single-channel grayscale)

        Returns
        -------
        np.ndarray
            Preprocessed (binarized, denoised) grayscale image
        """
        # Convert to grayscale if not already (3-dim shape implies color).
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Adaptive thresholding handles uneven illumination better than a
        # single global threshold; INV so text becomes white for morphology.
        thresh = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
        )

        # Morphological opening with a 1x1 kernel removes isolated noise pixels.
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

        # Invert back so the result has dark text on a light background,
        # which is what Tesseract expects.
        result = cv2.bitwise_not(opening)

        return result

    def extract_text_local(self, image: np.ndarray) -> str:
        """
        Extract text from image using local OCR (Tesseract).

        Parameters
        ----------
        image : np.ndarray
            Input image

        Returns
        -------
        str
            Extracted text

        Raises
        ------
        RuntimeError
            If local OCR was not configured at construction time.
        """
        if not self.use_local_ocr:
            raise RuntimeError("Local OCR not configured")

        # Import is safe here: use_local_ocr is only True when this
        # import already succeeded in __init__.
        import pytesseract

        # Binarize/denoise before OCR to improve recognition quality.
        processed = self.preprocess_image(image)

        text = pytesseract.image_to_string(processed)

        return text

    def detect_text_regions(self, image: np.ndarray) -> List[Tuple[int, int, int, int]]:
        """
        Detect potential text regions in image.

        Parameters
        ----------
        image : np.ndarray
            Input image

        Returns
        -------
        list
            List of bounding boxes for text regions (x, y, w, h)
        """
        # Convert to grayscale for MSER.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # MSER (Maximally Stable Extremal Regions) is a classical detector
        # for blob-like regions; text characters tend to be stable regions.
        mser = cv2.MSER_create()
        regions, _ = mser.detectRegions(gray)

        # Convert each point-set region to an axis-aligned bounding box.
        bboxes = []
        for region in regions:
            x, y, w, h = cv2.boundingRect(region.reshape(-1, 1, 2))

            # Filter to text-like regions: moderate aspect ratio and a
            # minimum size of 5x5 pixels to discard noise blobs.
            aspect_ratio = w / h
            if 0.1 < aspect_ratio < 10 and h > 5 and w > 5:
                bboxes.append((x, y, w, h))

        # Merge overlapping boxes so each text run yields one region.
        merged_bboxes = self._merge_overlapping_boxes(bboxes)

        # Lazy %-style args: the message is only formatted if DEBUG is enabled.
        logger.debug("Detected %d text regions", len(merged_bboxes))
        return merged_bboxes

    def _merge_overlapping_boxes(
        self, boxes: List[Tuple[int, int, int, int]]
    ) -> List[Tuple[int, int, int, int]]:
        """
        Merge overlapping bounding boxes.

        Single left-to-right pass over boxes sorted by x; each box is merged
        into the current accumulator while they overlap. NOTE(review): this
        is not a transitive-closure merge — chains of overlaps separated by
        a non-overlapping box in x-order may remain split.

        Parameters
        ----------
        boxes : list
            List of bounding boxes (x, y, w, h)

        Returns
        -------
        list
            Merged bounding boxes
        """
        if not boxes:
            return []

        # Sort boxes by x coordinate so overlap candidates are adjacent.
        sorted_boxes = sorted(boxes, key=lambda b: b[0])

        merged = []
        current = list(sorted_boxes[0])

        for box in sorted_boxes[1:]:
            # Standard interval-overlap test on both axes:
            # two ranges overlap iff each starts before the other ends.
            if (
                current[0] <= box[0] + box[2]
                and box[0] <= current[0] + current[2]
                and current[1] <= box[1] + box[3]
                and box[1] <= current[1] + current[3]
            ):
                # Grow the accumulator to the union of both boxes.
                x1 = min(current[0], box[0])
                y1 = min(current[1], box[1])
                x2 = max(current[0] + current[2], box[0] + box[2])
                y2 = max(current[1] + current[3], box[1] + box[3])

                current = [x1, y1, x2 - x1, y2 - y1]
            else:
                # No overlap: flush the accumulator and start a new one.
                merged.append(tuple(current))
                current = list(box)

        # Flush the final accumulator.
        merged.append(tuple(current))

        return merged

    def extract_text_from_regions(
        self, image: np.ndarray, regions: List[Tuple[int, int, int, int]]
    ) -> Dict[Tuple[int, int, int, int], str]:
        """
        Extract text from specified regions in image.

        Parameters
        ----------
        image : np.ndarray
            Input image
        regions : list
            List of regions as (x, y, w, h)

        Returns
        -------
        dict
            Dictionary of {region: text}; regions yielding only whitespace
            (or empty crops) are omitted.
        """
        results = {}

        for region in regions:
            x, y, w, h = region

            # Crop the region of interest (rows are y, columns are x).
            roi = image[y : y + h, x : x + w]

            # Skip degenerate crops (e.g. region outside the image).
            if roi.size == 0:
                continue

            if self.use_local_ocr:
                text = self.extract_text_local(roi)
            else:
                # Placeholder until remote/API OCR is implemented.
                text = "API-based text extraction not yet implemented"

            # Keep only non-blank results, stripped of surrounding whitespace.
            if text.strip():
                results[region] = text.strip()

        return results

    def extract_text_from_image(self, image: np.ndarray, detect_regions: bool = True) -> str:
        """
        Extract text from entire image.

        Parameters
        ----------
        image : np.ndarray
            Input image
        detect_regions : bool
            Whether to detect and process text regions separately

        Returns
        -------
        str
            Extracted text (region texts joined with newlines when
            detect_regions is True)
        """
        if detect_regions:
            # Per-region OCR: detect candidate regions, OCR each, then join.
            regions = self.detect_text_regions(image)
            region_texts = self.extract_text_from_regions(image, regions)

            text = "\n".join(region_texts.values())
        else:
            # Whole-image OCR in a single pass.
            if self.use_local_ocr:
                text = self.extract_text_local(image)
            else:
                # Placeholder until remote/API OCR is implemented.
                text = "API-based text extraction not yet implemented"

        return text

    def extract_text_from_file(
        self, image_path: Union[str, Path], detect_regions: bool = True
    ) -> str:
        """
        Extract text from image file.

        Parameters
        ----------
        image_path : str or Path
            Path to image file
        detect_regions : bool
            Whether to detect and process text regions separately

        Returns
        -------
        str
            Extracted text

        Raises
        ------
        FileNotFoundError
            If the file does not exist.
        ValueError
            If OpenCV cannot decode the file as an image.
        """
        image_path = Path(image_path)
        if not image_path.exists():
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # cv2.imread returns None (rather than raising) on decode failure,
        # so convert that into an explicit error.
        image = cv2.imread(str(image_path))
        if image is None:
            raise ValueError(f"Failed to load image: {image_path}")

        text = self.extract_text_from_image(image, detect_regions)

        return text