PlanOpticon
Merge pull request #109 from ConflictHQ/feat/screenshot-knowledge-extraction feat: screenshot knowledge extraction
Commit
2a1b11a993e0b1409089c89718da3b09fc44b35c1fa1b22355ddb5103cb69f72
Parent
3551b8081b07992…
7 files changed
+49
-3
+74
+22
+61
-3
+32
+14
-1
+3
+49
-3
| --- tests/test_diagram_analyzer.py | ||
| +++ tests/test_diagram_analyzer.py | ||
| @@ -146,23 +146,34 @@ | ||
| 146 | 146 | "relationships": ["Start -> End"], |
| 147 | 147 | "mermaid": "graph LR\n Start-->End", |
| 148 | 148 | "chart_data": None, |
| 149 | 149 | } |
| 150 | 150 | ) |
| 151 | + | |
| 152 | + # Screenshot extraction response for medium-confidence frame | |
| 153 | + screenshot_response = json.dumps( | |
| 154 | + { | |
| 155 | + "content_type": "slide", | |
| 156 | + "caption": "A slide about something", | |
| 157 | + "text_content": "Key Points\n- Item 1\n- Item 2", | |
| 158 | + "entities": ["Item 1", "Item 2"], | |
| 159 | + "topics": ["presentation"], | |
| 160 | + } | |
| 161 | + ) | |
| 151 | 162 | |
| 152 | 163 | # Calls are interleaved per-frame: |
| 153 | 164 | # call 0: classify frame 0 (high conf) |
| 154 | 165 | # call 1: analyze frame 0 (full analysis) |
| 155 | 166 | # call 2: classify frame 1 (low conf - skip) |
| 156 | 167 | # call 3: classify frame 2 (medium conf) |
| 157 | - # call 4: caption frame 2 (screengrab) | |
| 168 | + # call 4: screenshot extraction frame 2 | |
| 158 | 169 | call_sequence = [ |
| 159 | 170 | classify_responses[0], # classify frame 0 |
| 160 | 171 | analysis_response, # analyze frame 0 |
| 161 | 172 | classify_responses[1], # classify frame 1 |
| 162 | 173 | classify_responses[2], # classify frame 2 |
| 163 | - "A slide about something", # caption frame 2 | |
| 174 | + screenshot_response, # screenshot extraction frame 2 | |
| 164 | 175 | ] |
| 165 | 176 | call_count = [0] |
| 166 | 177 | |
| 167 | 178 | def side_effect(image_bytes, prompt, max_tokens=4096): |
| 168 | 179 | idx = call_count[0] |
| @@ -178,10 +189,14 @@ | ||
| 178 | 189 | assert diagrams[0].diagram_type == DiagramType.flowchart |
| 179 | 190 | assert diagrams[0].mermaid == "graph LR\n Start-->End" |
| 180 | 191 | |
| 181 | 192 | assert len(captures) == 1 |
| 182 | 193 | assert captures[0].frame_index == 2 |
| 194 | + assert captures[0].content_type == "slide" | |
| 195 | + assert captures[0].text_content == "Key Points\n- Item 1\n- Item 2" | |
| 196 | + assert "Item 1" in captures[0].entities | |
| 197 | + assert "presentation" in captures[0].topics | |
| 183 | 198 | |
| 184 | 199 | # Check files were saved |
| 185 | 200 | assert (diagrams_dir / "diagram_0.jpg").exists() |
| 186 | 201 | assert (diagrams_dir / "diagram_0.mermaid").exists() |
| 187 | 202 | assert (diagrams_dir / "diagram_0.json").exists() |
| @@ -208,13 +223,44 @@ | ||
| 208 | 223 | "brief_description": "chart", |
| 209 | 224 | } |
| 210 | 225 | ) |
| 211 | 226 | if idx == 1: |
| 212 | 227 | return "This is not valid JSON" # Analysis fails |
| 213 | - return "A chart showing data" # Caption | |
| 228 | + # Screenshot extraction for the fallback screengrab | |
| 229 | + return json.dumps( | |
| 230 | + { | |
| 231 | + "content_type": "chart", | |
| 232 | + "caption": "A chart showing data", | |
| 233 | + "text_content": "Sales Q1 Q2 Q3", | |
| 234 | + "entities": ["Sales"], | |
| 235 | + "topics": ["metrics"], | |
| 236 | + } | |
| 237 | + ) | |
| 214 | 238 | |
| 215 | 239 | mock_pm.analyze_image.side_effect = side_effect |
| 216 | 240 | |
| 217 | 241 | diagrams, captures = analyzer.process_frames([fp], captures_dir=captures_dir) |
| 218 | 242 | assert len(diagrams) == 0 |
| 219 | 243 | assert len(captures) == 1 |
| 220 | 244 | assert captures[0].frame_index == 0 |
| 245 | + assert captures[0].content_type == "chart" | |
| 246 | + assert captures[0].text_content == "Sales Q1 Q2 Q3" | |
| 247 | + | |
| 248 | + def test_extract_screenshot_knowledge(self, analyzer, mock_pm, fake_frame): | |
| 249 | + mock_pm.analyze_image.return_value = json.dumps( | |
| 250 | + { | |
| 251 | + "content_type": "code", | |
| 252 | + "caption": "Python source code", | |
| 253 | + "text_content": "def main():\n print('hello')", | |
| 254 | + "entities": ["Python", "main function"], | |
| 255 | + "topics": ["programming", "source code"], | |
| 256 | + } | |
| 257 | + ) | |
| 258 | + result = analyzer.extract_screenshot_knowledge(fake_frame) | |
| 259 | + assert result["content_type"] == "code" | |
| 260 | + assert "Python" in result["entities"] | |
| 261 | + assert "def main" in result["text_content"] | |
| 262 | + | |
| 263 | + def test_extract_screenshot_knowledge_failure(self, analyzer, mock_pm, fake_frame): | |
| 264 | + mock_pm.analyze_image.return_value = "not json" | |
| 265 | + result = analyzer.extract_screenshot_knowledge(fake_frame) | |
| 266 | + assert result == {} | |
| 221 | 267 |
| --- tests/test_diagram_analyzer.py | |
| +++ tests/test_diagram_analyzer.py | |
| @@ -146,23 +146,34 @@ | |
| 146 | "relationships": ["Start -> End"], |
| 147 | "mermaid": "graph LR\n Start-->End", |
| 148 | "chart_data": None, |
| 149 | } |
| 150 | ) |
| 151 | |
| 152 | # Calls are interleaved per-frame: |
| 153 | # call 0: classify frame 0 (high conf) |
| 154 | # call 1: analyze frame 0 (full analysis) |
| 155 | # call 2: classify frame 1 (low conf - skip) |
| 156 | # call 3: classify frame 2 (medium conf) |
| 157 | # call 4: caption frame 2 (screengrab) |
| 158 | call_sequence = [ |
| 159 | classify_responses[0], # classify frame 0 |
| 160 | analysis_response, # analyze frame 0 |
| 161 | classify_responses[1], # classify frame 1 |
| 162 | classify_responses[2], # classify frame 2 |
| 163 | "A slide about something", # caption frame 2 |
| 164 | ] |
| 165 | call_count = [0] |
| 166 | |
| 167 | def side_effect(image_bytes, prompt, max_tokens=4096): |
| 168 | idx = call_count[0] |
| @@ -178,10 +189,14 @@ | |
| 178 | assert diagrams[0].diagram_type == DiagramType.flowchart |
| 179 | assert diagrams[0].mermaid == "graph LR\n Start-->End" |
| 180 | |
| 181 | assert len(captures) == 1 |
| 182 | assert captures[0].frame_index == 2 |
| 183 | |
| 184 | # Check files were saved |
| 185 | assert (diagrams_dir / "diagram_0.jpg").exists() |
| 186 | assert (diagrams_dir / "diagram_0.mermaid").exists() |
| 187 | assert (diagrams_dir / "diagram_0.json").exists() |
| @@ -208,13 +223,44 @@ | |
| 208 | "brief_description": "chart", |
| 209 | } |
| 210 | ) |
| 211 | if idx == 1: |
| 212 | return "This is not valid JSON" # Analysis fails |
| 213 | return "A chart showing data" # Caption |
| 214 | |
| 215 | mock_pm.analyze_image.side_effect = side_effect |
| 216 | |
| 217 | diagrams, captures = analyzer.process_frames([fp], captures_dir=captures_dir) |
| 218 | assert len(diagrams) == 0 |
| 219 | assert len(captures) == 1 |
| 220 | assert captures[0].frame_index == 0 |
| 221 |
| --- tests/test_diagram_analyzer.py | |
| +++ tests/test_diagram_analyzer.py | |
| @@ -146,23 +146,34 @@ | |
| 146 | "relationships": ["Start -> End"], |
| 147 | "mermaid": "graph LR\n Start-->End", |
| 148 | "chart_data": None, |
| 149 | } |
| 150 | ) |
| 151 | |
| 152 | # Screenshot extraction response for medium-confidence frame |
| 153 | screenshot_response = json.dumps( |
| 154 | { |
| 155 | "content_type": "slide", |
| 156 | "caption": "A slide about something", |
| 157 | "text_content": "Key Points\n- Item 1\n- Item 2", |
| 158 | "entities": ["Item 1", "Item 2"], |
| 159 | "topics": ["presentation"], |
| 160 | } |
| 161 | ) |
| 162 | |
| 163 | # Calls are interleaved per-frame: |
| 164 | # call 0: classify frame 0 (high conf) |
| 165 | # call 1: analyze frame 0 (full analysis) |
| 166 | # call 2: classify frame 1 (low conf - skip) |
| 167 | # call 3: classify frame 2 (medium conf) |
| 168 | # call 4: screenshot extraction frame 2 |
| 169 | call_sequence = [ |
| 170 | classify_responses[0], # classify frame 0 |
| 171 | analysis_response, # analyze frame 0 |
| 172 | classify_responses[1], # classify frame 1 |
| 173 | classify_responses[2], # classify frame 2 |
| 174 | screenshot_response, # screenshot extraction frame 2 |
| 175 | ] |
| 176 | call_count = [0] |
| 177 | |
| 178 | def side_effect(image_bytes, prompt, max_tokens=4096): |
| 179 | idx = call_count[0] |
| @@ -178,10 +189,14 @@ | |
| 189 | assert diagrams[0].diagram_type == DiagramType.flowchart |
| 190 | assert diagrams[0].mermaid == "graph LR\n Start-->End" |
| 191 | |
| 192 | assert len(captures) == 1 |
| 193 | assert captures[0].frame_index == 2 |
| 194 | assert captures[0].content_type == "slide" |
| 195 | assert captures[0].text_content == "Key Points\n- Item 1\n- Item 2" |
| 196 | assert "Item 1" in captures[0].entities |
| 197 | assert "presentation" in captures[0].topics |
| 198 | |
| 199 | # Check files were saved |
| 200 | assert (diagrams_dir / "diagram_0.jpg").exists() |
| 201 | assert (diagrams_dir / "diagram_0.mermaid").exists() |
| 202 | assert (diagrams_dir / "diagram_0.json").exists() |
| @@ -208,13 +223,44 @@ | |
| 223 | "brief_description": "chart", |
| 224 | } |
| 225 | ) |
| 226 | if idx == 1: |
| 227 | return "This is not valid JSON" # Analysis fails |
| 228 | # Screenshot extraction for the fallback screengrab |
| 229 | return json.dumps( |
| 230 | { |
| 231 | "content_type": "chart", |
| 232 | "caption": "A chart showing data", |
| 233 | "text_content": "Sales Q1 Q2 Q3", |
| 234 | "entities": ["Sales"], |
| 235 | "topics": ["metrics"], |
| 236 | } |
| 237 | ) |
| 238 | |
| 239 | mock_pm.analyze_image.side_effect = side_effect |
| 240 | |
| 241 | diagrams, captures = analyzer.process_frames([fp], captures_dir=captures_dir) |
| 242 | assert len(diagrams) == 0 |
| 243 | assert len(captures) == 1 |
| 244 | assert captures[0].frame_index == 0 |
| 245 | assert captures[0].content_type == "chart" |
| 246 | assert captures[0].text_content == "Sales Q1 Q2 Q3" |
| 247 | |
| 248 | def test_extract_screenshot_knowledge(self, analyzer, mock_pm, fake_frame): |
| 249 | mock_pm.analyze_image.return_value = json.dumps( |
| 250 | { |
| 251 | "content_type": "code", |
| 252 | "caption": "Python source code", |
| 253 | "text_content": "def main():\n print('hello')", |
| 254 | "entities": ["Python", "main function"], |
| 255 | "topics": ["programming", "source code"], |
| 256 | } |
| 257 | ) |
| 258 | result = analyzer.extract_screenshot_knowledge(fake_frame) |
| 259 | assert result["content_type"] == "code" |
| 260 | assert "Python" in result["entities"] |
| 261 | assert "def main" in result["text_content"] |
| 262 | |
| 263 | def test_extract_screenshot_knowledge_failure(self, analyzer, mock_pm, fake_frame): |
| 264 | mock_pm.analyze_image.return_value = "not json" |
| 265 | result = analyzer.extract_screenshot_knowledge(fake_frame) |
| 266 | assert result == {} |
| 267 |
| --- tests/test_knowledge_graph.py | ||
| +++ tests/test_knowledge_graph.py | ||
| @@ -133,10 +133,84 @@ | ||
| 133 | 133 | ] |
| 134 | 134 | kg_with_provider.process_diagrams(diagrams) |
| 135 | 135 | assert kg_with_provider._store.has_entity("diagram_0") |
| 136 | 136 | assert kg_with_provider._store.has_entity("diagram_1") |
| 137 | 137 | |
| 138 | + | |
| 139 | +class TestProcessScreenshots: | |
| 140 | + @pytest.fixture | |
| 141 | + def mock_pm(self): | |
| 142 | + pm = MagicMock() | |
| 143 | + pm.chat.return_value = json.dumps( | |
| 144 | + [ | |
| 145 | + {"name": "Python", "type": "technology", "description": "Language"}, | |
| 146 | + {"name": "Flask", "type": "technology", "description": "Framework"}, | |
| 147 | + ] | |
| 148 | + ) | |
| 149 | + return pm | |
| 150 | + | |
| 151 | + @pytest.fixture | |
| 152 | + def kg_with_provider(self, mock_pm): | |
| 153 | + return KnowledgeGraph(provider_manager=mock_pm) | |
| 154 | + | |
| 155 | + def test_process_screenshots_with_text(self, kg_with_provider, mock_pm): | |
| 156 | + screenshots = [ | |
| 157 | + { | |
| 158 | + "text_content": "import flask\napp = Flask(__name__)", | |
| 159 | + "content_type": "code", | |
| 160 | + "entities": ["Flask", "Python"], | |
| 161 | + "frame_index": 3, | |
| 162 | + }, | |
| 163 | + ] | |
| 164 | + kg_with_provider.process_screenshots(screenshots) | |
| 165 | + # LLM extraction from text_content | |
| 166 | + mock_pm.chat.assert_called() | |
| 167 | + # Explicitly listed entities should be added | |
| 168 | + assert kg_with_provider._store.has_entity("Flask") | |
| 169 | + assert kg_with_provider._store.has_entity("Python") | |
| 170 | + | |
| 171 | + def test_process_screenshots_without_text(self, kg_with_provider, mock_pm): | |
| 172 | + screenshots = [ | |
| 173 | + { | |
| 174 | + "text_content": "", | |
| 175 | + "content_type": "other", | |
| 176 | + "entities": ["Docker"], | |
| 177 | + "frame_index": 5, | |
| 178 | + }, | |
| 179 | + ] | |
| 180 | + kg_with_provider.process_screenshots(screenshots) | |
| 181 | + # No chat call for empty text | |
| 182 | + mock_pm.chat.assert_not_called() | |
| 183 | + # But explicit entities still added | |
| 184 | + assert kg_with_provider._store.has_entity("Docker") | |
| 185 | + | |
| 186 | + def test_process_screenshots_empty_entities(self, kg_with_provider): | |
| 187 | + screenshots = [ | |
| 188 | + { | |
| 189 | + "text_content": "", | |
| 190 | + "content_type": "slide", | |
| 191 | + "entities": [], | |
| 192 | + "frame_index": 0, | |
| 193 | + }, | |
| 194 | + ] | |
| 195 | + kg_with_provider.process_screenshots(screenshots) | |
| 196 | + # No crash, no entities added | |
| 197 | + | |
| 198 | + def test_process_screenshots_filters_short_names(self, kg_with_provider): | |
| 199 | + screenshots = [ | |
| 200 | + { | |
| 201 | + "text_content": "", | |
| 202 | + "entities": ["A", "Go", "Python"], | |
| 203 | + "frame_index": 0, | |
| 204 | + }, | |
| 205 | + ] | |
| 206 | + kg_with_provider.process_screenshots(screenshots) | |
| 207 | + # "A" is too short (< 2 chars), filtered out | |
| 208 | + assert not kg_with_provider._store.has_entity("A") | |
| 209 | + assert kg_with_provider._store.has_entity("Go") | |
| 210 | + assert kg_with_provider._store.has_entity("Python") | |
| 211 | + | |
| 138 | 212 | |
| 139 | 213 | class TestToDictFromDict: |
| 140 | 214 | def test_round_trip_empty(self): |
| 141 | 215 | kg = KnowledgeGraph() |
| 142 | 216 | data = kg.to_dict() |
| 143 | 217 |
| --- tests/test_knowledge_graph.py | |
| +++ tests/test_knowledge_graph.py | |
| @@ -133,10 +133,84 @@ | |
| 133 | ] |
| 134 | kg_with_provider.process_diagrams(diagrams) |
| 135 | assert kg_with_provider._store.has_entity("diagram_0") |
| 136 | assert kg_with_provider._store.has_entity("diagram_1") |
| 137 | |
| 138 | |
| 139 | class TestToDictFromDict: |
| 140 | def test_round_trip_empty(self): |
| 141 | kg = KnowledgeGraph() |
| 142 | data = kg.to_dict() |
| 143 |
| --- tests/test_knowledge_graph.py | |
| +++ tests/test_knowledge_graph.py | |
| @@ -133,10 +133,84 @@ | |
| 133 | ] |
| 134 | kg_with_provider.process_diagrams(diagrams) |
| 135 | assert kg_with_provider._store.has_entity("diagram_0") |
| 136 | assert kg_with_provider._store.has_entity("diagram_1") |
| 137 | |
| 138 | |
| 139 | class TestProcessScreenshots: |
| 140 | @pytest.fixture |
| 141 | def mock_pm(self): |
| 142 | pm = MagicMock() |
| 143 | pm.chat.return_value = json.dumps( |
| 144 | [ |
| 145 | {"name": "Python", "type": "technology", "description": "Language"}, |
| 146 | {"name": "Flask", "type": "technology", "description": "Framework"}, |
| 147 | ] |
| 148 | ) |
| 149 | return pm |
| 150 | |
| 151 | @pytest.fixture |
| 152 | def kg_with_provider(self, mock_pm): |
| 153 | return KnowledgeGraph(provider_manager=mock_pm) |
| 154 | |
| 155 | def test_process_screenshots_with_text(self, kg_with_provider, mock_pm): |
| 156 | screenshots = [ |
| 157 | { |
| 158 | "text_content": "import flask\napp = Flask(__name__)", |
| 159 | "content_type": "code", |
| 160 | "entities": ["Flask", "Python"], |
| 161 | "frame_index": 3, |
| 162 | }, |
| 163 | ] |
| 164 | kg_with_provider.process_screenshots(screenshots) |
| 165 | # LLM extraction from text_content |
| 166 | mock_pm.chat.assert_called() |
| 167 | # Explicitly listed entities should be added |
| 168 | assert kg_with_provider._store.has_entity("Flask") |
| 169 | assert kg_with_provider._store.has_entity("Python") |
| 170 | |
| 171 | def test_process_screenshots_without_text(self, kg_with_provider, mock_pm): |
| 172 | screenshots = [ |
| 173 | { |
| 174 | "text_content": "", |
| 175 | "content_type": "other", |
| 176 | "entities": ["Docker"], |
| 177 | "frame_index": 5, |
| 178 | }, |
| 179 | ] |
| 180 | kg_with_provider.process_screenshots(screenshots) |
| 181 | # No chat call for empty text |
| 182 | mock_pm.chat.assert_not_called() |
| 183 | # But explicit entities still added |
| 184 | assert kg_with_provider._store.has_entity("Docker") |
| 185 | |
| 186 | def test_process_screenshots_empty_entities(self, kg_with_provider): |
| 187 | screenshots = [ |
| 188 | { |
| 189 | "text_content": "", |
| 190 | "content_type": "slide", |
| 191 | "entities": [], |
| 192 | "frame_index": 0, |
| 193 | }, |
| 194 | ] |
| 195 | kg_with_provider.process_screenshots(screenshots) |
| 196 | # No crash, no entities added |
| 197 | |
| 198 | def test_process_screenshots_filters_short_names(self, kg_with_provider): |
| 199 | screenshots = [ |
| 200 | { |
| 201 | "text_content": "", |
| 202 | "entities": ["A", "Go", "Python"], |
| 203 | "frame_index": 0, |
| 204 | }, |
| 205 | ] |
| 206 | kg_with_provider.process_screenshots(screenshots) |
| 207 | # "A" is too short (< 2 chars), filtered out |
| 208 | assert not kg_with_provider._store.has_entity("A") |
| 209 | assert kg_with_provider._store.has_entity("Go") |
| 210 | assert kg_with_provider._store.has_entity("Python") |
| 211 | |
| 212 | |
| 213 | class TestToDictFromDict: |
| 214 | def test_round_trip_empty(self): |
| 215 | kg = KnowledgeGraph() |
| 216 | data = kg.to_dict() |
| 217 |
+22
| --- tests/test_models.py | ||
| +++ tests/test_models.py | ||
| @@ -115,18 +115,40 @@ | ||
| 115 | 115 | |
| 116 | 116 | class TestScreenCapture: |
| 117 | 117 | def test_basic(self): |
| 118 | 118 | sc = ScreenCapture(frame_index=10, caption="Architecture overview slide", confidence=0.5) |
| 119 | 119 | assert sc.image_path is None |
| 120 | + assert sc.content_type is None | |
| 121 | + assert sc.text_content is None | |
| 122 | + assert sc.entities == [] | |
| 123 | + assert sc.topics == [] | |
| 124 | + | |
| 125 | + def test_with_extraction(self): | |
| 126 | + sc = ScreenCapture( | |
| 127 | + frame_index=5, | |
| 128 | + caption="Code editor showing Python", | |
| 129 | + confidence=0.5, | |
| 130 | + content_type="code", | |
| 131 | + text_content="def main():\n print('hello')", | |
| 132 | + entities=["Python", "main function"], | |
| 133 | + topics=["programming"], | |
| 134 | + ) | |
| 135 | + assert sc.content_type == "code" | |
| 136 | + assert "Python" in sc.entities | |
| 137 | + assert sc.text_content is not None | |
| 120 | 138 | |
| 121 | 139 | def test_round_trip(self): |
| 122 | 140 | sc = ScreenCapture( |
| 123 | 141 | frame_index=7, |
| 124 | 142 | timestamp=30.0, |
| 125 | 143 | caption="Timeline", |
| 126 | 144 | image_path="captures/capture_0.jpg", |
| 127 | 145 | confidence=0.45, |
| 146 | + content_type="slide", | |
| 147 | + text_content="Q4 Roadmap", | |
| 148 | + entities=["Roadmap"], | |
| 149 | + topics=["planning"], | |
| 128 | 150 | ) |
| 129 | 151 | restored = ScreenCapture.model_validate_json(sc.model_dump_json()) |
| 130 | 152 | assert restored == sc |
| 131 | 153 | |
| 132 | 154 | |
| 133 | 155 |
| --- tests/test_models.py | |
| +++ tests/test_models.py | |
| @@ -115,18 +115,40 @@ | |
| 115 | |
| 116 | class TestScreenCapture: |
| 117 | def test_basic(self): |
| 118 | sc = ScreenCapture(frame_index=10, caption="Architecture overview slide", confidence=0.5) |
| 119 | assert sc.image_path is None |
| 120 | |
| 121 | def test_round_trip(self): |
| 122 | sc = ScreenCapture( |
| 123 | frame_index=7, |
| 124 | timestamp=30.0, |
| 125 | caption="Timeline", |
| 126 | image_path="captures/capture_0.jpg", |
| 127 | confidence=0.45, |
| 128 | ) |
| 129 | restored = ScreenCapture.model_validate_json(sc.model_dump_json()) |
| 130 | assert restored == sc |
| 131 | |
| 132 | |
| 133 |
| --- tests/test_models.py | |
| +++ tests/test_models.py | |
| @@ -115,18 +115,40 @@ | |
| 115 | |
| 116 | class TestScreenCapture: |
| 117 | def test_basic(self): |
| 118 | sc = ScreenCapture(frame_index=10, caption="Architecture overview slide", confidence=0.5) |
| 119 | assert sc.image_path is None |
| 120 | assert sc.content_type is None |
| 121 | assert sc.text_content is None |
| 122 | assert sc.entities == [] |
| 123 | assert sc.topics == [] |
| 124 | |
| 125 | def test_with_extraction(self): |
| 126 | sc = ScreenCapture( |
| 127 | frame_index=5, |
| 128 | caption="Code editor showing Python", |
| 129 | confidence=0.5, |
| 130 | content_type="code", |
| 131 | text_content="def main():\n print('hello')", |
| 132 | entities=["Python", "main function"], |
| 133 | topics=["programming"], |
| 134 | ) |
| 135 | assert sc.content_type == "code" |
| 136 | assert "Python" in sc.entities |
| 137 | assert sc.text_content is not None |
| 138 | |
| 139 | def test_round_trip(self): |
| 140 | sc = ScreenCapture( |
| 141 | frame_index=7, |
| 142 | timestamp=30.0, |
| 143 | caption="Timeline", |
| 144 | image_path="captures/capture_0.jpg", |
| 145 | confidence=0.45, |
| 146 | content_type="slide", |
| 147 | text_content="Q4 Roadmap", |
| 148 | entities=["Roadmap"], |
| 149 | topics=["planning"], |
| 150 | ) |
| 151 | restored = ScreenCapture.model_validate_json(sc.model_dump_json()) |
| 152 | assert restored == sc |
| 153 | |
| 154 | |
| 155 |
| --- video_processor/analyzers/diagram_analyzer.py | ||
| +++ video_processor/analyzers/diagram_analyzer.py | ||
| @@ -55,10 +55,31 @@ | ||
| 55 | 55 | """ |
| 56 | 56 | |
| 57 | 57 | # Caption prompt for screengrab fallback |
| 58 | 58 | _CAPTION_PROMPT = "Briefly describe what this image shows in 1-2 sentences." |
| 59 | 59 | |
| 60 | +# Rich screenshot extraction prompt — extracts knowledge from shared screens | |
| 61 | +_SCREENSHOT_EXTRACT_PROMPT = """\ | |
| 62 | +Analyze this screenshot from a video recording. Extract all visible knowledge. | |
| 63 | +This is shared screen content (slides, code, documents, browser, terminal, etc.). | |
| 64 | + | |
| 65 | +Return ONLY a JSON object (no markdown fences): | |
| 66 | +{ | |
| 67 | + "content_type": "slide"|"code"|"document"|"terminal"|"browser"|"chat"|"other", | |
| 68 | + "caption": "one-sentence description of what is shown", | |
| 69 | + "text_content": "all visible text, preserving structure and line breaks", | |
| 70 | + "entities": ["named things visible: people, technologies, tools, services, \ | |
| 71 | +projects, libraries, APIs, error codes, URLs, file paths"], | |
| 72 | + "topics": ["concepts or subjects this content is about"] | |
| 73 | +} | |
| 74 | + | |
| 75 | +For text_content: extract ALL readable text — code, titles, bullet points, URLs, | |
| 76 | +error messages, terminal output, chat messages, file names. Be thorough. | |
| 77 | +For entities: extract specific named things, not generic words. | |
| 78 | +For topics: extract 2-5 high-level topics this content relates to. | |
| 79 | +""" | |
| 80 | + | |
| 60 | 81 | |
| 61 | 82 | def _read_image_bytes(image_path: Union[str, Path]) -> bytes: |
| 62 | 83 | """Read image file as bytes.""" |
| 63 | 84 | return Path(image_path).read_bytes() |
| 64 | 85 | |
| @@ -129,10 +150,17 @@ | ||
| 129 | 150 | |
| 130 | 151 | def caption_frame(self, image_path: Union[str, Path]) -> str: |
| 131 | 152 | """Get a brief caption for a screengrab fallback.""" |
| 132 | 153 | image_bytes = _read_image_bytes(image_path) |
| 133 | 154 | return self.pm.analyze_image(image_bytes, _CAPTION_PROMPT, max_tokens=256) |
| 155 | + | |
| 156 | + def extract_screenshot_knowledge(self, image_path: Union[str, Path]) -> dict: | |
| 157 | + """Extract knowledge from a screenshot — text, entities, topics.""" | |
| 158 | + image_bytes = _read_image_bytes(image_path) | |
| 159 | + raw = self.pm.analyze_image(image_bytes, _SCREENSHOT_EXTRACT_PROMPT, max_tokens=2048) | |
| 160 | + result = _parse_json_response(raw) | |
| 161 | + return result or {} | |
| 134 | 162 | |
| 135 | 163 | def process_frames( |
| 136 | 164 | self, |
| 137 | 165 | frame_paths: List[Union[str, Path]], |
| 138 | 166 | diagrams_dir: Optional[Path] = None, |
| @@ -312,21 +340,51 @@ | ||
| 312 | 340 | frame_index: int, |
| 313 | 341 | capture_index: int, |
| 314 | 342 | captures_dir: Optional[Path], |
| 315 | 343 | confidence: float, |
| 316 | 344 | ) -> ScreenCapture: |
| 317 | - """Save a frame as a captioned screengrab.""" | |
| 345 | + """Extract knowledge from a screenshot and save it.""" | |
| 346 | + # Try rich extraction first, fall back to caption-only | |
| 318 | 347 | caption = "" |
| 348 | + content_type = None | |
| 349 | + text_content = None | |
| 350 | + entities: List[str] = [] | |
| 351 | + topics: List[str] = [] | |
| 352 | + | |
| 319 | 353 | try: |
| 320 | - caption = self.caption_frame(frame_path) | |
| 354 | + extraction = self.extract_screenshot_knowledge(frame_path) | |
| 355 | + if extraction: | |
| 356 | + caption = extraction.get("caption", "") | |
| 357 | + content_type = extraction.get("content_type") | |
| 358 | + text_content = extraction.get("text_content") | |
| 359 | + raw_entities = extraction.get("entities", []) | |
| 360 | + entities = [str(e) for e in raw_entities] if isinstance(raw_entities, list) else [] | |
| 361 | + raw_topics = extraction.get("topics", []) | |
| 362 | + topics = [str(t) for t in raw_topics] if isinstance(raw_topics, list) else [] | |
| 363 | + logger.info( | |
| 364 | + f"Frame {frame_index}: extracted " | |
| 365 | + f"{len(entities)} entities, " | |
| 366 | + f"{len(topics)} topics from {content_type}" | |
| 367 | + ) | |
| 321 | 368 | except Exception as e: |
| 322 | - logger.warning(f"Caption failed for frame {frame_index}: {e}") | |
| 369 | + logger.warning( | |
| 370 | + f"Screenshot extraction failed for frame " | |
| 371 | + f"{frame_index}: {e}, falling back to caption" | |
| 372 | + ) | |
| 373 | + try: | |
| 374 | + caption = self.caption_frame(frame_path) | |
| 375 | + except Exception as e2: | |
| 376 | + logger.warning(f"Caption also failed for frame {frame_index}: {e2}") | |
| 323 | 377 | |
| 324 | 378 | sc = ScreenCapture( |
| 325 | 379 | frame_index=frame_index, |
| 326 | 380 | caption=caption, |
| 327 | 381 | confidence=confidence, |
| 382 | + content_type=content_type, | |
| 383 | + text_content=text_content, | |
| 384 | + entities=entities, | |
| 385 | + topics=topics, | |
| 328 | 386 | ) |
| 329 | 387 | |
| 330 | 388 | if captures_dir: |
| 331 | 389 | captures_dir.mkdir(parents=True, exist_ok=True) |
| 332 | 390 | prefix = f"capture_{capture_index}" |
| 333 | 391 |
| --- video_processor/analyzers/diagram_analyzer.py | |
| +++ video_processor/analyzers/diagram_analyzer.py | |
| @@ -55,10 +55,31 @@ | |
| 55 | """ |
| 56 | |
| 57 | # Caption prompt for screengrab fallback |
| 58 | _CAPTION_PROMPT = "Briefly describe what this image shows in 1-2 sentences." |
| 59 | |
| 60 | |
| 61 | def _read_image_bytes(image_path: Union[str, Path]) -> bytes: |
| 62 | """Read image file as bytes.""" |
| 63 | return Path(image_path).read_bytes() |
| 64 | |
| @@ -129,10 +150,17 @@ | |
| 129 | |
| 130 | def caption_frame(self, image_path: Union[str, Path]) -> str: |
| 131 | """Get a brief caption for a screengrab fallback.""" |
| 132 | image_bytes = _read_image_bytes(image_path) |
| 133 | return self.pm.analyze_image(image_bytes, _CAPTION_PROMPT, max_tokens=256) |
| 134 | |
| 135 | def process_frames( |
| 136 | self, |
| 137 | frame_paths: List[Union[str, Path]], |
| 138 | diagrams_dir: Optional[Path] = None, |
| @@ -312,21 +340,51 @@ | |
| 312 | frame_index: int, |
| 313 | capture_index: int, |
| 314 | captures_dir: Optional[Path], |
| 315 | confidence: float, |
| 316 | ) -> ScreenCapture: |
| 317 | """Save a frame as a captioned screengrab.""" |
| 318 | caption = "" |
| 319 | try: |
| 320 | caption = self.caption_frame(frame_path) |
| 321 | except Exception as e: |
| 322 | logger.warning(f"Caption failed for frame {frame_index}: {e}") |
| 323 | |
| 324 | sc = ScreenCapture( |
| 325 | frame_index=frame_index, |
| 326 | caption=caption, |
| 327 | confidence=confidence, |
| 328 | ) |
| 329 | |
| 330 | if captures_dir: |
| 331 | captures_dir.mkdir(parents=True, exist_ok=True) |
| 332 | prefix = f"capture_{capture_index}" |
| 333 |
| --- video_processor/analyzers/diagram_analyzer.py | |
| +++ video_processor/analyzers/diagram_analyzer.py | |
| @@ -55,10 +55,31 @@ | |
| 55 | """ |
| 56 | |
| 57 | # Caption prompt for screengrab fallback |
| 58 | _CAPTION_PROMPT = "Briefly describe what this image shows in 1-2 sentences." |
| 59 | |
| 60 | # Rich screenshot extraction prompt — extracts knowledge from shared screens |
| 61 | _SCREENSHOT_EXTRACT_PROMPT = """\ |
| 62 | Analyze this screenshot from a video recording. Extract all visible knowledge. |
| 63 | This is shared screen content (slides, code, documents, browser, terminal, etc.). |
| 64 | |
| 65 | Return ONLY a JSON object (no markdown fences): |
| 66 | { |
| 67 | "content_type": "slide"|"code"|"document"|"terminal"|"browser"|"chat"|"other", |
| 68 | "caption": "one-sentence description of what is shown", |
| 69 | "text_content": "all visible text, preserving structure and line breaks", |
| 70 | "entities": ["named things visible: people, technologies, tools, services, \ |
| 71 | projects, libraries, APIs, error codes, URLs, file paths"], |
| 72 | "topics": ["concepts or subjects this content is about"] |
| 73 | } |
| 74 | |
| 75 | For text_content: extract ALL readable text — code, titles, bullet points, URLs, |
| 76 | error messages, terminal output, chat messages, file names. Be thorough. |
| 77 | For entities: extract specific named things, not generic words. |
| 78 | For topics: extract 2-5 high-level topics this content relates to. |
| 79 | """ |
| 80 | |
| 81 | |
| 82 | def _read_image_bytes(image_path: Union[str, Path]) -> bytes: |
| 83 | """Read image file as bytes.""" |
| 84 | return Path(image_path).read_bytes() |
| 85 | |
| @@ -129,10 +150,17 @@ | |
| 150 | |
| 151 | def caption_frame(self, image_path: Union[str, Path]) -> str: |
| 152 | """Get a brief caption for a screengrab fallback.""" |
| 153 | image_bytes = _read_image_bytes(image_path) |
| 154 | return self.pm.analyze_image(image_bytes, _CAPTION_PROMPT, max_tokens=256) |
| 155 | |
| 156 | def extract_screenshot_knowledge(self, image_path: Union[str, Path]) -> dict: |
| 157 | """Extract knowledge from a screenshot — text, entities, topics.""" |
| 158 | image_bytes = _read_image_bytes(image_path) |
| 159 | raw = self.pm.analyze_image(image_bytes, _SCREENSHOT_EXTRACT_PROMPT, max_tokens=2048) |
| 160 | result = _parse_json_response(raw) |
| 161 | return result or {} |
| 162 | |
| 163 | def process_frames( |
| 164 | self, |
| 165 | frame_paths: List[Union[str, Path]], |
| 166 | diagrams_dir: Optional[Path] = None, |
| @@ -312,21 +340,51 @@ | |
| 340 | frame_index: int, |
| 341 | capture_index: int, |
| 342 | captures_dir: Optional[Path], |
| 343 | confidence: float, |
| 344 | ) -> ScreenCapture: |
| 345 | """Extract knowledge from a screenshot and save it.""" |
| 346 | # Try rich extraction first, fall back to caption-only |
| 347 | caption = "" |
| 348 | content_type = None |
| 349 | text_content = None |
| 350 | entities: List[str] = [] |
| 351 | topics: List[str] = [] |
| 352 | |
| 353 | try: |
| 354 | extraction = self.extract_screenshot_knowledge(frame_path) |
| 355 | if extraction: |
| 356 | caption = extraction.get("caption", "") |
| 357 | content_type = extraction.get("content_type") |
| 358 | text_content = extraction.get("text_content") |
| 359 | raw_entities = extraction.get("entities", []) |
| 360 | entities = [str(e) for e in raw_entities] if isinstance(raw_entities, list) else [] |
| 361 | raw_topics = extraction.get("topics", []) |
| 362 | topics = [str(t) for t in raw_topics] if isinstance(raw_topics, list) else [] |
| 363 | logger.info( |
| 364 | f"Frame {frame_index}: extracted " |
| 365 | f"{len(entities)} entities, " |
| 366 | f"{len(topics)} topics from {content_type}" |
| 367 | ) |
| 368 | except Exception as e: |
| 369 | logger.warning( |
| 370 | f"Screenshot extraction failed for frame " |
| 371 | f"{frame_index}: {e}, falling back to caption" |
| 372 | ) |
| 373 | try: |
| 374 | caption = self.caption_frame(frame_path) |
| 375 | except Exception as e2: |
| 376 | logger.warning(f"Caption also failed for frame {frame_index}: {e2}") |
| 377 | |
| 378 | sc = ScreenCapture( |
| 379 | frame_index=frame_index, |
| 380 | caption=caption, |
| 381 | confidence=confidence, |
| 382 | content_type=content_type, |
| 383 | text_content=text_content, |
| 384 | entities=entities, |
| 385 | topics=topics, |
| 386 | ) |
| 387 | |
| 388 | if captures_dir: |
| 389 | captures_dir.mkdir(parents=True, exist_ok=True) |
| 390 | prefix = f"capture_{capture_index}" |
| 391 |
| --- video_processor/integrators/knowledge_graph.py | ||
| +++ video_processor/integrators/knowledge_graph.py | ||
| @@ -196,10 +196,42 @@ | ||
| 196 | 196 | self._store.add_occurrence( |
| 197 | 197 | diagram_id, |
| 198 | 198 | source if text_content else diagram_id, |
| 199 | 199 | text=f"frame_index={diagram.get('frame_index')}", |
| 200 | 200 | ) |
| 201 | + | |
def process_screenshots(self, screenshots: List[Dict]) -> None:
    """Process screenshot captures into the knowledge graph.

    For each capture, any visible text is passed to ``self.add_content`` and
    every usable name in the capture's ``entities`` list is registered in the
    store as a ``"concept"`` entity (when not already present) and given an
    occurrence record.

    Args:
        screenshots: Capture dicts, e.g. the ``model_dump()`` output of
            ``ScreenCapture``. Keys read: ``text_content``, ``content_type``,
            ``entities`` and ``frame_index``. Each may be missing or ``None``.
    """
    for i, capture in enumerate(screenshots):
        source = f"screenshot_{i}"
        # Dumped models carry explicit None values, which a dict.get default
        # does not replace; `or` applies the fallback in both cases.
        content_type = capture.get("content_type") or "screenshot"
        text_content = capture.get("text_content") or ""
        raw_entities = capture.get("entities") or []

        # Extract entities from visible text via LLM
        if text_content:
            self.add_content(text_content, source)

        # Add explicitly identified entities from vision extraction
        for entity_name in raw_entities:
            # Skip non-strings (len() would raise or mislead) and names too
            # short to be meaningful.
            if not isinstance(entity_name, str) or len(entity_name) < 2:
                continue
            if not self._store.has_entity(entity_name):
                self._store.merge_entity(
                    entity_name,
                    "concept",
                    [f"Identified in {content_type} screenshot"],
                    source=source,
                )
            # Recorded for every listed entity, including pre-existing ones.
            self._store.add_occurrence(
                entity_name,
                source,
                text=f"Visible in {content_type} (frame {capture.get('frame_index', '?')})",
            )
| 201 | 233 | |
| 202 | 234 | def to_data(self) -> KnowledgeGraphData: |
| 203 | 235 | """Convert to pydantic KnowledgeGraphData model.""" |
| 204 | 236 | nodes = [] |
| 205 | 237 | for entity in self._store.get_all_entities(): |
| 206 | 238 |
| --- video_processor/integrators/knowledge_graph.py | |
| +++ video_processor/integrators/knowledge_graph.py | |
| @@ -196,10 +196,42 @@ | |
| 196 | self._store.add_occurrence( |
| 197 | diagram_id, |
| 198 | source if text_content else diagram_id, |
| 199 | text=f"frame_index={diagram.get('frame_index')}", |
| 200 | ) |
| 201 | |
| 202 | def to_data(self) -> KnowledgeGraphData: |
| 203 | """Convert to pydantic KnowledgeGraphData model.""" |
| 204 | nodes = [] |
| 205 | for entity in self._store.get_all_entities(): |
| 206 |
| --- video_processor/integrators/knowledge_graph.py | |
| +++ video_processor/integrators/knowledge_graph.py | |
| @@ -196,10 +196,42 @@ | |
| 196 | self._store.add_occurrence( |
| 197 | diagram_id, |
| 198 | source if text_content else diagram_id, |
| 199 | text=f"frame_index={diagram.get('frame_index')}", |
| 200 | ) |
| 201 | |
def process_screenshots(self, screenshots: List[Dict]) -> None:
    """Process screenshot captures into the knowledge graph.

    For each capture, any visible text is passed to ``self.add_content`` and
    every usable name in the capture's ``entities`` list is registered in the
    store as a ``"concept"`` entity (when not already present) and given an
    occurrence record.

    Args:
        screenshots: Capture dicts, e.g. the ``model_dump()`` output of
            ``ScreenCapture``. Keys read: ``text_content``, ``content_type``,
            ``entities`` and ``frame_index``. Each may be missing or ``None``.
    """
    for i, capture in enumerate(screenshots):
        source = f"screenshot_{i}"
        # Dumped models carry explicit None values, which a dict.get default
        # does not replace; `or` applies the fallback in both cases.
        content_type = capture.get("content_type") or "screenshot"
        text_content = capture.get("text_content") or ""
        raw_entities = capture.get("entities") or []

        # Extract entities from visible text via LLM
        if text_content:
            self.add_content(text_content, source)

        # Add explicitly identified entities from vision extraction
        for entity_name in raw_entities:
            # Skip non-strings (len() would raise or mislead) and names too
            # short to be meaningful.
            if not isinstance(entity_name, str) or len(entity_name) < 2:
                continue
            if not self._store.has_entity(entity_name):
                self._store.merge_entity(
                    entity_name,
                    "concept",
                    [f"Identified in {content_type} screenshot"],
                    source=source,
                )
            # Recorded for every listed entity, including pre-existing ones.
            self._store.add_occurrence(
                entity_name,
                source,
                text=f"Visible in {content_type} (frame {capture.get('frame_index', '?')})",
            )
| 233 | |
| 234 | def to_data(self) -> KnowledgeGraphData: |
| 235 | """Convert to pydantic KnowledgeGraphData model.""" |
| 236 | nodes = [] |
| 237 | for entity in self._store.get_all_entities(): |
| 238 |
+14
-1
| --- video_processor/models.py | ||
| +++ video_processor/models.py | ||
| @@ -97,19 +97,32 @@ | ||
| 97 | 97 | png_path: Optional[str] = Field(default=None, description="Relative path to rendered PNG") |
| 98 | 98 | mermaid_path: Optional[str] = Field(default=None, description="Relative path to mermaid source") |
| 99 | 99 | |
| 100 | 100 | |
class ScreenCapture(BaseModel):
    """A screen capture with knowledge extraction from shared content."""

    # Where the capture came from and where its image is stored.
    frame_index: int = Field(description="Index of the source frame")
    timestamp: Optional[float] = Field(
        default=None,
        description="Timestamp in video (seconds)",
    )
    caption: Optional[str] = Field(
        default=None,
        description="Brief description of the content",
    )
    image_path: Optional[str] = Field(
        default=None,
        description="Relative path to screenshot",
    )
    confidence: float = Field(
        default=0.0,
        description="Detection confidence that triggered fallback",
    )

    # Knowledge extracted from the screenshot; all optional / empty by default.
    content_type: Optional[str] = Field(
        default=None,
        description="Content type: slide, code, document, terminal, browser, chat, other",
    )
    text_content: Optional[str] = Field(
        default=None,
        description="All visible text extracted from the screenshot",
    )
    entities: List[str] = Field(
        default_factory=list,
        description="Entities identified in the screenshot",
    )
    topics: List[str] = Field(
        default_factory=list,
        description="Topics or concepts visible in the screenshot",
    )
| 111 | 124 | |
| 112 | 125 | |
| 113 | 126 | class SourceRecord(BaseModel): |
| 114 | 127 | """A content source registered in the knowledge graph for provenance tracking.""" |
| 115 | 128 | |
| 116 | 129 |
| --- video_processor/models.py | |
| +++ video_processor/models.py | |
| @@ -97,19 +97,32 @@ | |
| 97 | png_path: Optional[str] = Field(default=None, description="Relative path to rendered PNG") |
| 98 | mermaid_path: Optional[str] = Field(default=None, description="Relative path to mermaid source") |
| 99 | |
| 100 | |
class ScreenCapture(BaseModel):
    """A screengrab fallback when diagram extraction fails or is uncertain."""

    # Where the capture came from and where its image is stored.
    frame_index: int = Field(description="Index of the source frame")
    timestamp: Optional[float] = Field(
        default=None,
        description="Timestamp in video (seconds)",
    )
    caption: Optional[str] = Field(
        default=None,
        description="Brief description of the content",
    )
    image_path: Optional[str] = Field(
        default=None,
        description="Relative path to screenshot",
    )
    confidence: float = Field(
        default=0.0,
        description="Detection confidence that triggered fallback",
    )
| 111 | |
| 112 | |
| 113 | class SourceRecord(BaseModel): |
| 114 | """A content source registered in the knowledge graph for provenance tracking.""" |
| 115 | |
| 116 |
| --- video_processor/models.py | |
| +++ video_processor/models.py | |
| @@ -97,19 +97,32 @@ | |
| 97 | png_path: Optional[str] = Field(default=None, description="Relative path to rendered PNG") |
| 98 | mermaid_path: Optional[str] = Field(default=None, description="Relative path to mermaid source") |
| 99 | |
| 100 | |
class ScreenCapture(BaseModel):
    """A screen capture with knowledge extraction from shared content."""

    # Where the capture came from and where its image is stored.
    frame_index: int = Field(description="Index of the source frame")
    timestamp: Optional[float] = Field(
        default=None,
        description="Timestamp in video (seconds)",
    )
    caption: Optional[str] = Field(
        default=None,
        description="Brief description of the content",
    )
    image_path: Optional[str] = Field(
        default=None,
        description="Relative path to screenshot",
    )
    confidence: float = Field(
        default=0.0,
        description="Detection confidence that triggered fallback",
    )

    # Knowledge extracted from the screenshot; all optional / empty by default.
    content_type: Optional[str] = Field(
        default=None,
        description="Content type: slide, code, document, terminal, browser, chat, other",
    )
    text_content: Optional[str] = Field(
        default=None,
        description="All visible text extracted from the screenshot",
    )
    entities: List[str] = Field(
        default_factory=list,
        description="Entities identified in the screenshot",
    )
    topics: List[str] = Field(
        default_factory=list,
        description="Topics or concepts visible in the screenshot",
    )
| 124 | |
| 125 | |
| 126 | class SourceRecord(BaseModel): |
| 127 | """A content source registered in the knowledge graph for provenance tracking.""" |
| 128 | |
| 129 |
| --- video_processor/pipeline.py | ||
| +++ video_processor/pipeline.py | ||
| @@ -243,10 +243,13 @@ | ||
| 243 | 243 | ) |
| 244 | 244 | kg.process_transcript(transcript_data) |
| 245 | 245 | if diagrams: |
| 246 | 246 | diagram_dicts = [d.model_dump() for d in diagrams] |
| 247 | 247 | kg.process_diagrams(diagram_dicts) |
| 248 | + if screen_captures: | |
| 249 | + capture_dicts = [sc.model_dump() for sc in screen_captures] | |
| 250 | + kg.process_screenshots(capture_dicts) | |
| 248 | 251 | # Export JSON copy alongside the SQLite db |
| 249 | 252 | kg.save(kg_json_path) |
| 250 | 253 | pipeline_bar.update(1) |
| 251 | 254 | _notify(progress_callback, "on_step_complete", steps[4], 5, total_steps) |
| 252 | 255 | |
| 253 | 256 |
| --- video_processor/pipeline.py | |
| +++ video_processor/pipeline.py | |
| @@ -243,10 +243,13 @@ | |
| 243 | ) |
| 244 | kg.process_transcript(transcript_data) |
| 245 | if diagrams: |
| 246 | diagram_dicts = [d.model_dump() for d in diagrams] |
| 247 | kg.process_diagrams(diagram_dicts) |
| 248 | # Export JSON copy alongside the SQLite db |
| 249 | kg.save(kg_json_path) |
| 250 | pipeline_bar.update(1) |
| 251 | _notify(progress_callback, "on_step_complete", steps[4], 5, total_steps) |
| 252 | |
| 253 |
| --- video_processor/pipeline.py | |
| +++ video_processor/pipeline.py | |
| @@ -243,10 +243,13 @@ | |
| 243 | ) |
| 244 | kg.process_transcript(transcript_data) |
| 245 | if diagrams: |
| 246 | diagram_dicts = [d.model_dump() for d in diagrams] |
| 247 | kg.process_diagrams(diagram_dicts) |
| 248 | if screen_captures: |
| 249 | capture_dicts = [sc.model_dump() for sc in screen_captures] |
| 250 | kg.process_screenshots(capture_dicts) |
| 251 | # Export JSON copy alongside the SQLite db |
| 252 | kg.save(kg_json_path) |
| 253 | pipeline_bar.update(1) |
| 254 | _notify(progress_callback, "on_step_complete", steps[4], 5, total_steps) |
| 255 | |
| 256 |