PlanOpticon

planopticon / tests / test_content_analyzer.py
Source Blame History 213 lines
ccf32cc… leo 1 """Tests for content cross-referencing between transcript and diagram entities."""
ccf32cc… leo 2
ccf32cc… leo 3 import json
829e24a… leo 4 from unittest.mock import MagicMock
ccf32cc… leo 5
ccf32cc… leo 6 from video_processor.analyzers.content_analyzer import ContentAnalyzer
ccf32cc… leo 7 from video_processor.models import Entity, KeyPoint
ccf32cc… leo 8
ccf32cc… leo 9
class TestCrossReference:
    """Check ``ContentAnalyzer.cross_reference`` when no provider manager is supplied."""

    def test_exact_match_merges(self):
        """Two entities with the same name become one entry sourced from ``both``."""
        from_transcript = [Entity(name="Python", type="concept", descriptions=["A language"])]
        from_diagram = [
            Entity(name="Python", type="concept", descriptions=["A snake-named lang"]),
        ]

        merged = ContentAnalyzer().cross_reference(from_transcript, from_diagram)

        assert len(merged) == 1
        assert merged[0].source == "both"
        # Descriptions from both inputs must be present on the merged entity.
        assert "A language" in merged[0].descriptions
        assert "A snake-named lang" in merged[0].descriptions

    def test_case_insensitive_merge(self):
        """Names that differ only by letter case are treated as the same entity."""
        upper = Entity(name="Docker", type="technology", descriptions=["Containers"])
        lower = Entity(name="docker", type="technology", descriptions=["Container runtime"])

        merged = ContentAnalyzer().cross_reference([upper], [lower])

        assert len(merged) == 1
        assert merged[0].source == "both"

    def test_no_overlap_keeps_both(self):
        """Entities with unrelated names are both kept in the output."""
        python_entity = Entity(name="Python", type="concept", descriptions=["Lang"])
        rust_entity = Entity(name="Rust", type="concept", descriptions=["Systems"])

        merged = ContentAnalyzer().cross_reference([python_entity], [rust_entity])

        assert len(merged) == 2
        found_names = {entity.name for entity in merged}
        assert found_names == {"Python", "Rust"}

    def test_transcript_only(self):
        """An entity seen only in the transcript is tagged ``transcript``."""
        only_transcript = [Entity(name="Foo", type="concept")]

        merged = ContentAnalyzer().cross_reference(only_transcript, [])

        assert len(merged) == 1
        assert merged[0].source == "transcript"

    def test_diagram_only(self):
        """An entity seen only in a diagram is tagged ``diagram``."""
        only_diagram = [Entity(name="Bar", type="concept")]

        merged = ContentAnalyzer().cross_reference([], only_diagram)

        assert len(merged) == 1
        assert merged[0].source == "diagram"

    def test_empty_inputs(self):
        """Two empty inputs produce an empty list."""
        merged = ContentAnalyzer().cross_reference([], [])
        assert merged == []

    def test_occurrences_merged(self):
        """Occurrences from both inputs are combined on the merged entity."""
        spoken = {"source": "transcript", "ts": 10}
        drawn = {"source": "diagram", "ts": 20}
        from_transcript = [Entity(name="API", type="concept", occurrences=[spoken])]
        from_diagram = [Entity(name="API", type="concept", occurrences=[drawn])]

        merged = ContentAnalyzer().cross_reference(from_transcript, from_diagram)

        assert len(merged) == 1
        assert len(merged[0].occurrences) == 2
ccf32cc… leo 72
ccf32cc… leo 73
class TestFuzzyMatch:
    """Check ``cross_reference`` when a (mocked) provider manager may be consulted."""

    def test_fuzzy_match_with_llm(self):
        """A pairing returned by the mocked chat call merges the two entities."""
        llm_pairs = [
            {"transcript": "K8s", "diagram": "Kubernetes"},
        ]
        provider = MagicMock()
        provider.chat.return_value = json.dumps(llm_pairs)
        analyzer = ContentAnalyzer(provider_manager=provider)

        short_name = Entity(
            name="K8s", type="technology", descriptions=["Container orchestration"]
        )
        long_name = Entity(name="Kubernetes", type="technology", descriptions=["K8s system"])

        merged = analyzer.cross_reference([short_name], [long_name])

        # Fuzzy match should merge these
        assert len(merged) == 1
        assert merged[0].source == "both"
        assert merged[0].name == "K8s"

    def test_fuzzy_match_no_matches(self):
        """An empty JSON list from the chat call leaves both entities separate."""
        provider = MagicMock()
        provider.chat.return_value = "[]"
        analyzer = ContentAnalyzer(provider_manager=provider)

        merged = analyzer.cross_reference(
            [Entity(name="Alpha", type="concept")],
            [Entity(name="Beta", type="concept")],
        )

        assert len(merged) == 2

    def test_fuzzy_match_llm_error(self):
        """An exception raised by the chat call does not lose any entity."""
        provider = MagicMock()
        provider.chat.side_effect = Exception("API error")
        analyzer = ContentAnalyzer(provider_manager=provider)

        merged = analyzer.cross_reference(
            [Entity(name="X", type="concept")],
            [Entity(name="Y", type="concept")],
        )

        # Should still return both entities despite error
        assert len(merged) == 2

    def test_fuzzy_match_bad_json(self):
        """A non-JSON chat response leaves both entities separate."""
        provider = MagicMock()
        provider.chat.return_value = "not json at all"
        analyzer = ContentAnalyzer(provider_manager=provider)

        merged = analyzer.cross_reference(
            [Entity(name="A", type="concept")],
            [Entity(name="B", type="concept")],
        )

        assert len(merged) == 2

    def test_fuzzy_match_skipped_without_provider(self):
        """Without a provider manager, differently named entities are not merged."""
        abbreviation = Entity(name="ML", type="concept")
        spelled_out = Entity(name="Machine Learning", type="concept")

        merged = ContentAnalyzer().cross_reference([abbreviation], [spelled_out])

        # No LLM so no fuzzy matching — both remain separate
        assert len(merged) == 2

    def test_fuzzy_match_skipped_when_all_exact(self):
        """When every entity matches exactly, the chat call is never made."""
        provider = MagicMock()
        analyzer = ContentAnalyzer(provider_manager=provider)

        merged = analyzer.cross_reference(
            [Entity(name="Same", type="concept")],
            [Entity(name="Same", type="concept")],
        )

        # All matched exactly — no fuzzy match call needed
        provider.chat.assert_not_called()
        assert len(merged) == 1
ccf32cc… leo 146
ccf32cc… leo 147
class TestEnrichKeyPoints:
    """Check how ``ContentAnalyzer.enrich_key_points`` fills ``related_diagrams``."""

    def test_enriches_with_matching_diagrams(self):
        """A key point sharing words with a diagram is linked to that diagram's index."""
        key_points = [KeyPoint(point="The deployment pipeline uses Docker containers")]
        diagram = {"elements": ["Docker", "Pipeline", "Build"], "text_content": "CI/CD flow"}

        enriched = ContentAnalyzer().enrich_key_points(key_points, [diagram], "")

        assert len(enriched) == 1
        assert enriched[0].related_diagrams == [0]

    def test_no_match_below_threshold(self):
        """A key point unrelated to the diagram gets no related diagrams."""
        key_points = [KeyPoint(point="Meeting scheduled for Friday")]
        diagram = {"elements": ["Docker", "Pipeline"], "text_content": "Architecture diagram"}

        enriched = ContentAnalyzer().enrich_key_points(key_points, [diagram], "")

        assert enriched[0].related_diagrams == []

    def test_empty_diagrams_returns_unchanged(self):
        """With no diagrams, the key point is returned with no related diagrams."""
        key_points = [KeyPoint(point="Test point")]

        enriched = ContentAnalyzer().enrich_key_points(key_points, [], "")

        assert len(enriched) == 1
        assert enriched[0].related_diagrams == []

    def test_multiple_diagram_matches(self):
        """One key point can be linked to more than one diagram."""
        key_points = [KeyPoint(point="Database migration requires testing schema changes")]
        storage_diagram = {"elements": ["Database", "Schema", "Migration"], "text_content": ""}
        qa_diagram = {"elements": ["Testing", "Schema", "Validation"], "text_content": ""}

        enriched = ContentAnalyzer().enrich_key_points(
            key_points, [storage_diagram, qa_diagram], ""
        )

        assert len(enriched[0].related_diagrams) == 2

    def test_details_used_for_matching(self):
        """Matching words supplied via the key point's ``details`` still produce a link."""
        overview = KeyPoint(
            point="Architecture overview",
            details="Uses Docker and Kubernetes for deployment",
        )
        diagram = {
            "elements": ["Docker", "Kubernetes"],
            "text_content": "deployment infrastructure",
        }

        enriched = ContentAnalyzer().enrich_key_points([overview], [diagram], "")

        assert 0 in enriched[0].related_diagrams

    def test_diagram_as_object_with_attrs(self):
        """A diagram given as an object with attributes is handled like a dict."""

        class FakeDiagram:
            # Same field names as the dict-style diagrams used in the other tests.
            elements = ["Alpha", "Beta"]
            text_content = "some relevant content"

        key_points = [KeyPoint(point="Alpha Beta interaction patterns")]

        enriched = ContentAnalyzer().enrich_key_points(key_points, [FakeDiagram()], "")

        assert enriched[0].related_diagrams == [0]

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button