PlanOpticon

planopticon / tests / test_content_analyzer.py
Blame History Raw 214 lines
1
"""Tests for content cross-referencing between transcript and diagram entities."""
2
3
import json
4
from unittest.mock import MagicMock
5
6
from video_processor.analyzers.content_analyzer import ContentAnalyzer
7
from video_processor.models import Entity, KeyPoint
8
9
10
class TestCrossReference:
    """Merging behaviour of ContentAnalyzer.cross_reference without an LLM."""

    def test_exact_match_merges(self):
        from_transcript = [
            Entity(name="Python", type="concept", descriptions=["A language"]),
        ]
        from_diagram = [
            Entity(name="Python", type="concept", descriptions=["A snake-named lang"]),
        ]
        merged = ContentAnalyzer().cross_reference(from_transcript, from_diagram)
        assert len(merged) == 1
        entity = merged[0]
        # Descriptions from both sides survive the merge.
        assert entity.source == "both"
        assert "A language" in entity.descriptions
        assert "A snake-named lang" in entity.descriptions

    def test_case_insensitive_merge(self):
        merged = ContentAnalyzer().cross_reference(
            [Entity(name="Docker", type="technology", descriptions=["Containers"])],
            [Entity(name="docker", type="technology", descriptions=["Container runtime"])],
        )
        assert len(merged) == 1
        assert merged[0].source == "both"

    def test_no_overlap_keeps_both(self):
        merged = ContentAnalyzer().cross_reference(
            [Entity(name="Python", type="concept", descriptions=["Lang"])],
            [Entity(name="Rust", type="concept", descriptions=["Systems"])],
        )
        assert len(merged) == 2
        assert {entity.name for entity in merged} == {"Python", "Rust"}

    def test_transcript_only(self):
        merged = ContentAnalyzer().cross_reference(
            [Entity(name="Foo", type="concept")], []
        )
        assert len(merged) == 1
        assert merged[0].source == "transcript"

    def test_diagram_only(self):
        merged = ContentAnalyzer().cross_reference(
            [], [Entity(name="Bar", type="concept")]
        )
        assert len(merged) == 1
        assert merged[0].source == "diagram"

    def test_empty_inputs(self):
        assert ContentAnalyzer().cross_reference([], []) == []

    def test_occurrences_merged(self):
        from_transcript = [
            Entity(name="API", type="concept", occurrences=[{"source": "transcript", "ts": 10}]),
        ]
        from_diagram = [
            Entity(name="API", type="concept", occurrences=[{"source": "diagram", "ts": 20}]),
        ]
        merged = ContentAnalyzer().cross_reference(from_transcript, from_diagram)
        assert len(merged) == 1
        # Occurrence lists are concatenated, not replaced.
        assert len(merged[0].occurrences) == 2
74
class TestFuzzyMatch:
    """LLM-backed fuzzy matching path of cross_reference, with a mocked provider."""

    @staticmethod
    def _analyzer_with_response(response):
        provider = MagicMock()
        provider.chat.return_value = response
        return ContentAnalyzer(provider_manager=provider), provider

    def test_fuzzy_match_with_llm(self):
        pairs = json.dumps([{"transcript": "K8s", "diagram": "Kubernetes"}])
        analyzer, _ = self._analyzer_with_response(pairs)

        merged = analyzer.cross_reference(
            [Entity(name="K8s", type="technology", descriptions=["Container orchestration"])],
            [Entity(name="Kubernetes", type="technology", descriptions=["K8s system"])],
        )

        # Fuzzy match should merge these
        assert len(merged) == 1
        assert merged[0].source == "both"
        # Transcript-side name wins for the merged entity.
        assert merged[0].name == "K8s"

    def test_fuzzy_match_no_matches(self):
        analyzer, _ = self._analyzer_with_response("[]")
        merged = analyzer.cross_reference(
            [Entity(name="Alpha", type="concept")],
            [Entity(name="Beta", type="concept")],
        )
        assert len(merged) == 2

    def test_fuzzy_match_llm_error(self):
        provider = MagicMock()
        provider.chat.side_effect = Exception("API error")
        analyzer = ContentAnalyzer(provider_manager=provider)

        merged = analyzer.cross_reference(
            [Entity(name="X", type="concept")],
            [Entity(name="Y", type="concept")],
        )
        # Should still return both entities despite error
        assert len(merged) == 2

    def test_fuzzy_match_bad_json(self):
        analyzer, _ = self._analyzer_with_response("not json at all")
        merged = analyzer.cross_reference(
            [Entity(name="A", type="concept")],
            [Entity(name="B", type="concept")],
        )
        assert len(merged) == 2

    def test_fuzzy_match_skipped_without_provider(self):
        merged = ContentAnalyzer().cross_reference(
            [Entity(name="ML", type="concept")],
            [Entity(name="Machine Learning", type="concept")],
        )
        # No LLM so no fuzzy matching — both remain separate
        assert len(merged) == 2

    def test_fuzzy_match_skipped_when_all_exact(self):
        provider = MagicMock()
        analyzer = ContentAnalyzer(provider_manager=provider)

        merged = analyzer.cross_reference(
            [Entity(name="Same", type="concept")],
            [Entity(name="Same", type="concept")],
        )
        # All matched exactly — no fuzzy match call needed
        provider.chat.assert_not_called()
        assert len(merged) == 1
148
class TestEnrichKeyPoints:
    """Associating key points with diagrams via keyword overlap."""

    def test_enriches_with_matching_diagrams(self):
        points = [KeyPoint(point="The deployment pipeline uses Docker containers")]
        diagram_data = [
            {"elements": ["Docker", "Pipeline", "Build"], "text_content": "CI/CD flow"},
        ]
        enriched = ContentAnalyzer().enrich_key_points(points, diagram_data, "")
        assert len(enriched) == 1
        assert enriched[0].related_diagrams == [0]

    def test_no_match_below_threshold(self):
        points = [KeyPoint(point="Meeting scheduled for Friday")]
        diagram_data = [
            {"elements": ["Docker", "Pipeline"], "text_content": "Architecture diagram"},
        ]
        enriched = ContentAnalyzer().enrich_key_points(points, diagram_data, "")
        # Unrelated key point picks up no diagram references.
        assert enriched[0].related_diagrams == []

    def test_empty_diagrams_returns_unchanged(self):
        enriched = ContentAnalyzer().enrich_key_points(
            [KeyPoint(point="Test point")], [], ""
        )
        assert len(enriched) == 1
        assert enriched[0].related_diagrams == []

    def test_multiple_diagram_matches(self):
        points = [KeyPoint(point="Database migration requires testing schema changes")]
        diagram_data = [
            {"elements": ["Database", "Schema", "Migration"], "text_content": ""},
            {"elements": ["Testing", "Schema", "Validation"], "text_content": ""},
        ]
        enriched = ContentAnalyzer().enrich_key_points(points, diagram_data, "")
        # Both diagrams overlap the point enough to be linked.
        assert len(enriched[0].related_diagrams) == 2

    def test_details_used_for_matching(self):
        points = [
            KeyPoint(
                point="Architecture overview", details="Uses Docker and Kubernetes for deployment"
            ),
        ]
        diagram_data = [
            {"elements": ["Docker", "Kubernetes"], "text_content": "deployment infrastructure"},
        ]
        enriched = ContentAnalyzer().enrich_key_points(points, diagram_data, "")
        # The details field alone should be enough to match diagram 0.
        assert 0 in enriched[0].related_diagrams

    def test_diagram_as_object_with_attrs(self):
        # Diagrams may arrive as objects exposing attributes instead of dicts.
        class StubDiagram:
            elements = ["Alpha", "Beta"]
            text_content = "some relevant content"

        enriched = ContentAnalyzer().enrich_key_points(
            [KeyPoint(point="Alpha Beta interaction patterns")], [StubDiagram()], ""
        )
        assert enriched[0].related_diagrams == [0]

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button