PlanOpticon

Blame History Raw 315 lines
1
"""Pydantic data models for PlanOpticon output."""
2
3
from datetime import datetime
4
from enum import Enum
5
from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
6
7
from pydantic import BaseModel, Field
8
9
10
@runtime_checkable
class ProgressCallback(Protocol):
    """Optional callback for pipeline progress updates.

    This is a structural (duck-typed) interface: any object providing the
    three methods below satisfies it. Because the protocol is decorated with
    ``runtime_checkable``, ``isinstance(obj, ProgressCallback)`` works, but it
    only verifies that the methods exist, not their signatures.
    """

    # Called with the step name, its index, and the total number of steps.
    def on_step_start(self, step: str, index: int, total: int) -> None: ...

    # Same arguments as on_step_start, for the completion notification.
    def on_step_complete(self, step: str, index: int, total: int) -> None: ...

    # Progress report for a step; ``message`` is optional and defaults to "".
    def on_progress(self, step: str, percent: float, message: str = "") -> None: ...
17
18
19
class DiagramType(str, Enum):
    """Types of visual content detected in video frames.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``DiagramType.chart == "chart"``).
    """

    flowchart = "flowchart"
    sequence = "sequence"
    architecture = "architecture"
    whiteboard = "whiteboard"
    chart = "chart"
    table = "table"
    slide = "slide"
    screenshot = "screenshot"
    unknown = "unknown"
31
32
33
class OutputFormat(str, Enum):
    """Available output formats.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``OutputFormat.json == "json"``).
    """

    markdown = "markdown"
    json = "json"
    html = "html"
    pdf = "pdf"
    pptx = "pptx"
    svg = "svg"
    png = "png"
43
44
45
class TranscriptSegment(BaseModel):
    """A single segment of transcribed audio.

    ``start``, ``end`` and ``text`` are required; ``speaker`` and
    ``confidence`` default to ``None`` when not supplied.
    """

    start: float = Field(description="Start time in seconds")
    end: float = Field(description="End time in seconds")
    text: str = Field(description="Transcribed text")
    speaker: Optional[str] = Field(default=None, description="Speaker identifier")
    confidence: Optional[float] = Field(default=None, description="Transcription confidence 0-1")
53
54
55
class ActionItem(BaseModel):
    """An action item extracted from content.

    Only ``action`` is required; every other field defaults to ``None``.
    """

    action: str = Field(description="The action to be taken")
    assignee: Optional[str] = Field(default=None, description="Person responsible")
    deadline: Optional[str] = Field(default=None, description="Deadline or timeframe")
    priority: Optional[str] = Field(default=None, description="Priority level")
    context: Optional[str] = Field(default=None, description="Additional context")
    source: Optional[str] = Field(
        default=None, description="Where this was found (transcript/diagram)"
    )
66
67
68
class KeyPoint(BaseModel):
    """A key point extracted from content.

    Only ``point`` is required. ``related_diagrams`` defaults to a fresh empty
    list per instance; the remaining fields default to ``None``.
    """

    point: str = Field(description="The key point")
    topic: Optional[str] = Field(default=None, description="Topic or category")
    details: Optional[str] = Field(default=None, description="Supporting details")
    timestamp: Optional[float] = Field(default=None, description="Timestamp in video (seconds)")
    source: Optional[str] = Field(default=None, description="Where this was found")
    related_diagrams: List[int] = Field(
        default_factory=list, description="Indices of related diagrams"
    )
79
80
81
class DiagramResult(BaseModel):
    """Result from diagram extraction and analysis.

    Only ``frame_index`` is required. ``diagram_type`` defaults to
    ``DiagramType.unknown`` and ``confidence`` to ``0.0``; list fields default
    to fresh empty lists and all remaining fields to ``None``.
    """

    frame_index: int = Field(description="Index of the source frame")
    timestamp: Optional[float] = Field(default=None, description="Timestamp in video (seconds)")
    diagram_type: DiagramType = Field(default=DiagramType.unknown, description="Type of diagram")
    confidence: float = Field(default=0.0, description="Detection confidence 0-1")
    description: Optional[str] = Field(default=None, description="Description of the diagram")
    text_content: Optional[str] = Field(default=None, description="Text visible in the diagram")
    elements: List[str] = Field(default_factory=list, description="Identified elements")
    relationships: List[str] = Field(default_factory=list, description="Identified relationships")
    mermaid: Optional[str] = Field(default=None, description="Mermaid syntax representation")
    chart_data: Optional[Dict[str, Any]] = Field(
        default=None, description="Chart data for reproduction (labels, values, chart_type)"
    )
    image_path: Optional[str] = Field(default=None, description="Relative path to original frame")
    svg_path: Optional[str] = Field(default=None, description="Relative path to rendered SVG")
    png_path: Optional[str] = Field(default=None, description="Relative path to rendered PNG")
    mermaid_path: Optional[str] = Field(default=None, description="Relative path to mermaid source")
100
101
102
class ScreenCapture(BaseModel):
    """A screen capture with knowledge extraction from shared content.

    Only ``frame_index`` is required. ``confidence`` defaults to ``0.0``;
    ``entities`` and ``topics`` default to fresh empty lists and all remaining
    fields to ``None``.
    """

    frame_index: int = Field(description="Index of the source frame")
    timestamp: Optional[float] = Field(default=None, description="Timestamp in video (seconds)")
    caption: Optional[str] = Field(default=None, description="Brief description of the content")
    image_path: Optional[str] = Field(default=None, description="Relative path to screenshot")
    confidence: float = Field(
        default=0.0, description="Detection confidence that triggered fallback"
    )
    content_type: Optional[str] = Field(
        default=None,
        description="Content type: slide, code, document, terminal, browser, chat, other",
    )
    text_content: Optional[str] = Field(
        default=None, description="All visible text extracted from the screenshot"
    )
    entities: List[str] = Field(
        default_factory=list, description="Entities identified in the screenshot"
    )
    topics: List[str] = Field(
        default_factory=list, description="Topics or concepts visible in the screenshot"
    )
125
126
127
class SourceRecord(BaseModel):
    """A content source registered in the knowledge graph for provenance tracking.

    ``source_id``, ``source_type`` and ``title`` are required. ``ingested_at``
    is filled in at construction time when not supplied, and ``metadata``
    defaults to a fresh empty dict.
    """

    source_id: str = Field(description="Unique identifier for this source")
    source_type: str = Field(description="Source type: video, document, url, api, manual")
    title: str = Field(description="Human-readable title")
    path: Optional[str] = Field(default=None, description="Local file path")
    url: Optional[str] = Field(default=None, description="URL if applicable")
    mime_type: Optional[str] = Field(default=None, description="MIME type of the source")
    # NOTE(review): datetime.now() is naive local time, so the ISO string has
    # no UTC offset -- confirm consumers expect that before changing it.
    ingested_at: str = Field(
        default_factory=lambda: datetime.now().isoformat(),
        description="ISO format ingestion timestamp",
    )
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional source metadata")
141
142
143
class Entity(BaseModel):
    """An entity in the knowledge graph.

    Only ``name`` is required. ``type`` defaults to ``"concept"``;
    ``descriptions`` and ``occurrences`` default to fresh empty lists and
    ``source`` to ``None``.
    """

    name: str = Field(description="Entity name")
    type: str = Field(default="concept", description="Entity type (person, concept, time, diagram)")
    descriptions: List[str] = Field(default_factory=list, description="Descriptions of this entity")
    source: Optional[str] = Field(
        default=None, description="Source attribution (transcript/diagram/both)"
    )
    occurrences: List[Dict[str, Any]] = Field(
        default_factory=list, description="List of occurrences with source, timestamp, text"
    )
155
156
157
class Relationship(BaseModel):
    """A relationship between entities in the knowledge graph.

    ``source`` and ``target`` are required entity names. ``type`` defaults to
    ``"related_to"``; ``content_source`` and ``timestamp`` default to ``None``.
    """

    source: str = Field(description="Source entity name")
    target: str = Field(description="Target entity name")
    type: str = Field(default="related_to", description="Relationship type")
    content_source: Optional[str] = Field(default=None, description="Content source identifier")
    timestamp: Optional[float] = Field(default=None, description="Timestamp in seconds")
165
166
167
class KnowledgeGraphData(BaseModel):
    """Serializable knowledge graph data.

    Bundles entities, relationships and source records. Every field defaults
    to a fresh empty list, so the model can be built with no arguments.
    """

    nodes: List[Entity] = Field(default_factory=list, description="Graph nodes/entities")
    relationships: List[Relationship] = Field(
        default_factory=list, description="Graph relationships"
    )
    sources: List[SourceRecord] = Field(
        default_factory=list, description="Content sources for provenance tracking"
    )
177
178
179
class PlanningEntityType(str, Enum):
    """Types of entities in a planning taxonomy.

    Members also subclass ``str``; member names are upper-case while their
    values are the lower-case strings (e.g. ``PlanningEntityType.GOAL == "goal"``).
    """

    GOAL = "goal"
    REQUIREMENT = "requirement"
    CONSTRAINT = "constraint"
    DECISION = "decision"
    RISK = "risk"
    ASSUMPTION = "assumption"
    DEPENDENCY = "dependency"
    MILESTONE = "milestone"
    TASK = "task"
    FEATURE = "feature"
192
193
194
class PlanningEntity(BaseModel):
    """An entity classified for planning purposes.

    ``name`` and ``planning_type`` are required. ``description`` defaults to an
    empty string, ``priority`` and ``status`` to ``None``, and the collection
    fields to fresh empty containers.
    """

    name: str
    planning_type: PlanningEntityType
    description: str = ""
    priority: Optional[str] = None  # "high", "medium", "low"
    status: Optional[str] = None  # "identified", "confirmed", "resolved"
    source_entities: List[str] = Field(default_factory=list)
    metadata: Dict[str, Any] = Field(default_factory=dict)
204
205
206
class PlanningRelationshipType(str, Enum):
    """Relationship types within a planning taxonomy.

    Members also subclass ``str``; member names are upper-case while their
    values are the lower-case strings
    (e.g. ``PlanningRelationshipType.REQUIRES == "requires"``).
    """

    REQUIRES = "requires"
    BLOCKED_BY = "blocked_by"
    HAS_RISK = "has_risk"
    DEPENDS_ON = "depends_on"
    ADDRESSES = "addresses"
    HAS_TRADEOFF = "has_tradeoff"
    DELIVERS = "delivers"
    IMPLEMENTS = "implements"
    PARENT_OF = "parent_of"
218
219
220
class ProcessingStats(BaseModel):
    """Statistics about a processing run.

    Every field has a default, so the model can be built with no arguments:
    timing fields default to ``None``, counters to ``0``, and ``models_used``
    to a fresh empty dict.
    """

    start_time: Optional[str] = Field(default=None, description="ISO format start time")
    end_time: Optional[str] = Field(default=None, description="ISO format end time")
    duration_seconds: Optional[float] = Field(default=None, description="Total processing time")
    frames_extracted: int = Field(default=0)
    people_frames_filtered: int = Field(default=0)
    diagrams_detected: int = Field(default=0)
    screen_captures: int = Field(default=0)
    transcript_duration_seconds: Optional[float] = Field(default=None)
    models_used: Dict[str, str] = Field(
        default_factory=dict, description="Map of task to model used (e.g. vision: gpt-4o)"
    )
234
235
236
class VideoMetadata(BaseModel):
    """Metadata about the source video.

    Only ``title`` is required. ``processed_at`` is filled in at construction
    time when not supplied; the remaining fields default to ``None``.
    """

    title: str = Field(description="Video title")
    source_path: Optional[str] = Field(default=None, description="Original video file path")
    duration_seconds: Optional[float] = Field(default=None, description="Video duration")
    resolution: Optional[str] = Field(default=None, description="Video resolution (e.g. 1920x1080)")
    # NOTE(review): datetime.now() is naive local time, so the ISO string has
    # no UTC offset -- confirm consumers expect that before changing it.
    processed_at: str = Field(
        default_factory=lambda: datetime.now().isoformat(),
        description="ISO format processing timestamp",
    )
247
248
249
class VideoManifest(BaseModel):
    """Manifest for a single video processing run - the single source of truth.

    Only ``video`` is required. ``version`` defaults to ``"1.0"``, ``stats`` to
    a default-constructed ``ProcessingStats``, output paths to ``None`` and
    list fields to fresh empty lists.
    """

    version: str = Field(default="1.0", description="Manifest schema version")
    video: VideoMetadata = Field(description="Source video metadata")
    stats: ProcessingStats = Field(default_factory=ProcessingStats)

    # Relative paths to output files
    transcript_json: Optional[str] = Field(default=None)
    transcript_txt: Optional[str] = Field(default=None)
    transcript_srt: Optional[str] = Field(default=None)
    analysis_md: Optional[str] = Field(default=None)
    analysis_html: Optional[str] = Field(default=None)
    analysis_pdf: Optional[str] = Field(default=None)
    knowledge_graph_json: Optional[str] = Field(default=None)
    knowledge_graph_db: Optional[str] = Field(default=None)
    key_points_json: Optional[str] = Field(default=None)
    action_items_json: Optional[str] = Field(default=None)

    # Inline structured data
    key_points: List[KeyPoint] = Field(default_factory=list)
    action_items: List[ActionItem] = Field(default_factory=list)
    diagrams: List[DiagramResult] = Field(default_factory=list)
    screen_captures: List[ScreenCapture] = Field(default_factory=list)

    # Frame paths
    frame_paths: List[str] = Field(
        default_factory=list, description="Relative paths to extracted frames"
    )
278
279
280
class BatchVideoEntry(BaseModel):
    """Summary of a single video within a batch.

    ``video_name`` and ``manifest_path`` are required. ``status`` defaults to
    ``"pending"``, the counters to ``0``, and ``error`` / ``duration_seconds``
    to ``None``.
    """

    video_name: str
    manifest_path: str = Field(description="Relative path to video manifest")
    status: str = Field(default="pending", description="pending/completed/failed")
    error: Optional[str] = Field(default=None, description="Error message if failed")
    diagrams_count: int = Field(default=0)
    action_items_count: int = Field(default=0)
    key_points_count: int = Field(default=0)
    duration_seconds: Optional[float] = Field(default=None)
291
292
293
class BatchManifest(BaseModel):
    """Manifest for a batch processing run.

    Every field has a default, so the model can be built with no arguments.
    The aggregated counters are plain stored integers; nothing in this model
    derives them from ``videos``, so callers are responsible for setting them.
    """

    version: str = Field(default="1.0")
    title: str = Field(default="Batch Processing Results")
    # NOTE(review): datetime.now() is naive local time, so the ISO string has
    # no UTC offset -- confirm consumers expect that before changing it.
    processed_at: str = Field(default_factory=lambda: datetime.now().isoformat())
    stats: ProcessingStats = Field(default_factory=ProcessingStats)

    videos: List[BatchVideoEntry] = Field(default_factory=list)

    # Aggregated counts
    total_videos: int = Field(default=0)
    completed_videos: int = Field(default=0)
    failed_videos: int = Field(default=0)
    total_diagrams: int = Field(default=0)
    total_action_items: int = Field(default=0)
    total_key_points: int = Field(default=0)

    # Batch-level output paths (relative)
    batch_summary_md: Optional[str] = Field(default=None)
    merged_knowledge_graph_json: Optional[str] = Field(default=None)
    merged_knowledge_graph_db: Optional[str] = Field(default=None)
315

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button