PlanOpticon

planopticon / video_processor / integrators / graph_discovery.py
Blame History Raw 141 lines
1
"""Auto-detect knowledge graph files in the filesystem."""
2
3
import logging
4
from pathlib import Path
5
from typing import Dict, List, Optional
6
7
logger = logging.getLogger(__name__)

# Conventional output subdirectories that are probed by name, in this order,
# in addition to the generic recursive search.
_OUTPUT_SUBDIRS = [
    "results",
    "output",
    "knowledge-base",
]

# Graph filenames recognised by discovery, grouped by storage format.
# Within each list, earlier names are preferred.
_DB_FILENAMES = [
    "knowledge_graph.db",
]
_JSON_FILENAMES = [
    "knowledge_graph.json",
]
15
16
17
def find_knowledge_graphs(
    start_dir: Optional[Path] = None,
    walk_up: bool = True,
    max_depth_down: int = 4,
) -> List[Path]:
    """Find knowledge graph files near *start_dir*, sorted by proximity.

    Search order:
      1. start_dir itself (distance 0)
      2. Common output subdirs (results/, output/, knowledge-base/)
         (distance 1)
      3. Recursive walk downward, skipping dot-directories. *start_dir*
         counts as depth 1, so directories nested up to
         ``max_depth_down - 1`` levels below it are scanned; a file's
         distance is the depth of the directory that contains it.
      4. Walk upward through every ancestor directory, up to and including
         the filesystem root (if *walk_up* is True). A file directly in the
         n-th ancestor has distance n; a file in one of that ancestor's
         output subdirs has distance n + 1.

    Paths are resolved and de-duplicated, so a file keeps the distance
    assigned by the first step that reached it. Locations that cannot be
    inspected (permission denied, vanished directory, symlink loop) are
    skipped instead of aborting the search.

    Args:
        start_dir: Directory to search from. Defaults to the current working
            directory.
        walk_up: Whether to run step 4.
        max_depth_down: Depth limit for step 3 (see above).

    Returns:
        Resolved paths: .db files first, then .json, each group sorted
        closest-first. Equal distances keep discovery order.
    """
    start_dir = Path(start_dir or Path.cwd()).resolve()
    # Build the combined name list once instead of on every loop iteration.
    graph_names = tuple(_DB_FILENAMES) + tuple(_JSON_FILENAMES)
    found_db: List[tuple] = []  # (distance, path)
    found_json: List[tuple] = []
    seen: set = set()

    def _record(path: Path, distance: int) -> None:
        """Store *path* in the matching bucket if it is a new, existing file."""
        try:
            rp = path.resolve()
            if rp in seen or not rp.is_file():
                return
        except (OSError, RuntimeError) as exc:
            # is_file() re-raises errors such as EACCES, and resolve() raises
            # RuntimeError on symlink loops before Python 3.13. One
            # unreadable candidate must not abort the whole discovery.
            logger.debug("Skipping %s: %s", path, exc)
            return
        seen.add(rp)
        bucket = found_db if rp.suffix == ".db" else found_json
        bucket.append((distance, rp))

    # 1. Direct check in start_dir
    for name in graph_names:
        _record(start_dir / name, 0)

    # 2. Common output subdirs
    for subdir in _OUTPUT_SUBDIRS:
        for name in graph_names:
            _record(start_dir / subdir / name, 1)

    # 3. Walk downward
    def _walk_down(directory: Path, depth: int) -> None:
        """Scan *directory* for graph files and recurse into visible subdirs."""
        if depth > max_depth_down:
            return
        try:
            # Sorted so that discovery order (and tie-breaking) is stable.
            children = sorted(directory.iterdir())
        except OSError as exc:
            logger.debug("Cannot list %s: %s", directory, exc)
            return
        for child in children:
            try:
                is_graph = child.name in graph_names and child.is_file()
                descend = (
                    not is_graph
                    and not child.name.startswith(".")
                    and child.is_dir()
                )
            except OSError as exc:
                # Skip only this entry; siblings are still examined.
                logger.debug("Cannot inspect %s: %s", child, exc)
                continue
            if is_graph:
                _record(child, depth)
            elif descend:
                _walk_down(child, depth + 1)

    _walk_down(start_dir, 1)

    # 4. Walk upward
    if walk_up:
        # Path.parents runs from the immediate parent up to the anchor, so
        # the filesystem root itself is inspected as the last ancestor.
        for distance, ancestor in enumerate(start_dir.parents, start=1):
            for name in graph_names:
                _record(ancestor / name, distance)
            for subdir in _OUTPUT_SUBDIRS:
                for name in graph_names:
                    _record(ancestor / subdir / name, distance + 1)

    # Sort each group by distance only (stable), then combine db-first.
    found_db.sort(key=lambda item: item[0])
    found_json.sort(key=lambda item: item[0])
    return [p for _, p in found_db] + [p for _, p in found_json]
86
87
88
def find_nearest_graph(start_dir: Optional[Path] = None) -> Optional[Path]:
    """Return the top-ranked knowledge graph file, or None if none is found.

    This is the first entry of ``find_knowledge_graphs(start_dir)``. That
    list places every .db file ahead of every .json file, so a .db file is
    returned even when a .json file sits closer to *start_dir*.
    """
    candidates = find_knowledge_graphs(start_dir)
    return next(iter(candidates), None)
92
93
94
def describe_graph(db_path: Path) -> Dict:
    """Return summary stats for a knowledge graph file.

    A path with a ``.json`` suffix is parsed and replayed into a fresh
    ``InMemoryStore``; any other path is handed to ``create_store``.

    Args:
        db_path: Path to the graph file. Its suffix selects the loader.

    Returns:
        Dict with: entity_count, relationship_count, entity_types (mapping of
        entity type to number of entities of that type), store_type
        (``"json"``, ``"sqlite"`` or ``"inmemory"``).
    """
    # Imported at call time rather than at module import time.
    from collections import Counter

    from video_processor.integrators.graph_store import (
        InMemoryStore,
        SQLiteStore,
        create_store,
    )

    db_path = Path(db_path)

    if db_path.suffix == ".json":
        import json

        # JSON interchange text is UTF-8 (RFC 8259). Reading with the
        # platform's locale encoding breaks non-ASCII names on systems whose
        # default is not UTF-8.
        data = json.loads(db_path.read_text(encoding="utf-8"))
        store = InMemoryStore()
        for node in data.get("nodes", []):
            store.merge_entity(
                node.get("name", ""),
                node.get("type", "concept"),
                node.get("descriptions", []),
            )
        for rel in data.get("relationships", []):
            store.add_relationship(
                rel.get("source", ""),
                rel.get("target", ""),
                rel.get("type", "related_to"),
            )
        store_type = "json"
    else:
        store = create_store(db_path)
        store_type = "sqlite" if isinstance(store, SQLiteStore) else "inmemory"
        # NOTE(review): the store is never closed here; if SQLiteStore holds
        # an open connection this may leak it -- confirm against graph_store.

    # Tally entities per type; entities without a "type" count as "concept".
    entity_types = dict(
        Counter(e.get("type", "concept") for e in store.get_all_entities())
    )

    return {
        "entity_count": store.get_entity_count(),
        "relationship_count": store.get_relationship_count(),
        "entity_types": entity_types,
        "store_type": store_type,
    }
141

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button