PlanOpticon

planopticon / video_processor / integrators / graph_discovery.py
Source Blame History 140 lines
b363c5b… noreply 1 """Auto-detect knowledge graph files in the filesystem."""
b363c5b… noreply 2
b363c5b… noreply 3 import logging
b363c5b… noreply 4 from pathlib import Path
b363c5b… noreply 5 from typing import Dict, List, Optional
b363c5b… noreply 6
logger = logging.getLogger(__name__)

# Common output subdirectories where graphs may live
_OUTPUT_SUBDIRS = ["results", "output", "knowledge-base"]

# Filenames we look for, in preference order
_DB_FILENAMES = ["knowledge_graph.db"]
_JSON_FILENAMES = ["knowledge_graph.json"]


def find_knowledge_graphs(
    start_dir: Optional[Path] = None,
    walk_up: bool = True,
    max_depth_down: int = 4,
) -> List[Path]:
    """Find knowledge graph files near *start_dir*, sorted by proximity.

    Search order:
    1. start_dir itself (distance 0)
    2. Common output subdirs: results/, output/, knowledge-base/ (distance 1)
    3. Recursive walk downward, skipping dot-directories. start_dir counts
       as depth 1 and directories deeper than *max_depth_down* are not
       listed; a file's distance is the depth of the directory holding it.
    4. Walk upward through every ancestor up to and including the
       filesystem root (if *walk_up* is True). The n-th ancestor has
       distance n, its common output subdirs distance n + 1.

    Paths that cannot be inspected (permission errors, entries vanishing
    mid-scan, symlink loops) are skipped and logged at debug level rather
    than aborting the search.

    Args:
        start_dir: Directory to search from; defaults to the current
            working directory.
        walk_up: Whether to also inspect ancestor directories.
        max_depth_down: Depth limit for the downward walk (see step 3).

    Returns:
        Resolved, de-duplicated paths: .db files first, then .json, each
        group sorted closest-first (ties keep discovery order).
    """
    start_dir = Path(start_dir or Path.cwd()).resolve()
    # Built once; the original re-concatenated these lists per directory entry.
    candidate_names = _DB_FILENAMES + _JSON_FILENAMES
    found_db: List[tuple] = []  # (distance, path)
    found_json: List[tuple] = []
    seen: set = set()

    def _record(path: Path, distance: int) -> None:
        """Register *path* once, in the bucket matching its resolved suffix."""
        try:
            rp = path.resolve()
            if rp in seen or not rp.is_file():
                return
        except (OSError, RuntimeError) as exc:
            # OSError: e.g. stat() denied inside an unreadable directory.
            # RuntimeError: symlink loop reported by resolve() on older Pythons.
            logger.debug("Skipping %s: %s", path, exc)
            return
        seen.add(rp)
        bucket = found_db if rp.suffix == ".db" else found_json
        bucket.append((distance, rp))

    def _check_dir(directory: Path, distance: int) -> None:
        """Probe *directory* and its common output subdirs for graph files."""
        for name in candidate_names:
            _record(directory / name, distance)
        for subdir in _OUTPUT_SUBDIRS:
            for name in candidate_names:
                _record(directory / subdir / name, distance + 1)

    # 1. Direct check in start_dir, and 2. its common output subdirs
    _check_dir(start_dir, 0)

    # 3. Walk downward
    def _walk_down(directory: Path, depth: int) -> None:
        if depth > max_depth_down:
            return
        try:
            # Sorted so discovery order (and thus tie-breaking) is deterministic.
            children = sorted(directory.iterdir())
        except OSError as exc:
            logger.debug("Cannot list %s: %s", directory, exc)
            return
        for child in children:
            # Handle errors per child so one bad entry does not hide siblings.
            try:
                if child.name in candidate_names and child.is_file():
                    _record(child, depth)
                elif not child.name.startswith(".") and child.is_dir():
                    _walk_down(child, depth + 1)
            except OSError as exc:
                logger.debug("Skipping %s: %s", child, exc)

    _walk_down(start_dir, 1)

    # 4. Walk upward. Path.parents ends at the filesystem root, so the root
    #    itself is inspected too (the previous loop stopped just short of it).
    if walk_up:
        for distance, parent in enumerate(start_dir.parents, start=1):
            _check_dir(parent, distance)

    # Sort each group by distance (stable), then combine db-first
    found_db.sort(key=lambda item: item[0])
    found_json.sort(key=lambda item: item[0])
    return [p for _, p in found_db] + [p for _, p in found_json]
b363c5b… noreply 86
b363c5b… noreply 87
def find_nearest_graph(start_dir: Optional[Path] = None) -> Optional[Path]:
    """Return the top-ranked knowledge graph near *start_dir*, or None.

    "Top-ranked" is the first entry returned by find_knowledge_graphs()
    when called with its default search options.
    """
    candidates = find_knowledge_graphs(start_dir)
    if not candidates:
        return None
    return candidates[0]
b363c5b… noreply 92
b363c5b… noreply 93
def describe_graph(db_path: Path) -> Dict:
    """Return summary stats for a knowledge graph file.

    A ``.json`` file is parsed and replayed into an InMemoryStore: entries
    under "nodes" become entities and entries under "relationships" become
    relationships. Any other suffix is handed to create_store().

    Args:
        db_path: Path to the knowledge graph file (str is accepted too,
            since it is wrapped in Path).

    Returns:
        dict with: entity_count, relationship_count, entity_types (mapping of
        entity type -> number of entities), store_type ("json", "sqlite" or
        "inmemory").
    """
    from collections import Counter

    from video_processor.integrators.graph_store import (
        InMemoryStore,
        SQLiteStore,
        create_store,
    )

    db_path = Path(db_path)

    if db_path.suffix == ".json":
        import json

        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which breaks on non-ASCII content on some systems.
        data = json.loads(db_path.read_text(encoding="utf-8"))
        store = InMemoryStore()
        for node in data.get("nodes", []):
            store.merge_entity(
                node.get("name", ""),
                node.get("type", "concept"),
                node.get("descriptions", []),
            )
        for rel in data.get("relationships", []):
            store.add_relationship(
                rel.get("source", ""),
                rel.get("target", ""),
                rel.get("type", "related_to"),
            )
        store_type = "json"
    else:
        store = create_store(db_path)
        store_type = "sqlite" if isinstance(store, SQLiteStore) else "inmemory"
        # NOTE(review): the store is never explicitly closed here; confirm
        # whether SQLiteStore needs a close() call to release its handle.

    # Entities without a "type" key are counted under "concept".
    type_counts = Counter(
        entity.get("type", "concept") for entity in store.get_all_entities()
    )

    return {
        "entity_count": store.get_entity_count(),
        "relationship_count": store.get_relationship_count(),
        "entity_types": dict(type_counts),
        "store_type": store_type,
    }

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button