|
0981a08…
|
noreply
|
1 |
"""Graph visualization and analysis utilities using NetworkX.""" |
|
0981a08…
|
noreply
|
2 |
|
|
0981a08…
|
noreply
|
3 |
from typing import Dict, List, Optional |
|
0981a08…
|
noreply
|
4 |
|
|
0981a08…
|
noreply
|
5 |
try: |
|
0981a08…
|
noreply
|
6 |
import networkx as nx |
|
0981a08…
|
noreply
|
7 |
except ImportError: |
|
0981a08…
|
noreply
|
8 |
nx = None |
|
0981a08…
|
noreply
|
9 |
|
|
0981a08…
|
noreply
|
10 |
|
|
0981a08…
|
noreply
|
11 |
def _require_nx():
    """Raise ImportError with an install hint when networkx failed to import.

    The module-level import stores ``None`` in ``nx`` when networkx is
    missing; this guard turns that into an actionable error.
    """
    if nx is not None:
        return
    message = "networkx is required for graph visualization. Install it with: pip install networkx"
    raise ImportError(message)
|
0981a08…
|
noreply
|
16 |
|
|
0981a08…
|
noreply
|
17 |
|
|
0981a08…
|
noreply
|
18 |
def graph_to_networkx(kg_data: dict) -> "nx.DiGraph":
    """Convert knowledge graph dict (from to_dict()) to NetworkX directed graph.

    Nodes get attributes: type, descriptions, source, occurrences
    Edges get attributes: type, content_source, timestamp

    Args:
        kg_data: Mapping with optional "nodes" and "relationships" entries,
            each a list of dicts. A missing or ``None`` entry is treated as
            an empty list.

    Returns:
        A directed graph whose node keys are the node names.

    Raises:
        ImportError: If networkx is not installed.
    """
    _require_nx()
    G = nx.DiGraph()

    # `or []` tolerates an explicit None value, not just a missing key.
    for node in kg_data.get("nodes") or []:
        # Fall back to "id" when "name" is missing *or* empty/None; a plain
        # dict.get default only covers the missing-key case.
        name = node.get("name") or node.get("id")
        if not name:
            continue
        G.add_node(
            name,
            type=node.get("type", "concept"),
            descriptions=node.get("descriptions", []),
            source=node.get("source"),
            occurrences=node.get("occurrences", []),
        )

    for rel in kg_data.get("relationships") or []:
        src = rel.get("source", "")
        tgt = rel.get("target", "")
        # Skip relationships with a missing or empty endpoint.
        if not src or not tgt:
            continue
        G.add_edge(
            src,
            tgt,
            type=rel.get("type", "related_to"),
            content_source=rel.get("content_source"),
            timestamp=rel.get("timestamp"),
        )

    return G
|
0981a08…
|
noreply
|
53 |
|
|
0981a08…
|
noreply
|
54 |
|
|
0981a08…
|
noreply
|
55 |
def compute_graph_stats(G: "nx.DiGraph") -> dict:
    """Return graph statistics.

    Keys: node_count, edge_count, density, connected_components,
    type_breakdown, top_entities (by degree, top 10).

    ``connected_components`` ignores edge direction. ``type_breakdown`` maps
    each node ``type`` attribute (default "concept") to its node count.
    ``top_entities`` is a list of ``{"name", "degree"}`` dicts sorted by
    descending degree.
    """
    if len(G) == 0:
        components = 0
    elif G.is_directed():
        # Weak connectivity is connectivity with edge direction ignored, so
        # this equals the component count of G.to_undirected() without
        # building a full copy of the graph first.
        components = nx.number_weakly_connected_components(G)
    else:
        components = nx.number_connected_components(G)

    type_breakdown: Dict[str, int] = {}
    for _, data in G.nodes(data=True):
        ntype = data.get("type", "concept")
        type_breakdown[ntype] = type_breakdown.get(ntype, 0) + 1

    degree_list = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    top_entities = [{"name": name, "degree": deg} for name, deg in degree_list[:10]]

    return {
        "node_count": G.number_of_nodes(),
        "edge_count": G.number_of_edges(),
        "density": nx.density(G),
        "connected_components": components,
        "type_breakdown": type_breakdown,
        "top_entities": top_entities,
    }
|
0981a08…
|
noreply
|
80 |
|
|
0981a08…
|
noreply
|
81 |
|
|
0981a08…
|
noreply
|
82 |
def filter_graph(
    G: "nx.DiGraph",
    entity_types: Optional[List[str]] = None,
    min_degree: Optional[int] = None,
) -> "nx.DiGraph":
    """Return a copied subgraph keeping only nodes that pass the given filters.

    A node is kept when its ``type`` attribute (default "concept") is in
    ``entity_types`` and its degree in the *original* graph is at least
    ``min_degree``. A filter left as ``None`` is not applied.
    """
    allowed_types = None if entity_types is None else set(entity_types)

    kept = []
    for node, attrs in G.nodes(data=True):
        if allowed_types is not None and attrs.get("type", "concept") not in allowed_types:
            continue
        if min_degree is not None and G.degree(node) < min_degree:
            continue
        kept.append(node)

    # .copy() detaches the result from G; subgraph() alone returns a view.
    return G.subgraph(kept).copy()
|
0981a08…
|
noreply
|
98 |
|
|
0981a08…
|
noreply
|
99 |
|
|
0981a08…
|
noreply
|
100 |
def _sanitize_id(name: str) -> str: |
|
0981a08…
|
noreply
|
101 |
"""Create a Mermaid-safe identifier from a node name.""" |
|
0981a08…
|
noreply
|
102 |
return "".join(c if c.isalnum() or c == "_" else "_" for c in name) |
|
0981a08…
|
noreply
|
103 |
|
|
0981a08…
|
noreply
|
104 |
|
|
0981a08…
|
noreply
|
105 |
def generate_mermaid(G: "nx.DiGraph", max_nodes: int = 30, layout: str = "LR") -> str:
    """Generate Mermaid diagram from NetworkX graph.

    Selects top nodes by degree. Layout can be LR, TD, etc.

    Args:
        G: Graph whose nodes may carry a ``type`` attribute and whose edges
            may carry a ``type`` attribute used as the edge label.
        max_nodes: Maximum number of highest-degree nodes to include.
        layout: Mermaid graph direction placed after the ``graph`` keyword.

    Returns:
        The Mermaid source as a single newline-joined string. Nodes are
        emitted in descending-degree order, so output is stable across runs.
    """
    degree_sorted = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    # The list fixes emission order; iterating a set of strings would vary
    # between interpreter runs. The set is kept only for membership tests.
    ordered_nodes = [name for name, _ in degree_sorted[:max_nodes]]
    top_nodes = set(ordered_nodes)

    lines = [f"graph {layout}"]

    for name in ordered_nodes:
        data = G.nodes[name]
        # An empty or None type would leave a dangling ":::" suffix, and a
        # type with spaces/punctuation is not a usable class name.
        ntype = _sanitize_id(str(data.get("type") or "concept"))
        safe_id = _sanitize_id(name)
        safe_name = name.replace('"', "'")
        lines.append(f' {safe_id}["{safe_name}"]:::{ntype}')

    added = set()
    for src, tgt, data in G.edges(data=True):
        if src in top_nodes and tgt in top_nodes:
            rtype = data.get("type", "related_to")
            key = (src, tgt, rtype)
            if key not in added:
                # Same quote substitution as node labels, so a '"' in the
                # relationship type cannot terminate the quoted edge label.
                label = str(rtype).replace('"', "'")
                lines.append(f' {_sanitize_id(src)} -- "{label}" --> {_sanitize_id(tgt)}')
                added.add(key)

    lines.append(" classDef person fill:#f9d5e5,stroke:#333,stroke-width:1px")
    lines.append(" classDef concept fill:#eeeeee,stroke:#333,stroke-width:1px")
    lines.append(" classDef technology fill:#d5e5f9,stroke:#333,stroke-width:1px")
    lines.append(" classDef organization fill:#f9f5d5,stroke:#333,stroke-width:1px")
    lines.append(" classDef diagram fill:#d5f9e5,stroke:#333,stroke-width:1px")
    lines.append(" classDef time fill:#e5d5f9,stroke:#333,stroke-width:1px")

    return "\n".join(lines)
|
0981a08…
|
noreply
|
139 |
|
|
0981a08…
|
noreply
|
140 |
|
|
0981a08…
|
noreply
|
141 |
def graph_to_d3_json(G: "nx.DiGraph") -> dict:
    """Export the graph as a node/link dict for D3.

    Returns {"nodes": [{"id": ..., "group": ..., "descriptions": ...}],
    "links": [{"source": ..., "target": ..., "type": ...}]}. ``group`` is
    the node ``type`` attribute (default "concept"); a link ``type``
    defaults to "related_to".
    """
    node_records = [
        {
            "id": node_id,
            "group": attrs.get("type", "concept"),
            "descriptions": attrs.get("descriptions", []),
        }
        for node_id, attrs in G.nodes(data=True)
    ]
    link_records = [
        {
            "source": start,
            "target": end,
            "type": attrs.get("type", "related_to"),
        }
        for start, end, attrs in G.edges(data=True)
    ]
    return {"nodes": node_records, "links": link_records}
|
0981a08…
|
noreply
|
167 |
|
|
0981a08…
|
noreply
|
168 |
|
|
0981a08…
|
noreply
|
169 |
def graph_to_dot(G: "nx.DiGraph") -> str:
    """Export to Graphviz DOT format.

    Each node becomes a quoted-ID statement with a fill colour chosen from
    its ``type`` attribute (default "concept", unknown types get grey) and a
    label equal to its name. Each edge is labelled with its ``type``
    attribute (default "related_to").

    Returns:
        The DOT source as a single newline-joined string.
    """

    def escape(value) -> str:
        """Escape a value for use inside a double-quoted DOT string."""
        # Backslashes are doubled first so that a trailing backslash cannot
        # swallow the closing quote, and so the backslash added for each
        # quote below is not doubled again. str() keeps non-string values
        # (e.g. a None type) from raising on .replace.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    lines = ["digraph KnowledgeGraph {"]
    lines.append(" rankdir=LR;")
    lines.append(' node [shape=box, style="rounded,filled", fontname="Helvetica"];')
    lines.append("")

    type_colors = {
        "person": "#f9d5e5",
        "concept": "#eeeeee",
        "technology": "#d5e5f9",
        "organization": "#f9f5d5",
        "diagram": "#d5f9e5",
        "time": "#e5d5f9",
    }

    for name, data in G.nodes(data=True):
        ntype = data.get("type", "concept")
        color = type_colors.get(ntype, "#eeeeee")
        escaped = escape(name)
        lines.append(f' "{escaped}" [fillcolor="{color}", label="{escaped}"];')

    lines.append("")
    for src, tgt, data in G.edges(data=True):
        rtype = data.get("type", "related_to")
        lines.append(f' "{escape(src)}" -> "{escape(tgt)}" [label="{escape(rtype)}"];')

    lines.append("}")
    return "\n".join(lines)