PlanOpticon

planopticon / video_processor / utils / json_parsing.py
Blame History Raw 59 lines
1
"""Robust JSON extraction from LLM responses."""
2
3
import json
4
import re
5
from typing import Optional, Union
6
7
8
def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
    """
    Extract JSON from an LLM response, handling markdown fences,
    explanatory text, and minor formatting issues.

    Strategies tried in order:
    1. Direct parse of the stripped text
    2. Parse the contents of each markdown code fence (``` or ```json)
    3. Scan for the first ``[`` or ``{`` (in text order) from which a
       complete JSON value can be decoded
    4. Return None

    Args:
        text: Raw response text. ``None`` or blank input yields ``None``.

    Returns:
        The decoded JSON value, or ``None`` when nothing parseable is found.
        Strategy 3 only ever yields a list or dict. Strategies 1 and 2
        return whatever ``json.loads`` produces, so a response that is
        itself a bare JSON scalar (e.g. ``"42"``) yields that scalar.
    """
    if not text or not text.strip():
        return None

    cleaned = text.strip()

    # Strategy 1: the whole response is already valid JSON.
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Strategy 2: look inside markdown fences. Every fenced block is tried,
    # so a non-JSON block appearing first does not hide a later JSON block.
    fence_pattern = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
    for fence in fence_pattern.finditer(cleaned):
        try:
            return json.loads(fence.group(1).strip())
        except json.JSONDecodeError:
            continue

    # Strategy 3: find a JSON array or object embedded in surrounding prose.
    # Openers are visited in the order they occur so the outermost structure
    # wins (an object containing an array is returned whole, not just the
    # inner array). raw_decode locates the matching closer itself and is
    # string-aware, so brackets inside string literals do not confuse it.
    decoder = json.JSONDecoder()
    for position, char in enumerate(cleaned):
        if char not in "[{":
            continue
        try:
            value, _end = decoder.raw_decode(cleaned, position)
        except json.JSONDecodeError:
            # Not valid JSON from this opener; keep scanning later ones.
            continue
        return value

    return None
59

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button