PlanOpticon

planopticon / video_processor / utils / json_parsing.py

Blame History Raw 59 lines

1	`"""Robust JSON extraction from LLM responses."""`
2
3	`import json`
4	`import re`
5	`from typing import Optional, Union`
6
7
def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
    """
    Extract JSON from an LLM response, handling markdown fences,
    explanatory text, and minor formatting issues.

    Strategies tried in order:
    1. Direct parse of the whitespace-stripped text
    2. Parse the contents of the first markdown code fence
    3. Decode the first valid JSON array or object embedded in the text
    4. Return None

    Args:
        text: Raw response text. Empty or whitespace-only input yields None.

    Returns:
        The parsed JSON value, or None when no strategy succeeds.
        Strategies 1 and 2 return whatever ``json.loads`` produces, so a
        response that is a bare JSON scalar is returned as that scalar.
    """
    if not text or not text.strip():
        return None

    cleaned = text.strip()

    # Strategy 1: direct parse
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Strategy 2: strip markdown fences.
    # The lazy (.*?) stops at the first closing fence; DOTALL lets it span
    # the newlines inside the fenced block.
    fence_pattern = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
    match = fence_pattern.search(cleaned)
    if match:
        try:
            return json.loads(match.group(1).strip())
        except json.JSONDecodeError:
            pass

    # Strategy 3: find an embedded JSON array or object.
    # Openers are tried in the order they appear so an object that contains
    # an array is returned whole rather than as its inner array. raw_decode
    # parses exactly one JSON value starting at the given index and ignores
    # trailing text, which also copes with brackets inside string literals.
    decoder = json.JSONDecoder()
    for candidate in re.finditer(r"[\[{]", cleaned):
        try:
            value, _end = decoder.raw_decode(cleaned, candidate.start())
        except json.JSONDecodeError:
            # Not valid JSON from this opener; move on to the next one.
            continue
        return value

    return None
59