PlanOpticon
| 321f2f5… | leo | 1 | """Robust JSON extraction from LLM responses.""" |
| 321f2f5… | leo | 2 | |
| 321f2f5… | leo | 3 | import json |
| 321f2f5… | leo | 4 | import re |
| 321f2f5… | leo | 5 | from typing import Optional, Union |
| 321f2f5… | leo | 6 | |
| 321f2f5… | leo | 7 | |
# Matches a fenced markdown block, optionally tagged ``json``; group 1 is the body.
_FENCE_PATTERN = re.compile(r"```(?:json)?\s*\n?(.*?)\n?\s*```", re.DOTALL)
# Matches any character that can start a JSON array or object.
_OPENER_PATTERN = re.compile(r"[\[{]")
_JSON_DECODER = json.JSONDecoder()


def _load_container(candidate: str) -> Optional[Union[list, dict]]:
    """Parse *candidate* as a complete JSON document.

    Returns the parsed value only when it is a list or dict; returns None
    when parsing fails or the document is a bare scalar (number, string,
    boolean, null).
    """
    try:
        value = json.loads(candidate)
    except json.JSONDecodeError:
        return None
    return value if isinstance(value, (list, dict)) else None


def parse_json_from_response(text: str) -> Optional[Union[list, dict]]:
    """
    Extract JSON from an LLM response, handling markdown fences,
    explanatory text, and minor formatting issues.

    Strategies tried in order:
    1. Direct parse of the whole (stripped) text
    2. Parse the body of each markdown fence, in order of appearance
    3. Decode the first parseable [...] or {...} value, scanning the
       text left to right so the outermost structure wins
    4. Return None

    Args:
        text: Raw response text. May be empty or None-like.

    Returns:
        The first JSON array (list) or object (dict) found, or None when
        the text is empty or contains no parseable array/object. Bare JSON
        scalars are never returned, matching the declared return type.
    """
    if not text or not text.strip():
        return None

    cleaned = text.strip()

    # Strategy 1: direct parse.
    # Compare against None explicitly: [] and {} are valid, falsy results.
    parsed = _load_container(cleaned)
    if parsed is not None:
        return parsed

    # Strategy 2: markdown fences. Try every fence, not just the first, so a
    # leading non-JSON code block does not hide a later JSON one.
    for fence in _FENCE_PATTERN.finditer(cleaned):
        parsed = _load_container(fence.group(1).strip())
        if parsed is not None:
            return parsed

    # Strategy 3: embedded array or object.
    # Candidates are visited in text order, so an object that contains an
    # array is returned whole rather than just its inner array. raw_decode
    # uses the real JSON grammar, so brackets inside string values do not
    # confuse the match, and it stops at the end of the value, ignoring any
    # trailing prose.
    for opener in _OPENER_PATTERN.finditer(cleaned):
        try:
            parsed, _end = _JSON_DECODER.raw_decode(cleaned, opener.start())
        except (json.JSONDecodeError, RecursionError):
            # Not valid JSON from this position (or nested too deeply to
            # decode); move on to the next candidate opener.
            continue
        # Decoding started at '[' or '{', so this is always a list or dict.
        return parsed

    return None