PlanOpticon

Source Blame History 193 lines
0981a08… noreply 1 """Taxonomy classifier for planning entity extraction.
0981a08… noreply 2
0981a08… noreply 3 Bridges raw knowledge graph entities (person, technology, concept) into
0981a08… noreply 4 planning-ready structures (goals, requirements, decisions, risks).
0981a08… noreply 5 """
0981a08… noreply 6
0981a08… noreply 7 import logging
0981a08… noreply 8 from typing import Any, Dict, List, Optional
0981a08… noreply 9
0981a08… noreply 10 from video_processor.models import PlanningEntity, PlanningEntityType
0981a08… noreply 11
logger = logging.getLogger(__name__)

# Keyword rules for heuristic classification. Each tuple is
# (PlanningEntityType, list-of-keywords). Order matters — first match wins.
#
# Keywords are matched as substrings of the lower-cased description text, so
# a rule whose keyword contains another rule's keyword must be listed first.
# CONSTRAINT therefore precedes REQUIREMENT: with the opposite order the
# "must not" keyword could never win, because "must" would already have
# matched REQUIREMENT.
_KEYWORD_RULES: List[tuple] = [
    (PlanningEntityType.GOAL, ["goal", "objective", "aim", "target outcome"]),
    (
        PlanningEntityType.CONSTRAINT,
        ["constraint", "limitation", "restrict", "cannot", "must not"],
    ),
    (
        PlanningEntityType.REQUIREMENT,
        ["must", "should", "requirement", "need", "required"],
    ),
    (
        PlanningEntityType.DECISION,
        ["decided", "decision", "chose", "selected", "agreed"],
    ),
    (PlanningEntityType.RISK, ["risk", "concern", "worry", "danger", "threat"]),
    (
        PlanningEntityType.ASSUMPTION,
        ["assume", "assumption", "expecting", "presume"],
    ),
    (
        PlanningEntityType.DEPENDENCY,
        ["depends", "dependency", "relies on", "prerequisite", "blocked"],
    ),
    (
        PlanningEntityType.MILESTONE,
        ["milestone", "deadline", "deliverable", "release", "launch"],
    ),
    (
        PlanningEntityType.TASK,
        ["task", "todo", "action item", "work item", "implement"],
    ),
    (PlanningEntityType.FEATURE, ["feature", "capability", "functionality"]),
]
0981a08… noreply 49
0981a08… noreply 50
class TaxonomyClassifier:
    """Classifies raw knowledge graph entities into planning taxonomy types.

    Classification runs in two stages: a keyword heuristic over each
    entity's descriptions, followed by an optional LLM pass (only when a
    provider manager was supplied) whose results replace heuristic results
    that share the same case-insensitive name.
    """

    def __init__(self, provider_manager: Optional[Any] = None):
        """Store the optional provider manager used for LLM refinement.

        Args:
            provider_manager: Object exposing ``chat(messages, max_tokens=...,
                temperature=...)``. When falsy, only heuristics are used.
        """
        self.pm = provider_manager

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def classify_entities(
        self,
        entities: List[Dict],
        relationships: List[Dict],
    ) -> List[PlanningEntity]:
        """Classify extracted entities into planning entity types.

        Uses heuristic classification first, then LLM refinement if a
        provider manager is available.

        Args:
            entities: Entity dicts. Each must have a ``"name"`` key; the
                optional ``"descriptions"`` and ``"type"`` keys are also read.
            relationships: Relationship dicts, forwarded to both classifiers.

        Returns:
            Planning entities for every input entity that matched a planning
            type. Entities matching nothing are omitted.
        """
        planning_entities: List[PlanningEntity] = []

        # Step 1: heuristic classification
        for entity in entities:
            planning_type = self._heuristic_classify(entity, relationships)
            if planning_type is None:
                continue
            descs = self._descriptions(entity)
            planning_entities.append(
                PlanningEntity(
                    name=entity["name"],
                    planning_type=planning_type,
                    description="; ".join(descs[:2]),
                    source_entities=[entity["name"]],
                )
            )

        # Step 2: LLM refinement (if provider available)
        if self.pm and entities:
            llm_classified = self._llm_classify(entities, relationships)
            planning_entities = self._merge_classifications(planning_entities, llm_classified)

        return planning_entities

    def organize_by_workstream(
        self, planning_entities: List[PlanningEntity]
    ) -> Dict[str, List[PlanningEntity]]:
        """Group planning entities into logical workstreams by type.

        The group key is the planning type's ``value`` with ``"s"`` appended.

        NOTE(review): the pluralisation is naive — a value ending in "y"
        (for example "dependency") would produce "dependencys". It is left
        unchanged because callers may already key on the existing names.
        """
        workstreams: Dict[str, List[PlanningEntity]] = {}
        for pe in planning_entities:
            group = pe.planning_type.value + "s"
            workstreams.setdefault(group, []).append(pe)
        return workstreams

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _descriptions(entity: Dict) -> List[str]:
        """Return the entity's descriptions as a list of strings.

        Tolerates a missing key, an explicit ``None`` and a bare string, so
        callers can slice and join the result without type checks.
        """
        descs = entity.get("descriptions") or []
        if isinstance(descs, str):
            return [descs]
        return [str(d) for d in descs if d is not None]

    @staticmethod
    def _parse_planning_type(value: Any) -> PlanningEntityType:
        """Convert an LLM-supplied category into a ``PlanningEntityType``.

        The exact value is tried first; a string that fails is retried after
        stripping whitespace and lower-casing, so answers such as "Goal" or
        " risk " are not discarded.

        Raises:
            ValueError: If the value matches no planning type.
        """
        try:
            return PlanningEntityType(value)
        except ValueError:
            if not isinstance(value, str):
                raise
            return PlanningEntityType(value.strip().lower())

    # ------------------------------------------------------------------
    # Heuristic classification
    # ------------------------------------------------------------------

    def _heuristic_classify(
        self,
        entity: Dict,
        relationships: List[Dict],  # noqa: ARG002 — reserved for future rules
    ) -> Optional[PlanningEntityType]:
        """Rule-based classification from description keywords.

        All descriptions are joined and lower-cased, then each rule in
        ``_KEYWORD_RULES`` is tried in order. The first rule with a keyword
        occurring as a substring of that text wins.

        Returns:
            The matching planning type, or ``None`` when no keyword matches.
        """
        desc_lower = " ".join(self._descriptions(entity)).lower()

        for planning_type, keywords in _KEYWORD_RULES:
            if any(kw in desc_lower for kw in keywords):
                return planning_type

        return None

    # ------------------------------------------------------------------
    # LLM classification
    # ------------------------------------------------------------------

    def _llm_classify(
        self, entities: List[Dict], relationships: List[Dict]
    ) -> List[PlanningEntity]:
        """Use LLM to classify entities into planning types.

        Only the first 50 entities are sent to the model. Each returned item
        is validated; items with an unknown planning type, or that fail
        ``PlanningEntity`` construction with a ``ValueError``, are skipped.
        The description of the matching source entity is attached to each
        result, so that an LLM result replacing a heuristic one during the
        merge does not drop the entity's description.

        Returns:
            The LLM-classified planning entities, or an empty list when the
            LLM call fails or the response is not a JSON list.
        """
        entity_summaries = []
        # Lower-cased entity name -> short description, used to carry the
        # source description over to the LLM-built planning entities.
        descriptions_by_name: Dict[str, str] = {}
        for e in entities[:50]:  # limit to avoid token overflow
            descs = self._descriptions(e)
            summary = "; ".join(descs[:2])
            desc_str = summary if descs else "no description"
            entity_summaries.append(f"- {e['name']} ({e.get('type', 'concept')}): {desc_str}")
            if summary:
                descriptions_by_name.setdefault(str(e["name"]).lower(), summary)

        prompt = (
            "Classify these entities from a knowledge graph into planning categories.\n\n"
            "Entities:\n" + "\n".join(entity_summaries) + "\n\n"
            "Categories: goal, requirement, constraint, decision, risk, assumption, "
            "dependency, milestone, task, feature\n\n"
            "For each entity that fits a planning category, return JSON:\n"
            '[{"name": "...", "planning_type": "...", "priority": "high|medium|low"}]\n\n'
            "Only include entities that clearly fit a planning category. "
            "Skip entities that are just people, technologies, or general concepts. "
            "Return ONLY the JSON array."
        )

        try:
            raw = self.pm.chat(
                [{"role": "user", "content": prompt}],
                max_tokens=2048,
                temperature=0.2,
            )
        except Exception:
            # Best-effort refinement: keep the heuristic results, but record
            # the cause so provider failures remain diagnosable.
            logger.warning("LLM classification failed, using heuristic only", exc_info=True)
            return []

        from video_processor.utils.json_parsing import parse_json_from_response

        parsed = parse_json_from_response(raw)
        if not isinstance(parsed, list):
            return []

        results: List[PlanningEntity] = []
        for item in parsed:
            if not isinstance(item, dict) or "name" not in item or "planning_type" not in item:
                continue
            try:
                ptype = self._parse_planning_type(item["planning_type"])
                extra: Dict[str, Any] = {}
                description = descriptions_by_name.get(str(item["name"]).lower())
                if description:
                    # Passed only when known, so the model's own default
                    # still applies to entities without a description.
                    extra["description"] = description
                results.append(
                    PlanningEntity(
                        name=item["name"],
                        planning_type=ptype,
                        priority=item.get("priority"),
                        source_entities=[item["name"]],
                        **extra,
                    )
                )
            except ValueError:
                logger.debug("Skipping unusable LLM classification item: %r", item)
        return results

    # ------------------------------------------------------------------
    # Merge
    # ------------------------------------------------------------------

    @staticmethod
    def _merge_classifications(
        heuristic: List[PlanningEntity],
        llm: List[PlanningEntity],
    ) -> List[PlanningEntity]:
        """Merge heuristic and LLM classifications. LLM wins on conflicts.

        Entries are keyed by lower-cased name. A replaced entry keeps the
        position of the heuristic entry it overrides; LLM-only entries are
        appended after the heuristic ones.
        """
        by_name = {pe.name.lower(): pe for pe in heuristic}
        for pe in llm:
            by_name[pe.name.lower()] = pe  # LLM overrides
        return list(by_name.values())

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button