PlanOpticon

planopticon / video_processor / integrators / taxonomy.py

Source Blame History 193 lines

0981a08…	noreply	1	"""Taxonomy classifier for planning entity extraction.
0981a08…	noreply	2
0981a08…	noreply	3	Bridges raw knowledge graph entities (person, technology, concept) into
0981a08…	noreply	4	planning-ready structures (goals, requirements, decisions, risks).
0981a08…	noreply	5	"""
0981a08…	noreply	6
0981a08…	noreply	7	import logging
0981a08…	noreply	8	from typing import Any, Dict, List, Optional
0981a08…	noreply	9
0981a08…	noreply	10	from video_processor.models import PlanningEntity, PlanningEntityType
0981a08…	noreply	11
0981a08…	noreply	12	logger = logging.getLogger(__name__)
0981a08…	noreply	13
# Keyword rules for heuristic classification. Each tuple is
# (PlanningEntityType, list-of-keywords). Keywords are matched as lowercase
# substrings of the joined entity descriptions.
#
# Order matters — first match wins. CONSTRAINT is deliberately listed before
# REQUIREMENT: the constraint keyword "must not" contains the requirement
# keyword "must", so with the opposite order "must not" could never match.
_KEYWORD_RULES: List[tuple] = [
    (PlanningEntityType.GOAL, ["goal", "objective", "aim", "target outcome"]),
    (
        PlanningEntityType.CONSTRAINT,
        ["constraint", "limitation", "restrict", "cannot", "must not"],
    ),
    (
        PlanningEntityType.REQUIREMENT,
        ["must", "should", "requirement", "need", "required"],
    ),
    (
        PlanningEntityType.DECISION,
        ["decided", "decision", "chose", "selected", "agreed"],
    ),
    (PlanningEntityType.RISK, ["risk", "concern", "worry", "danger", "threat"]),
    (
        PlanningEntityType.ASSUMPTION,
        ["assume", "assumption", "expecting", "presume"],
    ),
    (
        PlanningEntityType.DEPENDENCY,
        ["depends", "dependency", "relies on", "prerequisite", "blocked"],
    ),
    (
        PlanningEntityType.MILESTONE,
        ["milestone", "deadline", "deliverable", "release", "launch"],
    ),
    (
        PlanningEntityType.TASK,
        ["task", "todo", "action item", "work item", "implement"],
    ),
    (PlanningEntityType.FEATURE, ["feature", "capability", "functionality"]),
]
0981a08…	noreply	49
0981a08…	noreply	50
class TaxonomyClassifier:
    """Classifies raw knowledge graph entities into planning taxonomy types.

    Classification is two-staged: a keyword heuristic over the entity
    descriptions always runs, and, when a provider manager was supplied,
    an LLM pass whose results override the heuristic ones on name conflicts.
    """

    def __init__(self, provider_manager: Optional[Any] = None):
        """Store the optional provider manager used for LLM refinement.

        Args:
            provider_manager: Object exposing ``chat(messages, max_tokens=...,
                temperature=...)``. When falsy, only heuristics are used.
        """
        self.pm = provider_manager

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def classify_entities(
        self,
        entities: List[Dict],
        relationships: List[Dict],
    ) -> List[PlanningEntity]:
        """Classify extracted entities into planning entity types.

        Uses heuristic classification first, then LLM refinement if a
        provider manager is available.

        Args:
            entities: Entity dicts. ``"name"`` is required; ``"descriptions"``
                (list of strings) and ``"type"`` are optional.
            relationships: Relationship dicts, forwarded to the classifiers.

        Returns:
            Planning entities for every input entity that could be
            classified; entities matching no category are omitted.
        """
        planning_entities: List[PlanningEntity] = []

        # Step 1: heuristic classification
        for entity in entities:
            planning_type = self._heuristic_classify(entity, relationships)
            if planning_type is None:
                continue
            planning_entities.append(
                PlanningEntity(
                    name=entity["name"],
                    planning_type=planning_type,
                    description=self._summarize_descriptions(entity),
                    source_entities=[entity["name"]],
                )
            )

        # Step 2: LLM refinement (if provider available)
        if self.pm and entities:
            llm_classified = self._llm_classify(entities, relationships)
            planning_entities = self._merge_classifications(planning_entities, llm_classified)

        return planning_entities

    def organize_by_workstream(
        self, planning_entities: List[PlanningEntity]
    ) -> Dict[str, List[PlanningEntity]]:
        """Group planning entities into logical workstreams by type.

        The group key is the planning type's value with a literal ``"s"``
        appended.
        """
        # NOTE(review): the plural is formed naively, so a type whose value
        # ends in "y" presumably yields a key like "dependencys". Left as is
        # because callers may already rely on these keys — confirm before
        # changing.
        workstreams: Dict[str, List[PlanningEntity]] = {}
        for pe in planning_entities:
            group = pe.planning_type.value + "s"
            workstreams.setdefault(group, []).append(pe)
        return workstreams

    # ------------------------------------------------------------------
    # Heuristic classification
    # ------------------------------------------------------------------

    @staticmethod
    def _summarize_descriptions(entity: Dict) -> str:
        """Join the first two descriptions of *entity* with ``"; "``.

        A missing or ``None`` ``"descriptions"`` value yields an empty string.
        """
        descs = entity.get("descriptions") or []
        return "; ".join(descs[:2])

    def _heuristic_classify(
        self,
        entity: Dict,
        relationships: List[Dict],  # noqa: ARG002 — reserved for future rules
    ) -> Optional[PlanningEntityType]:
        """Rule-based classification from description keywords.

        Returns the type of the first rule in ``_KEYWORD_RULES`` that has a
        keyword occurring as a substring of the lower-cased, space-joined
        descriptions, or ``None`` when no rule matches.
        """
        # ``or []`` guards against an explicit ``"descriptions": None``.
        desc_lower = " ".join(entity.get("descriptions") or []).lower()

        for planning_type, keywords in _KEYWORD_RULES:
            if any(kw in desc_lower for kw in keywords):
                return planning_type

        return None

    # ------------------------------------------------------------------
    # LLM classification
    # ------------------------------------------------------------------

    @staticmethod
    def _coerce_planning_type(raw: Any) -> Optional[PlanningEntityType]:
        """Convert an LLM-provided category into a ``PlanningEntityType``.

        The raw value is tried first; if that fails and it is a string, a
        stripped, lower-cased variant is tried so answers such as ``"Goal"``
        or ``" risk "`` are not discarded. Returns ``None`` when neither
        form is a valid member value.
        """
        candidates = [raw]
        if isinstance(raw, str):
            normalized = raw.strip().lower()
            if normalized != raw:
                candidates.append(normalized)

        for candidate in candidates:
            try:
                return PlanningEntityType(candidate)
            except ValueError:
                continue
        return None

    def _llm_classify(
        self, entities: List[Dict], relationships: List[Dict]
    ) -> List[PlanningEntity]:
        """Use LLM to classify entities into planning types.

        Only the first 50 entities are sent. Any failure of the LLM call or
        of response parsing is logged and results in an empty list, so the
        caller falls back to heuristic results.

        Returns:
            One planning entity per well-formed item in the LLM's JSON
            array. The description is copied from the matching input entity
            (case-insensitive name match) when one exists.
        """
        entity_summaries = []
        # Lower-cased entity name -> description summary, used to give
        # LLM-classified entities the description of their source entity.
        descriptions_by_name: Dict[str, str] = {}
        for e in entities[:50]:  # limit to avoid token overflow
            summary = self._summarize_descriptions(e)
            shown = summary or "no description"
            entity_summaries.append(f"- {e['name']} ({e.get('type', 'concept')}): {shown}")
            descriptions_by_name.setdefault(str(e["name"]).lower(), summary)

        prompt = (
            "Classify these entities from a knowledge graph into planning categories.\n\n"
            "Entities:\n" + "\n".join(entity_summaries) + "\n\n"
            "Categories: goal, requirement, constraint, decision, risk, assumption, "
            "dependency, milestone, task, feature\n\n"
            "For each entity that fits a planning category, return JSON:\n"
            '[{"name": "...", "planning_type": "...", "priority": "high|medium|low"}]\n\n'
            "Only include entities that clearly fit a planning category. "
            "Skip entities that are just people, technologies, or general concepts. "
            "Return ONLY the JSON array."
        )

        try:
            raw = self.pm.chat(
                [{"role": "user", "content": prompt}],
                max_tokens=2048,
                temperature=0.2,
            )
        except Exception:
            logger.warning("LLM classification failed, using heuristic only", exc_info=True)
            return []

        from video_processor.utils.json_parsing import parse_json_from_response

        try:
            parsed = parse_json_from_response(raw)
        except Exception:
            # A malformed reply must not abort classification; degrade to
            # heuristic-only results just like a failed chat call.
            logger.warning(
                "Could not parse LLM classification response, using heuristic only",
                exc_info=True,
            )
            return []

        results: List[PlanningEntity] = []
        if not isinstance(parsed, list):
            return results

        for item in parsed:
            if not isinstance(item, dict) or "planning_type" not in item:
                continue
            name = item.get("name")
            if not isinstance(name, str):
                # The merge step lower-cases names, so non-string names
                # cannot be used.
                continue

            ptype = self._coerce_planning_type(item["planning_type"])
            if ptype is None:
                continue

            # Only pass a description when the source entity has one, so
            # the model's own default applies otherwise.
            extra: Dict[str, Any] = {}
            description = descriptions_by_name.get(name.lower())
            if description:
                extra["description"] = description

            try:
                results.append(
                    PlanningEntity(
                        name=name,
                        planning_type=ptype,
                        priority=item.get("priority"),
                        source_entities=[name],
                        **extra,
                    )
                )
            except ValueError:
                # Item rejected by the model (e.g. invalid field value); skip it.
                logger.debug("Skipping invalid LLM classification item: %r", item)
        return results

    # ------------------------------------------------------------------
    # Merge
    # ------------------------------------------------------------------

    @staticmethod
    def _merge_classifications(
        heuristic: List[PlanningEntity],
        llm: List[PlanningEntity],
    ) -> List[PlanningEntity]:
        """Merge heuristic and LLM classifications. LLM wins on conflicts.

        Entities are keyed by lower-cased name. The result keeps heuristic
        entries in their original order (replaced in place when the LLM
        classified the same name), followed by LLM-only entries.
        """
        by_name = {pe.name.lower(): pe for pe in heuristic}
        for pe in llm:
            by_name[pe.name.lower()] = pe  # LLM overrides
        return list(by_name.values())

PlanOpticon

Keyboard Shortcuts