PlanOpticon

planopticon / video_processor / sources / hackernews_source.py
Source Blame History 112 lines
0981a08… noreply 1 """Hacker News source connector using the official Firebase API."""
0981a08… noreply 2
0981a08… noreply 3 import logging
0981a08… noreply 4 from pathlib import Path
0981a08… noreply 5 from typing import List, Optional
0981a08… noreply 6
0981a08… noreply 7 from video_processor.sources.base import BaseSource, SourceFile
0981a08… noreply 8
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the Hacker News Firebase API (v0); item URLs are built as
# f"{HN_API}/item/<id>.json".
HN_API = "https://hacker-news.firebaseio.com/v0"
0981a08… noreply 12
0981a08… noreply 13
class HackerNewsSource(BaseSource):
    """
    Fetch Hacker News stories and comments via the public API.

    The story and its comment tree are rendered into one plain-text document
    (see ``fetch_text``), which ``download`` writes to disk.

    API docs: https://github.com/HackerNews/API
    Requires: pip install requests
    """

    def __init__(self, item_id: int, max_comments: int = 200):
        """
        Parameters
        ----------
        item_id : int
            HN story/item ID (e.g., 12345678).
        max_comments : int
            Maximum number of comments to fetch (default 200).
        """
        self.item_id = item_id
        self.max_comments = max_comments

    def authenticate(self) -> bool:
        """No auth needed for the HN API."""
        return True

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """Return a single SourceFile for the HN story.

        The ``folder_id``, ``folder_path`` and ``patterns`` arguments are
        accepted but not used: this source always exposes exactly one entry,
        identified by ``self.item_id``.
        """
        return [
            SourceFile(
                name=f"hn_{self.item_id}",
                id=str(self.item_id),
                mime_type="text/plain",
            )
        ]

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download the story and comments as plain text.

        Parameters
        ----------
        file : SourceFile
            Not consulted; the item fetched is always ``self.item_id``.
        destination : Path
            Target file path. Missing parent directories are created.

        Returns
        -------
        Path
            The path the text was written to.
        """
        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        text = self.fetch_text()
        destination.write_text(text, encoding="utf-8")
        # Lazy %-style arguments: the message is only formatted if emitted.
        logger.info("Saved HN story %s to %s", self.item_id, destination)
        return destination

    def _get_item(self, item_id: int) -> dict:
        """Fetch one item from the HN API and return its JSON payload.

        An empty or ``null`` JSON body is returned as ``{}``. HTTP error
        statuses raise via ``raise_for_status``; each request uses a
        10-second timeout.
        """
        # Imported here so the module can be imported without `requests`.
        import requests

        resp = requests.get(f"{HN_API}/item/{item_id}.json", timeout=10)
        resp.raise_for_status()
        return resp.json() or {}

    def fetch_text(self) -> str:
        """Fetch story and comments as structured text.

        The result starts with a ``# <title>`` heading and a byline, followed
        by the optional URL and story text, then a ``## Comments`` section
        with replies indented by nesting depth. Errors fetching the story
        itself propagate to the caller; errors on individual comments are
        logged and skipped.
        """
        story = self._get_item(self.item_id)
        if not story:
            # `_get_item` maps an empty payload to {}; the placeholders below
            # still yield a document, so flag it rather than fail.
            logger.warning("HN item %s returned no data", self.item_id)

        lines = [
            f"# {story.get('title', 'Untitled')}",
            f"by {story.get('by', 'unknown')} | {story.get('score', 0)} points",
        ]
        if story.get("url"):
            lines.append(f"URL: {story['url']}")
        if story.get("text"):
            lines.append(f"\n{story['text']}")
        lines.append("")

        # Fetch comments (`or []` also covers an explicit null value).
        kid_ids = story.get("kids") or []
        if kid_ids:
            lines.append("## Comments\n")
            # Single-element list: a mutable counter shared across recursion.
            count = [0]
            self._fetch_comments(kid_ids, lines, depth=0, count=count)

        return "\n".join(lines)

    def _fetch_comments(self, kid_ids: list, lines: list, depth: int, count: list) -> None:
        """Recursively fetch and format comments.

        Parameters
        ----------
        kid_ids : list
            Item IDs of the comments to fetch at this level.
        lines : list
            Output buffer; formatted comment lines are appended in place.
        depth : int
            Nesting level, used to indent the rendered comment.
        count : list
            Single-element list holding the number of comments emitted so
            far; traversal stops once it reaches ``self.max_comments``.

        Comments flagged ``deleted`` or ``dead`` are skipped together with
        their replies. Comment text is appended exactly as returned by the
        API (documented there as HTML).
        """
        indent = " " * depth
        for kid_id in kid_ids:
            if count[0] >= self.max_comments:
                return
            try:
                item = self._get_item(kid_id)
            except Exception as exc:
                # Best-effort: one unreachable comment must not abort the
                # whole download, but the failure should be visible.
                logger.warning("Skipping HN comment %s: %s", kid_id, exc)
                continue

            if item.get("deleted") or item.get("dead"):
                continue

            count[0] += 1
            author = item.get("by", "[deleted]")
            text = item.get("text") or ""
            lines.append(f"{indent}**{author}**:")
            lines.append(f"{indent}{text}")
            lines.append("")

            if item.get("kids"):
                self._fetch_comments(item["kids"], lines, depth + 1, count)

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button