PlanOpticon

planopticon / video_processor / sources / rss_source.py
Source Blame History 114 lines
0981a08… noreply 1 """RSS/Atom feed source connector."""
0981a08… noreply 2
0981a08… noreply 3 import logging
0981a08… noreply 4 from pathlib import Path
0981a08… noreply 5 from typing import List, Optional
0981a08… noreply 6
0981a08… noreply 7 from video_processor.sources.base import BaseSource, SourceFile
0981a08… noreply 8
0981a08… noreply 9 logger = logging.getLogger(__name__)
0981a08… noreply 10
0981a08… noreply 11
0981a08… noreply 12 class RSSSource(BaseSource):
0981a08… noreply 13 """
0981a08… noreply 14 Parse RSS/Atom feeds and extract entries as text documents.
0981a08… noreply 15
0981a08… noreply 16 Optional: pip install feedparser (falls back to xml.etree.ElementTree)
0981a08… noreply 17 Requires: pip install requests
0981a08… noreply 18 """
0981a08… noreply 19
0981a08… noreply 20 def __init__(self, url: str, max_entries: int = 50):
0981a08… noreply 21 self.url = url
0981a08… noreply 22 self.max_entries = max_entries
0981a08… noreply 23 self._entries: List[dict] = []
0981a08… noreply 24
0981a08… noreply 25 def authenticate(self) -> bool:
0981a08… noreply 26 """No auth needed for public feeds."""
0981a08… noreply 27 return True
0981a08… noreply 28
0981a08… noreply 29 def _parse_feed(self) -> None:
0981a08… noreply 30 """Fetch and parse the feed."""
0981a08… noreply 31 if self._entries:
0981a08… noreply 32 return
0981a08… noreply 33
0981a08… noreply 34 import requests
0981a08… noreply 35
0981a08… noreply 36 resp = requests.get(self.url, timeout=15, headers={"User-Agent": "PlanOpticon/0.3"})
0981a08… noreply 37 resp.raise_for_status()
0981a08… noreply 38
0981a08… noreply 39 try:
0981a08… noreply 40 import feedparser
0981a08… noreply 41
0981a08… noreply 42 feed = feedparser.parse(resp.text)
0981a08… noreply 43 for entry in feed.entries[: self.max_entries]:
0981a08… noreply 44 self._entries.append(
0981a08… noreply 45 {
0981a08… noreply 46 "title": entry.get("title", "Untitled"),
0981a08… noreply 47 "link": entry.get("link", ""),
0981a08… noreply 48 "summary": entry.get("summary", ""),
0981a08… noreply 49 "published": entry.get("published", ""),
0981a08… noreply 50 "id": entry.get("id", entry.get("link", "")),
0981a08… noreply 51 }
0981a08… noreply 52 )
0981a08… noreply 53 except ImportError:
0981a08… noreply 54 logger.debug("feedparser not available, using xml.etree fallback")
0981a08… noreply 55 self._parse_xml(resp.text)
0981a08… noreply 56
0981a08… noreply 57 def _parse_xml(self, text: str) -> None:
0981a08… noreply 58 """Fallback parser using stdlib xml.etree."""
0981a08… noreply 59 import xml.etree.ElementTree as ET
0981a08… noreply 60
0981a08… noreply 61 root = ET.fromstring(text)
0981a08… noreply 62 # Handle RSS 2.0
0981a08… noreply 63 ns = {"atom": "http://www.w3.org/2005/Atom"}
0981a08… noreply 64 items = root.findall(".//item") or root.findall(".//atom:entry", ns)
0981a08… noreply 65 for item in items[: self.max_entries]:
0981a08… noreply 66 title = (
0981a08… noreply 67 item.findtext("title") or item.findtext("atom:title", namespaces=ns) or "Untitled"
0981a08… noreply 68 )
0981a08… noreply 69 link = item.findtext("link") or ""
0981a08… noreply 70 if not link:
0981a08… noreply 71 link_el = item.find("atom:link", ns)
0981a08… noreply 72 link = link_el.get("href", "") if link_el is not None else ""
0981a08… noreply 73 desc = (
0981a08… noreply 74 item.findtext("description") or item.findtext("atom:summary", namespaces=ns) or ""
0981a08… noreply 75 )
0981a08… noreply 76 pub = item.findtext("pubDate") or item.findtext("atom:published", namespaces=ns) or ""
0981a08… noreply 77 self._entries.append(
0981a08… noreply 78 {"title": title, "link": link, "summary": desc, "published": pub, "id": link}
0981a08… noreply 79 )
0981a08… noreply 80
0981a08… noreply 81 def list_videos(
0981a08… noreply 82 self,
0981a08… noreply 83 folder_id: Optional[str] = None,
0981a08… noreply 84 folder_path: Optional[str] = None,
0981a08… noreply 85 patterns: Optional[List[str]] = None,
0981a08… noreply 86 ) -> List[SourceFile]:
0981a08… noreply 87 """List feed entries as SourceFiles."""
0981a08… noreply 88 self._parse_feed()
0981a08… noreply 89 return [
0981a08… noreply 90 SourceFile(
0981a08… noreply 91 name=e["title"], id=e["id"], mime_type="text/plain", modified_at=e["published"]
0981a08… noreply 92 )
0981a08… noreply 93 for e in self._entries
0981a08… noreply 94 ]
0981a08… noreply 95
0981a08… noreply 96 def download(self, file: SourceFile, destination: Path) -> Path:
0981a08… noreply 97 """Write an entry's content as a text file."""
0981a08… noreply 98 self._parse_feed()
0981a08… noreply 99 destination = Path(destination)
0981a08… noreply 100 destination.parent.mkdir(parents=True, exist_ok=True)
0981a08… noreply 101
0981a08… noreply 102 entry = next((e for e in self._entries if e["id"] == file.id), None)
0981a08… noreply 103 if not entry:
0981a08… noreply 104 raise ValueError(f"Entry not found: {file.id}")
0981a08… noreply 105
0981a08… noreply 106 text = (
0981a08… noreply 107 f"# {entry['title']}\n\n"
0981a08… noreply 108 f"Published: {entry['published']}\n"
0981a08… noreply 109 f"Link: {entry['link']}\n\n"
0981a08… noreply 110 f"{entry['summary']}"
0981a08… noreply 111 )
0981a08… noreply 112 destination.write_text(text, encoding="utf-8")
0981a08… noreply 113 logger.info(f"Saved RSS entry to {destination}")
0981a08… noreply 114 return destination

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button