PlanOpticon

planopticon / video_processor / sources / podcast_source.py
Source Blame History 119 lines
0981a08… noreply 1 """Podcast feed source connector -- extends RSS for audio enclosures."""
0981a08… noreply 2
0981a08… noreply 3 import logging
0981a08… noreply 4 from pathlib import Path
0981a08… noreply 5 from typing import List, Optional
0981a08… noreply 6
0981a08… noreply 7 from video_processor.sources.base import BaseSource, SourceFile
0981a08… noreply 8
0981a08… noreply 9 logger = logging.getLogger(__name__)
0981a08… noreply 10
0981a08… noreply 11
class PodcastSource(BaseSource):
    """
    Parse podcast RSS feeds and download audio episodes for pipeline processing.

    Extends the RSS pattern to extract <enclosure> audio URLs.
    Requires: pip install requests
    Optional: pip install feedparser

    The feed is fetched lazily on the first call to ``list_videos`` and the
    resulting episode list is kept on the instance. ``max_episodes`` limits
    how many feed entries are *inspected* (in feed order); entries without a
    usable audio URL are skipped, so fewer episodes may be returned.
    """

    # Sent with every HTTP request made by this source.
    _USER_AGENT = "PlanOpticon/0.3"
    # Reported when the feed does not declare an audio/* type for an episode.
    _DEFAULT_MIME_TYPE = "audio/mpeg"
    # XML namespace of the iTunes podcast extension (used for <itunes:duration>).
    _ITUNES_NS = "http://www.itunes.com/dtds/podcast-1.0.dtd"

    def __init__(self, feed_url: str, max_episodes: int = 10):
        """
        Args:
            feed_url: URL of the podcast RSS feed.
            max_episodes: Maximum number of feed entries to inspect.
        """
        self.feed_url = feed_url
        self.max_episodes = max_episodes
        # Each dict has the keys: title, url, published, duration, mime_type.
        self._episodes: List[dict] = []

    def authenticate(self) -> bool:
        """No auth needed for public podcast feeds."""
        return True

    @classmethod
    def _audio_mime_type(cls, declared) -> str:
        """Return *declared* if it is an ``audio/*`` type, else the default."""
        if isinstance(declared, str) and declared.startswith("audio/"):
            return declared
        return cls._DEFAULT_MIME_TYPE

    @staticmethod
    def _select_audio(entry) -> tuple:
        """
        Pick the audio URL for a feedparser entry.

        Returns:
            ``(url, declared_type)``. ``url`` is falsy when the entry has no
            usable audio link.
        """
        audio_url = None
        declared_type = None
        for link in entry.get("links", []):
            # "type" may be missing or None; normalise before the prefix check.
            link_type = link.get("type") or ""
            if link_type.startswith("audio/"):
                audio_url = link.get("href")
                declared_type = link_type
                break
        # Fall back to the first enclosure when no audio/* link carried a URL.
        if not audio_url and entry.get("enclosures"):
            first = entry["enclosures"][0]
            audio_url = first.get("href")
            declared_type = first.get("type")
        return audio_url, declared_type

    def _parse_feed(self) -> None:
        """
        Fetch and parse the podcast feed for audio enclosures.

        Does nothing when episodes have already been collected. Uses
        ``feedparser`` when it is installed, otherwise the stdlib XML fallback.

        Raises:
            requests.RequestException: if the feed cannot be fetched or the
                server answers with an error status.
        """
        if self._episodes:
            return

        import requests

        resp = requests.get(
            self.feed_url, timeout=15, headers={"User-Agent": self._USER_AGENT}
        )
        resp.raise_for_status()
        # Parse the raw bytes rather than resp.text: requests falls back to
        # ISO-8859-1 for text/* responses without a charset, which garbles
        # UTF-8 feeds. Both parsers detect the encoding from the XML itself.
        payload = resp.content

        # Guard only the import, so an ImportError raised while parsing is not
        # mistaken for "feedparser is not installed".
        try:
            import feedparser
        except ImportError:
            logger.debug("feedparser not available, using xml.etree fallback")
            self._parse_xml(payload)
            return

        feed = feedparser.parse(payload)
        # Collect locally and publish at the end, so a failure part-way through
        # never leaves a partial list cached on the instance.
        episodes = []
        for entry in feed.entries[: self.max_episodes]:
            audio_url, declared_type = self._select_audio(entry)
            if not audio_url:
                continue
            episodes.append(
                {
                    "title": entry.get("title", "Untitled"),
                    "url": audio_url,
                    "published": entry.get("published", ""),
                    "duration": entry.get("itunes_duration", ""),
                    "mime_type": self._audio_mime_type(declared_type),
                }
            )
        self._episodes.extend(episodes)

    def _parse_xml(self, text: "str | bytes") -> None:
        """
        Fallback parser for podcast XML using stdlib.

        Appends one episode dict per ``<item>`` that has an ``<enclosure>``
        with a non-empty ``url`` attribute.

        Args:
            text: The feed document. Bytes are preferred so the encoding named
                in the XML declaration is honoured; ``str`` is also accepted.

        Raises:
            xml.etree.ElementTree.ParseError: if the document is not
                well-formed XML.
        """
        # NOTE: the Python docs state that xml.etree.ElementTree is not secure
        # against maliciously constructed data; consider a hardened parser if
        # feeds come from untrusted publishers.
        import xml.etree.ElementTree as ET

        root = ET.fromstring(text)
        duration_tag = "{%s}duration" % self._ITUNES_NS
        episodes = []
        for item in root.findall(".//item")[: self.max_episodes]:
            enclosure = item.find("enclosure")
            if enclosure is None:
                continue
            audio_url = enclosure.get("url", "")
            if not audio_url:
                continue
            episodes.append(
                {
                    "title": (item.findtext("title") or "").strip() or "Untitled",
                    "url": audio_url,
                    "published": (item.findtext("pubDate") or "").strip(),
                    # Mirrors the feedparser path, which reports itunes_duration.
                    "duration": (item.findtext(duration_tag) or "").strip(),
                    "mime_type": self._audio_mime_type(enclosure.get("type")),
                }
            )
        self._episodes.extend(episodes)

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """
        List podcast episodes as SourceFiles.

        ``folder_id``, ``folder_path`` and ``patterns`` are accepted but not
        used by this source.

        Returns:
            One SourceFile per episode: ``name`` is the episode title, ``id``
            is the audio URL, and ``modified_at`` is the feed's publication
            string.
        """
        self._parse_feed()
        return [
            SourceFile(
                name=ep["title"],
                id=ep["url"],
                # .get(): tolerate episode dicts that lack the mime_type key.
                mime_type=ep.get("mime_type") or self._DEFAULT_MIME_TYPE,
                modified_at=ep["published"],
            )
            for ep in self._episodes
        ]

    def download(self, file: SourceFile, destination: Path) -> Path:
        """
        Download the podcast audio file.

        The body is streamed to ``<destination>.part`` and renamed into place
        only after the transfer completes, so a failed download never leaves a
        truncated file at ``destination``.

        Args:
            file: Episode to fetch; ``file.id`` is used as the audio URL.
            destination: Target file path. Parent directories are created.

        Returns:
            ``destination`` as a ``Path``.

        Raises:
            requests.RequestException: on connection errors or an error status.
            OSError: if the file cannot be written.
        """
        import requests

        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        partial = destination.with_name(destination.name + ".part")

        try:
            # The context manager releases the streamed connection even when
            # the transfer is aborted half-way.
            with requests.get(
                file.id,
                timeout=60,
                stream=True,
                headers={"User-Agent": self._USER_AGENT},
            ) as resp:
                resp.raise_for_status()
                with open(partial, "wb") as fh:
                    for chunk in resp.iter_content(chunk_size=8192):
                        if chunk:  # skip empty keep-alive chunks
                            fh.write(chunk)
            partial.replace(destination)
        except BaseException:
            # Remove the incomplete temp file, then let the error propagate.
            if partial.exists():
                partial.unlink()
            raise

        logger.info("Downloaded podcast episode to %s", destination)
        return destination

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button