PlanOpticon

planopticon / video_processor / sources / reddit_source.py
Source Blame History 103 lines
0981a08… noreply 1 """Reddit source connector using the public JSON API."""
0981a08… noreply 2
0981a08… noreply 3 import logging
0981a08… noreply 4 from pathlib import Path
0981a08… noreply 5 from typing import List, Optional
0981a08… noreply 6
0981a08… noreply 7 from video_processor.sources.base import BaseSource, SourceFile
0981a08… noreply 8
0981a08… noreply 9 logger = logging.getLogger(__name__)
0981a08… noreply 10
0981a08… noreply 11
class RedditSource(BaseSource):
    """
    Fetch Reddit posts and comments via the public JSON API.

    No auth required for public posts. Append .json to any Reddit URL.
    Requires: pip install requests

    A post URL yields the post followed by its comment tree; a subreddit
    (listing) URL yields every post contained in the returned listing.
    """

    def __init__(self, url: str):
        """
        Parameters
        ----------
        url : str
            Reddit post or subreddit URL.
        """
        # Trailing slashes are dropped so the last path segment can be used
        # as a file name and ".json" can be appended cleanly.
        self.url = url.rstrip("/")

    def authenticate(self) -> bool:
        """No auth needed for public Reddit content."""
        return True

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """Return a single SourceFile for the Reddit post.

        The arguments are accepted for interface compatibility and are not
        used: the source always describes exactly one text document, whose
        id is the configured URL.
        """
        return [
            SourceFile(
                name=self.url.split("/")[-1] or "reddit_post",
                id=self.url,
                mime_type="text/plain",
            )
        ]

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download post and comments as plain text.

        Parameters
        ----------
        file : SourceFile
            Unused; the content is always fetched from ``self.url``.
        destination : Path
            File to write. Missing parent directories are created.

        Returns
        -------
        Path
            The path the UTF-8 text was written to.
        """
        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        text = self.fetch_text()
        destination.write_text(text, encoding="utf-8")
        logger.info("Saved Reddit content to %s", destination)
        return destination

    def fetch_text(self) -> str:
        """Fetch the Reddit post and comments as structured text.

        Returns
        -------
        str
            Markdown-like text. For a post URL: title, byline, self text and
            a ``## Comments`` section. For a subreddit URL: title, byline and
            self text of every post in the listing. Empty string when the
            response contains nothing recognisable.

        Raises
        ------
        requests.HTTPError
            If the HTTP response status indicates an error.
        """
        import requests

        resp = requests.get(
            self._json_url(),
            timeout=15,
            headers={"User-Agent": "PlanOpticon/0.3 (source connector)"},
        )
        resp.raise_for_status()
        data = resp.json()

        lines: List[str] = []
        if isinstance(data, list):
            # Post permalink: [post listing, comment listing].
            if data:
                posts = self._listing_children(data[0])
                if posts:
                    self._append_post(posts[0].get("data", {}), lines)
            if len(data) > 1:
                lines.append("## Comments\n")
                self._extract_comments(self._listing_children(data[1]), lines, depth=0)
        elif isinstance(data, dict):
            # Subreddit URL: a single listing whose children are posts ("t3").
            for child in self._listing_children(data):
                if isinstance(child, dict) and child.get("kind") == "t3":
                    self._append_post(child.get("data", {}), lines)

        return "\n".join(lines)

    def _json_url(self) -> str:
        """Return ``self.url`` with ``.json`` appended to its path.

        The suffix is added to the path component only, so query strings and
        fragments stay intact, and it is not duplicated when the URL already
        ends in ``.json``.
        """
        from urllib.parse import urlsplit, urlunsplit

        parts = urlsplit(self.url)
        path = parts.path.rstrip("/")
        if not path.endswith(".json"):
            path += ".json"
        return urlunsplit((parts.scheme, parts.netloc, path, parts.query, parts.fragment))

    @staticmethod
    def _listing_children(listing) -> list:
        """Return ``listing["data"]["children"]`` or ``[]`` if the shape is off."""
        if not isinstance(listing, dict):
            return []
        payload = listing.get("data")
        if not isinstance(payload, dict):
            return []
        children = payload.get("children")
        return children if isinstance(children, list) else []

    @staticmethod
    def _append_post(post: dict, lines: list) -> None:
        """Append title, byline and (if present) self text of *post* to *lines*."""
        lines.append(f"# {post.get('title', 'Untitled')}")
        lines.append(f"by u/{post.get('author', '[deleted]')} | {post.get('score', 0)} points")
        lines.append("")
        if post.get("selftext"):
            lines.append(post["selftext"])
            lines.append("")

    def _extract_comments(self, children: list, lines: list, depth: int) -> None:
        """Recursively extract comment text.

        Parameters
        ----------
        children : list
            Entries of a comment listing; only ``kind == "t1"`` entries are
            rendered, everything else is skipped.
        lines : list
            Output buffer, extended in place.
        depth : int
            Nesting level; each level adds one indent unit to the output.
        """
        indent = " " * depth
        for child in children:
            if not isinstance(child, dict) or child.get("kind") != "t1":
                continue
            c = child.get("data") or {}
            author = c.get("author", "[deleted]")
            body = c.get("body", "")
            lines.append(f"{indent}**{author}** ({c.get('score', 0)} pts):")
            lines.append(f"{indent}{body}")
            lines.append("")
            # Only a dict value for "replies" carries a nested listing;
            # anything else is treated as "no replies".
            replies = c.get("replies")
            if isinstance(replies, dict):
                self._extract_comments(self._listing_children(replies), lines, depth + 1)

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button