"""Reddit source connector using the public JSON API."""

import logging
from pathlib import Path
from typing import List, Optional

from video_processor.sources.base import BaseSource, SourceFile

logger = logging.getLogger(__name__)

class RedditSource(BaseSource):
    """
    Fetch Reddit posts and comments via the public JSON API.

    No auth required for public posts. Append .json to any Reddit URL.
    Requires: pip install requests
    """

    def __init__(self, url: str):
        """
        Parameters
        ----------
        url : str
            Reddit post or subreddit URL.
        """
        # Normalize once so later ".json" suffixing is safe.
        self.url = url.rstrip("/")

    def authenticate(self) -> bool:
        """No auth needed for public Reddit content."""
        return True

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """Return a single SourceFile for the Reddit post.

        The folder/pattern parameters are accepted for interface
        compatibility with other sources and are ignored here.
        """
        return [
            SourceFile(
                # Last path segment is usually the post slug; fall back to
                # a fixed name when the URL has no usable tail.
                name=self.url.split("/")[-1] or "reddit_post",
                id=self.url,
                mime_type="text/plain",
            )
        ]

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download post and comments as plain text.

        Parameters
        ----------
        file : SourceFile
            Accepted for interface compatibility; the content fetched is
            always the post identified by ``self.url``.
        destination : Path
            Target file path; parent directories are created as needed.

        Returns
        -------
        Path
            The path the text was written to.
        """
        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        text = self.fetch_text()
        destination.write_text(text, encoding="utf-8")
        # Lazy %-args: message is only formatted if INFO is enabled.
        logger.info("Saved Reddit content to %s", destination)
        return destination

    def fetch_text(self) -> str:
        """Fetch the Reddit post and comments as structured text.

        Returns
        -------
        str
            Markdown-ish text: title, author/score line, selftext, then a
            recursively indented comment tree.

        Raises
        ------
        requests.HTTPError
            If Reddit responds with an error status.
        """
        import requests

        # self.url is already normalized in __init__, so suffixing is safe.
        json_url = self.url + ".json"
        resp = requests.get(
            json_url,
            timeout=15,
            # Reddit throttles/blocks requests without a descriptive UA.
            headers={"User-Agent": "PlanOpticon/0.3 (source connector)"},
        )
        resp.raise_for_status()
        data = resp.json()

        lines: List[str] = []
        # A post URL returns a two-element list: [post listing, comments].
        if isinstance(data, list) and len(data) > 0:
            children = data[0].get("data", {}).get("children", [])
            # Guard: a removed/empty listing would otherwise IndexError.
            if children:
                post = children[0]["data"]
                lines.append(f"# {post.get('title', 'Untitled')}")
                lines.append(
                    f"by u/{post.get('author', '[deleted]')} | {post.get('score', 0)} points"
                )
                lines.append("")
                if post.get("selftext"):
                    lines.append(post["selftext"])
                    lines.append("")

            # Comments in second listing
            if len(data) > 1:
                lines.append("## Comments\n")
                self._extract_comments(data[1]["data"]["children"], lines, depth=0)

        return "\n".join(lines)

    def _extract_comments(self, children: list, lines: list, depth: int) -> None:
        """Recursively extract comment text.

        Parameters
        ----------
        children : list
            Reddit listing children (dicts with ``"kind"`` and ``"data"``).
        lines : list
            Output accumulator; formatted lines are appended in place.
        depth : int
            Nesting level; controls indentation of the rendered comment.
        """
        indent = " " * depth
        for child in children:
            # "t1" is a comment; skip "more" stubs and any other kinds.
            if child.get("kind") != "t1":
                continue
            c = child["data"]
            author = c.get("author", "[deleted]")
            body = c.get("body", "")
            lines.append(f"{indent}**{author}** ({c.get('score', 0)} pts):")
            lines.append(f"{indent}{body}")
            lines.append("")
            # Recurse into replies: Reddit sends "" when there are none,
            # and a listing dict when there are.
            replies = c.get("replies")
            if isinstance(replies, dict):
                self._extract_comments(replies["data"]["children"], lines, depth + 1)