PlanOpticon

planopticon / video_processor / sources / github_source.py

Blame History Raw 157 lines

1	`"""GitHub source connector for fetching repo content, issues, and PRs."""`
2
3	`import logging`
4	`import os`
5	`from pathlib import Path`
6	`from typing import List, Optional`
7
8	`from video_processor.sources.base import BaseSource, SourceFile`
9
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# Root URL of the GitHub REST API; GitHubSource prefixes every
# "/repos/<owner>/<repo>/..." request path with it.
API_BASE = "https://api.github.com"
13
14
class GitHubSource(BaseSource):
    """
    Fetch GitHub repository README, issues, and pull requests as text documents.

    Auth: Set GITHUB_TOKEN env var, or use `gh auth token` output.
    Requires: pip install requests
    """

    # Seconds allowed for each HTTP request and for the `gh` CLI call.
    _TIMEOUT = 15
    # Page size requested from list endpoints.
    _PER_PAGE = 100
    # Safety bound on how many pages a single listing may follow, so a very
    # large repository cannot trigger an unbounded number of requests.
    _MAX_PAGES = 50

    def __init__(self, repo: str, include_issues: bool = True, include_prs: bool = True):
        """
        Parameters
        ----------
        repo : str
            GitHub repo in "owner/repo" format.
        include_issues : bool
            When True, ``list_videos`` includes the repository's issues.
        include_prs : bool
            When True, ``list_videos`` includes the repository's pull requests.
        """
        self.repo = repo
        self.include_issues = include_issues
        self.include_prs = include_prs
        self._token: Optional[str] = None

    def authenticate(self) -> bool:
        """
        Authenticate via GITHUB_TOKEN env var or gh CLI.

        The environment variable wins; ``gh auth token`` is only consulted
        when the variable is unset or empty. A missing token is not an error:
        a warning is logged and requests are sent without credentials.

        Returns
        -------
        bool
            Always True.
        """
        import subprocess

        self._token = os.environ.get("GITHUB_TOKEN") or None
        if not self._token:
            try:
                result = subprocess.run(
                    ["gh", "auth", "token"],
                    capture_output=True,
                    text=True,
                    timeout=self._TIMEOUT,
                )
            except (OSError, subprocess.TimeoutExpired) as exc:
                # gh is not installed, not executable, or hung; fall through
                # to unauthenticated access instead of failing.
                logger.debug("Could not obtain a token from the gh CLI: %s", exc)
            else:
                if result.returncode == 0:
                    self._token = result.stdout.strip() or None
        if not self._token:
            logger.warning(
                "No GitHub token found. Public repos only. Set GITHUB_TOKEN for private repos."
            )
        return True

    def _headers(self) -> dict:
        """Return the request headers, adding a Bearer token when one is set."""
        h = {"Accept": "application/vnd.github.v3+json"}
        if self._token:
            h["Authorization"] = f"Bearer {self._token}"
        return h

    def _get_json(self, url: str) -> dict:
        """GET ``url`` and return the decoded JSON body, raising on an HTTP error status."""
        import requests

        resp = requests.get(url, headers=self._headers(), timeout=self._TIMEOUT)
        resp.raise_for_status()
        return resp.json()

    def _get_paginated(self, url: str, params: Optional[dict] = None) -> List[dict]:
        """
        Collect the items of a list endpoint across all of its pages.

        Follows the ``rel="next"`` link of each response until there is none,
        a request fails, or ``_MAX_PAGES`` pages have been read. Failures are
        logged and end the listing early; the items gathered so far are
        returned rather than raising.
        """
        import requests

        items: List[dict] = []
        next_url: Optional[str] = url
        next_params = params
        pages = 0
        while next_url and pages < self._MAX_PAGES:
            resp = requests.get(
                next_url,
                headers=self._headers(),
                params=next_params,
                timeout=self._TIMEOUT,
            )
            if not resp.ok:
                logger.warning(
                    "GitHub request to %s failed with HTTP %s", next_url, resp.status_code
                )
                return items
            items.extend(resp.json())
            next_url = resp.links.get("next", {}).get("url")
            # The "next" URL already carries the full query string.
            next_params = None
            pages += 1
        if next_url:
            logger.warning(
                "Stopped listing %s after %d pages; results are truncated.", url, pages
            )
        return items

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """
        List available documents (README, issues, PRs) as SourceFiles.

        Parameters
        ----------
        folder_id, folder_path, patterns
            Not used by this source.

        Returns
        -------
        List[SourceFile]
            The README (id ``"readme"``) when the repository has one, then
            issues (id ``"issue:<number>"``) and pull requests
            (id ``"pr:<number>"``) according to the include flags.
        """
        import requests

        repo_url = f"{API_BASE}/repos/{self.repo}"
        files: List[SourceFile] = []

        # README
        resp = requests.get(
            f"{repo_url}/readme", headers=self._headers(), timeout=self._TIMEOUT
        )
        if resp.ok:
            files.append(SourceFile(name="README", id="readme", mime_type="text/markdown"))

        listing_params = {"state": "all", "per_page": self._PER_PAGE}

        # Issues
        if self.include_issues:
            for issue in self._get_paginated(f"{repo_url}/issues", listing_params):
                # The issues endpoint also returns pull requests; those carry
                # a "pull_request" key and are listed separately below.
                if "pull_request" in issue:
                    continue
                files.append(
                    SourceFile(
                        name=f"Issue #{issue['number']}: {issue['title']}",
                        id=f"issue:{issue['number']}",
                        mime_type="text/plain",
                    )
                )

        # PRs
        if self.include_prs:
            files.extend(
                SourceFile(
                    name=f"PR #{pr['number']}: {pr['title']}",
                    id=f"pr:{pr['number']}",
                    mime_type="text/plain",
                )
                for pr in self._get_paginated(f"{repo_url}/pulls", listing_params)
            )

        return files

    def download(self, file: SourceFile, destination: Path) -> Path:
        """
        Download a single document (README, issue, or PR) as text.

        Parameters
        ----------
        file : SourceFile
            Entry whose ``id`` is ``"readme"``, ``"issue:<number>"`` or
            ``"pr:<number>"``.
        destination : Path
            File to write (UTF-8). Missing parent directories are created.

        Returns
        -------
        Path
            ``destination`` as a ``Path``.

        Raises
        ------
        ValueError
            If ``file.id`` is not one of the recognised forms.
        requests.HTTPError
            If GitHub answers the document request with an error status.
        """
        import requests

        destination = Path(destination)
        repo_url = f"{API_BASE}/repos/{self.repo}"
        kind, _, number = file.id.partition(":")

        if file.id == "readme":
            resp = requests.get(
                f"{repo_url}/readme",
                headers={**self._headers(), "Accept": "application/vnd.github.v3.raw"},
                timeout=self._TIMEOUT,
            )
            # Without this check an error payload would be saved as the README.
            resp.raise_for_status()
            text = resp.text
        elif kind == "issue" and number:
            data = self._get_json(f"{repo_url}/issues/{number}")
            parts = [f"# {data['title']}\n\n{data.get('body') or ''}"]
            # Append comments (best effort: a failed comment request only
            # shortens the document).
            comments = self._get_paginated(
                data["comments_url"], {"per_page": self._PER_PAGE}
            )
            for comment in comments:
                # "user" and "body" may be null in the API response.
                author = (comment.get("user") or {}).get("login", "unknown")
                parts.append(f"\n\n---\n{author}: {comment.get('body') or ''}")
            text = "".join(parts)
        elif kind == "pr" and number:
            data = self._get_json(f"{repo_url}/pulls/{number}")
            text = f"# PR: {data['title']}\n\n{data.get('body') or ''}"
        else:
            raise ValueError(f"Unrecognized GitHub document id: {file.id!r}")

        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(text, encoding="utf-8")

        logger.info("Downloaded %s to %s", file.name, destination)
        return destination
157

PlanOpticon

Keyboard Shortcuts