PlanOpticon

planopticon / video_processor / sources / twitter_source.py
Source Blame History 129 lines
0981a08… noreply 1 """Twitter/X source connector -- stub requiring auth or gallery-dl."""
0981a08… noreply 2
0981a08… noreply 3 import logging
0981a08… noreply 4 from pathlib import Path
0981a08… noreply 5 from typing import List, Optional
0981a08… noreply 6
0981a08… noreply 7 from video_processor.sources.base import BaseSource, SourceFile
0981a08… noreply 8
0981a08… noreply 9 logger = logging.getLogger(__name__)
0981a08… noreply 10
0981a08… noreply 11
class TwitterSource(BaseSource):
    """
    Fetch Twitter/X posts and threads.

    Twitter API v2 requires authentication. This connector attempts to use
    gallery-dl as a fallback for public tweets.

    Auth options:
    - Set TWITTER_BEARER_TOKEN env var for API v2 access
    - Install gallery-dl for scraping public tweets: pip install gallery-dl
    """

    def __init__(self, url: str):
        """Remember the tweet/thread URL; no I/O is performed here."""
        self.url = url
        # Filled in by authenticate() from the TWITTER_BEARER_TOKEN env var.
        self._bearer_token: Optional[str] = None

    def authenticate(self) -> bool:
        """Check for Twitter API token or gallery-dl availability.

        Returns:
            True when TWITTER_BEARER_TOKEN is set or the ``gallery_dl``
            package can be imported; False (after logging setup
            instructions) otherwise.
        """
        import os

        self._bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
        if self._bearer_token:
            return True

        # Check for gallery-dl fallback
        try:
            import gallery_dl  # noqa: F401
        except ImportError:
            # Deliberately not fatal: fall through to the setup instructions.
            pass
        else:
            logger.info("Using gallery-dl for Twitter content extraction")
            return True

        logger.error(
            "Twitter source requires either:\n"
            "  1. TWITTER_BEARER_TOKEN env var (Twitter API v2)\n"
            "  2. gallery-dl installed: pip install gallery-dl\n"
            "Twitter API access: https://developer.twitter.com/en/portal/dashboard"
        )
        return False

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """Return a single SourceFile for the tweet/thread.

        ``folder_id``, ``folder_path`` and ``patterns`` are accepted for
        signature compatibility and are not used by this connector.
        """
        # Drop the fragment, query string and trailing slash before taking the
        # last path segment, so ".../status/123?s=20" and ".../status/123/"
        # both yield "123" instead of "123?s=20" or an empty name.
        trimmed = self.url.split("#", 1)[0].split("?", 1)[0].rstrip("/")
        last_segment = trimmed.rsplit("/", 1)[-1]
        return [
            SourceFile(
                name=last_segment or "tweet",
                id=self.url,
                mime_type="text/plain",
            )
        ]

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download tweet content as text.

        Creates the parent directory of ``destination`` if needed, writes the
        text returned by fetch_text() as UTF-8 and returns the path written.
        ``file`` is not consulted; the content always comes from ``self.url``.
        """
        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)
        text = self.fetch_text()
        destination.write_text(text, encoding="utf-8")
        logger.info("Saved Twitter content to %s", destination)
        return destination

    def fetch_text(self) -> str:
        """Extract tweet text via API or gallery-dl.

        Uses the API when a bearer token was found by authenticate(),
        otherwise shells out to gallery-dl.

        Raises:
            RuntimeError: if gallery-dl is not installed or exits non-zero.
        """
        if self._bearer_token:
            return self._fetch_via_api()

        try:
            return self._fetch_via_gallery_dl()
        except (ImportError, FileNotFoundError) as err:
            # subprocess.run raises FileNotFoundError when the gallery-dl
            # executable is missing; ImportError is kept for compatibility.
            raise RuntimeError(
                "No Twitter extraction method available. See authenticate() for setup."
            ) from err

    def _fetch_via_api(self) -> str:
        """Fetch tweet via Twitter API v2.

        Raises:
            ValueError: if ``self.url`` contains no ``/status/<digits>`` part.
            requests.HTTPError: if the API answers with an error status.
        """
        import re

        import requests

        match = re.search(r"/status/(\d+)", self.url)
        if not match:
            raise ValueError(f"Could not extract tweet ID from: {self.url}")

        tweet_id = match.group(1)
        resp = requests.get(
            f"https://api.twitter.com/2/tweets/{tweet_id}",
            headers={"Authorization": f"Bearer {self._bearer_token}"},
            params={"tweet.fields": "author_id,created_at,text"},
            timeout=15,
        )
        resp.raise_for_status()
        data = resp.json().get("data", {})
        return f"{data.get('text', '')}\n\nCreated: {data.get('created_at', 'unknown')}"

    def _fetch_via_gallery_dl(self) -> str:
        """Use gallery-dl to extract tweet metadata.

        Runs ``gallery-dl --dump-json <url>`` and joins the text of every
        metadata dict found in its output with blank lines.

        Raises:
            RuntimeError: if gallery-dl exits with a non-zero status.
            FileNotFoundError: if the gallery-dl executable is not on PATH.
        """
        import json
        import subprocess

        result = subprocess.run(
            ["gallery-dl", "--dump-json", self.url],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode != 0:
            raise RuntimeError(f"gallery-dl failed: {result.stderr}")

        # Nothing printed means nothing to parse; json.loads("") would raise.
        if not result.stdout.strip():
            return "No text content extracted."

        payload = json.loads(result.stdout)
        entries = payload if isinstance(payload, list) else [payload]

        texts = []
        seen = set()
        for entry in entries:
            # --dump-json emits each message as a list ([type, metadata] or
            # [type, url, metadata]); bare dicts are still accepted as before.
            candidates = entry if isinstance(entry, list) else [entry]
            for meta in candidates:
                if not isinstance(meta, dict):
                    continue
                value = meta.get("content")
                if value is None:
                    value = meta.get("text")
                # Fall back to the raw metadata when no text field is present.
                text = str(meta) if value is None else str(value)

                # A tweet with media is reported once per file; keep its text
                # only once. Entries without a tweet id are never merged.
                tweet_id = meta.get("tweet_id")
                if tweet_id is not None:
                    key = (str(tweet_id), text)
                    if key in seen:
                        continue
                    seen.add(key)
                texts.append(text)
        return "\n\n".join(texts) if texts else "No text content extracted."

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button