Sources API Reference¶

`video_processor.sources.base` ¶

Base interface for cloud source integrations.

`BaseSource` ¶

Bases: ABC

Abstract base class for cloud source integrations.

Source code in video_processor/sources/base.py

class BaseSource(ABC):
    """Abstract base class for cloud source integrations.

    Subclasses implement ``authenticate``, ``list_videos`` and ``download``;
    ``download_all`` is a concrete helper built on top of ``download``.
    """

    @abstractmethod
    def authenticate(self) -> bool:
        """Authenticate with the cloud provider. Returns True on success."""
        ...

    @abstractmethod
    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """List video files in a folder.

        Args:
            folder_id: Provider-specific folder/container identifier.
            folder_path: Path within the source (e.g., folder name).
            patterns: File name glob patterns to filter results.

        Returns:
            The available files matching the criteria.
        """
        ...

    @abstractmethod
    def download(
        self,
        file: SourceFile,
        destination: Path,
    ) -> Path:
        """Download a file to a local path. Returns the local path.

        Args:
            file: File descriptor, as returned by ``list_videos``.
            destination: Local destination path.
        """
        ...

    def download_all(
        self,
        files: List[SourceFile],
        destination_dir: Path,
    ) -> List[Path]:
        """Download multiple files to a directory, preserving subfolder structure.

        Each file is written below ``destination_dir`` at its ``path`` when
        that is set, otherwise at its ``name``. A failing download is logged
        and skipped, so one bad file does not abort the batch.

        Args:
            files: Files to download.
            destination_dir: Base directory for downloads (created if needed).

        Returns:
            Local paths of the files that downloaded successfully.
        """
        destination_dir.mkdir(parents=True, exist_ok=True)
        paths = []
        for f in files:
            # Use path (with subfolder) if available, otherwise just name
            relative = f.path or f.name
            dest = destination_dir / relative
            try:
                # A nested ``path`` points into subfolders that may not exist
                # yet; create them here so download() can open the target.
                # Kept inside the try so a mkdir failure is logged and skipped
                # like any other per-file failure.
                dest.parent.mkdir(parents=True, exist_ok=True)
                local_path = self.download(f, dest)
                paths.append(local_path)
                logger.info(f"Downloaded: {relative}")
            except Exception as e:
                logger.error(f"Failed to download {relative}: {e}")
        return paths

`authenticate()` `abstractmethod` ¶

Authenticate with the cloud provider. Returns True on success.

Source code in video_processor/sources/base.py

@abstractmethod
def authenticate(self) -> bool:
    """Authenticate with the cloud provider. Returns True on success.

    Returns:
        True on successful authentication, False on failure.
    """
    ...

`download(file, destination)` `abstractmethod` ¶

Download a file to a local path. Returns the local path.

Source code in video_processor/sources/base.py

@abstractmethod
def download(
    self,
    file: SourceFile,
    destination: Path,
) -> Path:
    """Download a file to a local path. Returns the local path.

    Args:
        file: File descriptor, as returned by ``list_videos()``.
        destination: Local destination path.

    Returns:
        The local path where the file was saved.
    """
    ...

`download_all(files, destination_dir)` ¶

Download multiple files to a directory, preserving subfolder structure.

Source code in video_processor/sources/base.py

def download_all(
    self,
    files: List[SourceFile],
    destination_dir: Path,
) -> List[Path]:
    """Download multiple files to a directory, preserving subfolder structure.

    Each file is written below ``destination_dir`` at its ``path`` when that
    is set, otherwise at its ``name``. A failing download is logged and
    skipped, so one bad file does not abort the batch.

    Args:
        files: Files to download.
        destination_dir: Base directory for downloads (created if needed).

    Returns:
        Local paths of the files that downloaded successfully.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)
    paths = []
    for f in files:
        # Use path (with subfolder) if available, otherwise just name
        relative = f.path or f.name
        dest = destination_dir / relative
        try:
            # A nested ``path`` points into subfolders that may not exist yet;
            # create them here so download() can open the target. Kept inside
            # the try so a mkdir failure is logged and skipped like any other
            # per-file failure.
            dest.parent.mkdir(parents=True, exist_ok=True)
            local_path = self.download(f, dest)
            paths.append(local_path)
            logger.info(f"Downloaded: {relative}")
        except Exception as e:
            logger.error(f"Failed to download {relative}: {e}")
    return paths

`list_videos(folder_id=None, folder_path=None, patterns=None)` `abstractmethod` ¶

List video files in a folder.

Source code in video_processor/sources/base.py

@abstractmethod
def list_videos(
    self,
    folder_id: Optional[str] = None,
    folder_path: Optional[str] = None,
    patterns: Optional[List[str]] = None,
) -> List[SourceFile]:
    """List video files in a folder.

    Args:
        folder_id: Provider-specific folder/container identifier.
        folder_path: Path within the source (e.g., folder name).
        patterns: File name glob patterns to filter results.

    Returns:
        The available files matching the criteria.
    """
    ...

`SourceFile` ¶

Bases: BaseModel

A file available in a cloud source.

Source code in video_processor/sources/base.py

class SourceFile(BaseModel):
    """A file available in a cloud source.

    Only ``name`` and ``id`` are required; every other field defaults to None.
    """

    name: str = Field(description="File name")
    id: str = Field(description="Provider-specific file identifier")
    size_bytes: Optional[int] = Field(default=None, description="File size in bytes")
    mime_type: Optional[str] = Field(default=None, description="MIME type")
    modified_at: Optional[str] = Field(default=None, description="Last modified timestamp")
    # When set, download_all() places the file at this relative path under the
    # destination directory; otherwise it falls back to ``name``.
    path: Optional[str] = Field(default=None, description="Path within the source folder")

Overview¶

The sources module provides a unified interface for fetching content from cloud services, local applications, and the web. All sources implement the BaseSource abstract class, providing consistent authenticate(), list_videos(), and download() methods.

Sources are lazy-loaded to avoid pulling in optional dependencies at import time. You can import any source directly from video_processor.sources and the correct module will be loaded on demand.

BaseSource (ABC)¶

from video_processor.sources import BaseSource

Abstract base class that all source integrations implement. Defines the standard three-step workflow: authenticate, list, download.

authenticate()¶

@abstractmethod
def authenticate(self) -> bool

Authenticate with the cloud provider or service. Uses the auth strategy defined for the source (OAuth, API key, local access, etc.).

Returns: bool -- True on successful authentication, False on failure.

list_videos()¶

@abstractmethod
def list_videos(
    self,
    folder_id: Optional[str] = None,
    folder_path: Optional[str] = None,
    patterns: Optional[List[str]] = None,
) -> List[SourceFile]

List available video files (or other content, depending on the source).

Parameters:

Parameter	Type	Default	Description
`folder_id`	`Optional[str]`	`None`	Provider-specific folder/container identifier
`folder_path`	`Optional[str]`	`None`	Path within the source (e.g., folder name)
`patterns`	`Optional[List[str]]`	`None`	File name glob patterns to filter results

Returns: List[SourceFile] -- available files matching the criteria.

download()¶

@abstractmethod
def download(
    self,
    file: SourceFile,
    destination: Path,
) -> Path

Download a single file to a local path.

Parameters:

Parameter	Type	Description
`file`	`SourceFile`	File descriptor from `list_videos()`
`destination`	`Path`	Local destination path

Returns: Path -- the local path where the file was saved.

download_all()¶

def download_all(
    self,
    files: List[SourceFile],
    destination_dir: Path,
) -> List[Path]

Download multiple files to a directory, preserving subfolder structure from SourceFile.path. This is a concrete method provided by the base class.

Parameters:

Parameter	Type	Description
`files`	`List[SourceFile]`	Files to download
`destination_dir`	`Path`	Base directory for downloads (created if needed)

Returns: List[Path] -- local paths of successfully downloaded files. Failed downloads are logged and skipped.

SourceFile¶

from video_processor.sources import SourceFile

Pydantic model describing a file available in a cloud source.

Field	Type	Default	Description
`name`	`str`	required	File name
`id`	`str`	required	Provider-specific file identifier
`size_bytes`	`Optional[int]`	`None`	File size in bytes
`mime_type`	`Optional[str]`	`None`	MIME type (e.g., `"video/mp4"`)
`modified_at`	`Optional[str]`	`None`	Last modified timestamp
`path`	`Optional[str]`	`None`	Path within the source folder (used for subfolder structure in `download_all`)

{
  "name": "sprint-review-2026-03-01.mp4",
  "id": "abc123def456",
  "size_bytes": 524288000,
  "mime_type": "video/mp4",
  "modified_at": "2026-03-01T14:30:00Z",
  "path": "recordings/march/sprint-review-2026-03-01.mp4"
}

Lazy Loading Pattern¶

All sources are lazy-loaded via __getattr__ in the package __init__.py. This means importing video_processor.sources does not pull in any external dependencies (e.g., google-auth, msal, notion-client). The actual module is loaded only when you access the class.

# Accessing ZoomSource on the package triggers __getattr__, so the
# zoom_source module (and its dependencies) are loaded by this import
from video_processor.sources import ZoomSource

# The class was already resolved by the import above
source = ZoomSource()

Available Sources¶

Cloud Recordings¶

Sources for fetching recorded meetings from video conferencing platforms.

Source	Class	Auth Method	Description
Zoom	`ZoomSource`	OAuth / Server-to-Server	List and download Zoom cloud recordings
Google Meet	`MeetRecordingSource`	OAuth (Google)	List and download Google Meet recordings from Drive
Microsoft Teams	`TeamsRecordingSource`	OAuth (Microsoft)	List and download Teams meeting recordings

Cloud Storage and Workspace¶

Sources for accessing files stored in cloud platforms.

Source	Class	Auth Method	Description
Google Drive	`GoogleDriveSource`	OAuth (Google)	Files from Google Drive
Google Workspace	`GWSSource`	OAuth (Google)	Google Docs, Sheets, Slides
Microsoft 365	`M365Source`	OAuth (Microsoft)	OneDrive, SharePoint files
Notion	`NotionSource`	OAuth / API key	Notion pages and databases
GitHub	`GitHubSource`	OAuth / API token	Repository files, issues, discussions
Dropbox	`DropboxSource`	OAuth / access token	Files from Dropbox (credentials supplied via auth config)

Notes Applications¶

Sources for local and cloud-based note-taking apps.

Source	Class	Auth Method	Description
Apple Notes	`AppleNotesSource`	Local (macOS)	Notes from Apple Notes.app
Obsidian	`ObsidianSource`	Local filesystem	Markdown files from Obsidian vaults
Logseq	`LogseqSource`	Local filesystem	Pages from Logseq graphs
OneNote	`OneNoteSource`	OAuth (Microsoft)	Microsoft OneNote notebooks
Google Keep	`GoogleKeepSource`	OAuth (Google)	Google Keep notes

Web and Content¶

Sources for fetching content from the web.

Source	Class	Auth Method	Description
YouTube	`YouTubeSource`	API key / OAuth	YouTube video metadata and transcripts
Web	`WebSource`	None	General web page content extraction
RSS	`RSSSource`	None	RSS/Atom feed entries
Podcast	`PodcastSource`	None	Podcast episodes from RSS feeds
arXiv	`ArxivSource`	None	Academic papers from arXiv
Hacker News	`HackerNewsSource`	None	Hacker News posts and comments
Reddit	`RedditSource`	API credentials	Reddit posts and comments
Twitter/X	`TwitterSource`	API credentials	Tweets and threads

Auth Integration¶

Most sources use PlanOpticon's unified auth system (see Auth API). The typical pattern within a source implementation:

from pathlib import Path

from video_processor.auth import get_auth_manager
from video_processor.sources import BaseSource, SourceFile


class MySource(BaseSource):
    """Example source showing the typical token-based auth pattern."""

    def __init__(self):
        # No token until authenticate() succeeds
        self._token = None

    def authenticate(self) -> bool:
        """Fetch a token from the auth manager; True when one was obtained."""
        manager = get_auth_manager("my_service")
        if manager:
            token = manager.get_token()
            if token:
                self._token = token
                return True
        return False

    def list_videos(self, **kwargs) -> list[SourceFile]:
        """List the files this source exposes; requires a prior authenticate()."""
        if not self._token:
            raise RuntimeError("Not authenticated. Call authenticate() first.")
        # Use self._token to call the API
        ...

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download one file; requires a prior authenticate()."""
        # download() is abstract on BaseSource as well, so it must be defined
        # here or MySource() cannot be instantiated.
        if not self._token:
            raise RuntimeError("Not authenticated. Call authenticate() first.")
        # Use self._token to fetch the file and write it to destination
        ...

Usage Examples¶

Listing and downloading Zoom recordings¶

from pathlib import Path
from video_processor.sources import ZoomSource

source = ZoomSource()
# authenticate() returns True on success, so the listing only runs when signed in
if source.authenticate():
    recordings = source.list_videos()
    for rec in recordings:
        # size_bytes is optional on SourceFile, so this can print "None bytes"
        print(f"{rec.name} ({rec.size_bytes} bytes)")

    # Download all to a local directory
    # (the returned list holds only the files that downloaded successfully)
    paths = source.download_all(recordings, Path("./downloads"))

Fetching from multiple sources¶

from pathlib import Path
from video_processor.sources import GoogleDriveSource, NotionSource

# Google Drive
gdrive = GoogleDriveSource()
if gdrive.authenticate():
    # folder_path selects the folder by path; patterns are file name globs
    files = gdrive.list_videos(
        folder_path="Meeting Recordings",
        patterns=["*.mp4", "*.webm"],
    )
    # Failed downloads are logged and skipped rather than raised
    gdrive.download_all(files, Path("./drive-downloads"))

# Notion
notion = NotionSource()
if notion.authenticate():
    pages = notion.list_videos()  # Lists Notion pages
    for page in pages:
        print(f"Page: {page.name}")

YouTube content¶

from video_processor.sources import YouTubeSource

yt = YouTubeSource()
if yt.authenticate():
    # Here the playlist URL is passed through the folder_path argument
    videos = yt.list_videos(folder_path="https://youtube.com/playlist?list=...")
    for v in videos:
        # id is the provider-specific identifier of each entry
        print(f"{v.name} - {v.id}")

PlanOpticon

Sources API Reference¶

video_processor.sources.base ¶

BaseSource ¶

authenticate() abstractmethod ¶

download(file, destination) abstractmethod ¶

download_all(files, destination_dir) ¶

list_videos(folder_id=None, folder_path=None, patterns=None) abstractmethod ¶

SourceFile ¶

Overview¶

BaseSource (ABC)¶

authenticate()¶

list_videos()¶

download()¶

download_all()¶

SourceFile¶

Lazy Loading Pattern¶

Available Sources¶

Cloud Recordings¶

Cloud Storage and Workspace¶

Notes Applications¶

Web and Content¶

Auth Integration¶

Usage Examples¶

Listing and downloading Zoom recordings¶

Fetching from multiple sources¶

YouTube content¶

Keyboard Shortcuts

`video_processor.sources.base` ¶

`BaseSource` ¶

`authenticate()` `abstractmethod` ¶

`download(file, destination)` `abstractmethod` ¶

`download_all(files, destination_dir)` ¶

`list_videos(folder_id=None, folder_path=None, patterns=None)` `abstractmethod` ¶

`SourceFile` ¶