PlanOpticon

API Sources

3 days, 12 hours ago by admin

Sources API Reference

video_processor.sources.base

Base interface for cloud source integrations.

BaseSource

Bases: ABC

Abstract base class for cloud source integrations.

Source code in video_processor/sources/base.py

class BaseSource(ABC):
    """Abstract base class for cloud source integrations.

    Subclasses must implement ``authenticate``, ``list_videos`` and
    ``download``. ``download_all`` is a concrete convenience method that
    calls ``download`` once per file.
    """

    @abstractmethod
    def authenticate(self) -> bool:
        """Authenticate with the cloud provider. Returns True on success."""
        ...

    @abstractmethod
    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """List video files in a folder."""
        ...

    @abstractmethod
    def download(
        self,
        file: SourceFile,
        destination: Path,
    ) -> Path:
        """Download a file to a local path. Returns the local path."""
        ...

    def download_all(
        self,
        files: List[SourceFile],
        destination_dir: Path,
    ) -> List[Path]:
        """Download multiple files to a directory, preserving subfolder structure.

        Each file is written to ``destination_dir / file.path`` when ``path``
        is set, otherwise to ``destination_dir / file.name``. The destination
        directory and any intermediate subfolders are created as needed.

        Args:
            files: Files to download.
            destination_dir: Base directory for the downloads.

        Returns:
            Local paths of the files that downloaded successfully, in input
            order. A failed download is logged and skipped, never raised.
        """
        destination_dir.mkdir(parents=True, exist_ok=True)
        paths: List[Path] = []
        for f in files:
            # Use path (with subfolder) if available, otherwise just name.
            # NOTE(review): the value is joined as-is, so an absolute path or
            # one containing ".." resolves outside destination_dir.
            relative = f.path if f.path else f.name
            dest = destination_dir / relative
            try:
                # Subfolders in `relative` do not exist yet; create them here
                # so download() implementations can write straight to `dest`.
                dest.parent.mkdir(parents=True, exist_ok=True)
                local_path = self.download(f, dest)
            except Exception as e:
                # Deliberately best-effort: one bad file must not abort the batch.
                logger.error(f"Failed to download {relative}: {e}")
            else:
                paths.append(local_path)
                logger.info(f"Downloaded: {relative}")
        return paths

authenticate()

abstractmethod

Authenticate with the cloud provider. Returns True on success.

Source code in video_processor/sources/base.py

@abstractmethod
def authenticate(self) -> bool:
    """Authenticate with the cloud provider or service.

    Returns:
        True on successful authentication, False on failure.
    """
    ...

download(file, destination)

abstractmethod

Download a file to a local path. Returns the local path.

Source code in video_processor/sources/base.py

@abstractmethod
def download(
    self,
    file: SourceFile,
    destination: Path,
) -> Path:
    """Download a single file to a local path.

    Args:
        file: File descriptor, as returned by ``list_videos()``.
        destination: Local destination path.

    Returns:
        The local path where the file was saved.
    """
    ...

download_all(files, destination_dir)

Download multiple files to a directory, preserving subfolder structure.

Source code in video_processor/sources/base.py

def download_all(
    self,
    files: List[SourceFile],
    destination_dir: Path,
) -> List[Path]:
    """Download multiple files to a directory, preserving subfolder structure.

    Each file is written to ``destination_dir / file.path`` when ``path`` is
    set, otherwise to ``destination_dir / file.name``. The destination
    directory and any intermediate subfolders are created as needed.

    Args:
        files: Files to download.
        destination_dir: Base directory for the downloads.

    Returns:
        Local paths of the files that downloaded successfully, in input
        order. A failed download is logged and skipped, never raised.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)
    paths: List[Path] = []
    for f in files:
        # Use path (with subfolder) if available, otherwise just name.
        # NOTE(review): the value is joined as-is, so an absolute path or one
        # containing ".." resolves outside destination_dir.
        relative = f.path if f.path else f.name
        dest = destination_dir / relative
        try:
            # Subfolders in `relative` do not exist yet; create them here so
            # download() implementations can write straight to `dest`.
            dest.parent.mkdir(parents=True, exist_ok=True)
            local_path = self.download(f, dest)
        except Exception as e:
            # Deliberately best-effort: one bad file must not abort the batch.
            logger.error(f"Failed to download {relative}: {e}")
        else:
            paths.append(local_path)
            logger.info(f"Downloaded: {relative}")
    return paths

list_videos(folder_id=None, folder_path=None, patterns=None)

abstractmethod

List video files in a folder.

Source code in video_processor/sources/base.py

@abstractmethod
def list_videos(
    self,
    folder_id: Optional[str] = None,
    folder_path: Optional[str] = None,
    patterns: Optional[List[str]] = None,
) -> List[SourceFile]:
    """List video files (or other content, depending on the source) in a folder.

    Args:
        folder_id: Provider-specific folder/container identifier.
        folder_path: Path within the source (e.g., folder name).
        patterns: File name glob patterns to filter results.

    Returns:
        The available files matching the criteria.
    """
    ...

SourceFile

Bases: BaseModel

A file available in a cloud source.

Source code in video_processor/sources/base.py

class SourceFile(BaseModel):
    """A file available in a cloud source.

    Only ``name`` and ``id`` are required; every other field defaults to None.
    """

    # Required identification fields.
    name: str = Field(description="File name")
    id: str = Field(description="Provider-specific file identifier")
    # Optional metadata; None when not supplied.
    size_bytes: Optional[int] = Field(default=None, description="File size in bytes")
    mime_type: Optional[str] = Field(default=None, description="MIME type")
    # Stored as a plain string, e.g. "2026-03-01T14:30:00Z".
    modified_at: Optional[str] = Field(default=None, description="Last modified timestamp")
    # Location relative to the source folder; BaseSource.download_all() joins it
    # onto the destination directory to rebuild the subfolder structure.
    path: Optional[str] = Field(default=None, description="Path within the source folder")

Overview

The sources module provides a unified interface for fetching content from cloud services, local applications, and the web. All sources implement the BaseSource abstract class, providing consistent authenticate(), list_videos(), and download() methods.

Sources are lazy-loaded to avoid pulling in optional dependencies at import time. You can import any source directly from video_processor.sources and the correct module will be loaded on demand.

BaseSource (ABC)

from video_processor.sources import BaseSource

Abstract base class that all source integrations implement. Defines the standard three-step workflow: authenticate, list, download.

authenticate()

@abstractmethod
def authenticate(self) -> bool

Authenticate with the cloud provider or service. Uses the auth strategy defined for the source (OAuth, API key, local access, etc.).

Returns: bool -- True on successful authentication, False on failure.

list_videos()

@abstractmethod
def list_videos(
    self,
    folder_id: Optional[str] = None,
    folder_path: Optional[str] = None,
    patterns: Optional[List[str]] = None,
) -> List[SourceFile]

List available video files (or other content, depending on the source).

Parameters:

Parameter Type Default Description
folder_id Optional[str] None Provider-specific folder/container identifier
folder_path Optional[str] None Path within the source (e.g., folder name)
patterns Optional[List[str]] None File name glob patterns to filter results

Returns: List[SourceFile] -- available files matching the criteria.

download()

@abstractmethod
def download(
    self,
    file: SourceFile,
    destination: Path,
) -> Path

Download a single file to a local path.

Parameters:

Parameter Type Description
file SourceFile File descriptor from list_videos()
destination Path Local destination path

Returns: Path -- the local path where the file was saved.

download_all()

def download_all(
    self,
    files: List[SourceFile],
    destination_dir: Path,
) -> List[Path]

Download multiple files to a directory, preserving subfolder structure from SourceFile.path. This is a concrete method provided by the base class.

Parameters:

Parameter Type Description
files List[SourceFile] Files to download
destination_dir Path Base directory for downloads (created if needed)

Returns: List[Path] -- local paths of successfully downloaded files. Failed downloads are logged and skipped.

SourceFile

from video_processor.sources import SourceFile

Pydantic model describing a file available in a cloud source.

Field Type Default Description
name str required File name
id str required Provider-specific file identifier
size_bytes Optional[int] None File size in bytes
mime_type Optional[str] None MIME type (e.g., "video/mp4")
modified_at Optional[str] None Last modified timestamp
path Optional[str] None Path within the source folder (used for subfolder structure in download_all)
{
  "name": "sprint-review-2026-03-01.mp4",
  "id": "abc123def456",
  "size_bytes": 524288000,
  "mime_type": "video/mp4",
  "modified_at": "2026-03-01T14:30:00Z",
  "path": "recordings/march/sprint-review-2026-03-01.mp4"
}

Lazy Loading Pattern

All sources are lazy-loaded via __getattr__ in the package __init__.py. This means importing video_processor.sources does not pull in any external dependencies (e.g., google-auth, msal, notion-client). The actual module is loaded only when you access the class.

# The name is resolved through the package-level __getattr__ described above.
# NOTE(review): with a module __getattr__, `from ... import ZoomSource` already
# triggers that lookup, so the zoom_source module is presumably loaded on this
# line rather than at instantiation -- confirm against sources/__init__.py.
from video_processor.sources import ZoomSource

# Instantiate the source once the class has been resolved
source = ZoomSource()

Available Sources

Cloud Recordings

Sources for fetching recorded meetings from video conferencing platforms.

Source Class Auth Method Description
Zoom ZoomSource OAuth / Server-to-Server List and download Zoom cloud recordings
Google Meet MeetRecordingSource OAuth (Google) List and download Google Meet recordings from Drive
Microsoft Teams TeamsRecordingSource OAuth (Microsoft) List and download Teams meeting recordings

Cloud Storage and Workspace

Sources for accessing files stored in cloud platforms.

Source Class Auth Method Description
Google Drive GoogleDriveSource OAuth (Google) Files from Google Drive
Google Workspace GWSSource OAuth (Google) Google Docs, Sheets, Slides
Microsoft 365 M365Source OAuth (Microsoft) OneDrive, SharePoint files
Notion NotionSource OAuth / API key Notion pages and databases
GitHub GitHubSource OAuth / API token Repository files, issues, discussions
Dropbox DropboxSource OAuth / access token (via auth config) Files from Dropbox

Notes Applications

Sources for local and cloud-based note-taking apps.

Source Class Auth Method Description
Apple Notes AppleNotesSource Local (macOS) Notes from Apple Notes.app
Obsidian ObsidianSource Local filesystem Markdown files from Obsidian vaults
Logseq LogseqSource Local filesystem Pages from Logseq graphs
OneNote OneNoteSource OAuth (Microsoft) Microsoft OneNote notebooks
Google Keep GoogleKeepSource OAuth (Google) Google Keep notes

Web and Content

Sources for fetching content from the web.

Source Class Auth Method Description
YouTube YouTubeSource API key / OAuth YouTube video metadata and transcripts
Web WebSource None General web page content extraction
RSS RSSSource None RSS/Atom feed entries
Podcast PodcastSource None Podcast episodes from RSS feeds
arXiv ArxivSource None Academic papers from arXiv
Hacker News HackerNewsSource None Hacker News posts and comments
Reddit RedditSource API credentials Reddit posts and comments
Twitter/X TwitterSource API credentials Tweets and threads

Auth Integration

Most sources use PlanOpticon's unified auth system (see Auth API). The typical pattern within a source implementation:

from video_processor.auth import get_auth_manager

class MySource(BaseSource):
    """Example source that obtains its token from the unified auth system."""

    def __init__(self):
        # Populated by authenticate(); None means "not authenticated yet".
        self._token = None

    def authenticate(self) -> bool:
        """Fetch a token for "my_service"; return True only if one was obtained."""
        auth_manager = get_auth_manager("my_service")
        if not auth_manager:
            return False

        fetched = auth_manager.get_token()
        if not fetched:
            return False

        self._token = fetched
        return True

    def list_videos(self, **kwargs) -> list[SourceFile]:
        """List files; requires a prior successful authenticate() call."""
        if self._token is None or not self._token:
            raise RuntimeError("Not authenticated. Call authenticate() first.")
        # Use self._token to call the API
        ...

Usage Examples

Listing and downloading Zoom recordings

from pathlib import Path
from video_processor.sources import ZoomSource

source = ZoomSource()
# Only list and download once authentication has succeeded
if source.authenticate():
    recordings = source.list_videos()
    for rec in recordings:
        # size_bytes is optional on SourceFile, so this can print "None bytes"
        print(f"{rec.name} ({rec.size_bytes} bytes)")

    # Download all to a local directory; failed downloads are logged and
    # skipped, so `paths` holds only the files that were saved
    paths = source.download_all(recordings, Path("./downloads"))

Fetching from multiple sources

from pathlib import Path
from video_processor.sources import GoogleDriveSource, NotionSource

# Google Drive
gdrive = GoogleDriveSource()
if gdrive.authenticate():
    # Restrict the listing to one folder and to MP4/WebM file names
    files = gdrive.list_videos(
        folder_path="Meeting Recordings",
        patterns=["*.mp4", "*.webm"],
    )
    # The destination directory is created if needed
    gdrive.download_all(files, Path("./drive-downloads"))

# Notion
notion = NotionSource()
if notion.authenticate():
    # list_videos() is the shared BaseSource listing call, here returning pages
    pages = notion.list_videos()  # Lists Notion pages
    for page in pages:
        print(f"Page: {page.name}")

YouTube content

from video_processor.sources import YouTubeSource

yt = YouTubeSource()
if yt.authenticate():
    # The playlist URL is passed through the generic folder_path parameter
    videos = yt.list_videos(folder_path="https://youtube.com/playlist?list=...")
    for v in videos:
        # id is the provider-specific identifier of each listed entry
        print(f"{v.name} - {v.id}")

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button