PlanOpticon

planopticon / tests / test_sources.py
Source Blame History 2073 lines
0981a08… noreply 1 """Tests for all source connectors: import, instantiation, authenticate, list_videos."""
0981a08… noreply 2
ffef14a… noreply 3 import json
0981a08… noreply 4 import os
ffef14a… noreply 5 from pathlib import Path
0981a08… noreply 6 from unittest.mock import MagicMock, patch
0981a08… noreply 7
0981a08… noreply 8 import pytest
0981a08… noreply 9
ffef14a… noreply 10 from video_processor.sources.base import BaseSource, SourceFile
0981a08… noreply 11
0981a08… noreply 12 # ---------------------------------------------------------------------------
0981a08… noreply 13 # SourceFile model
0981a08… noreply 14 # ---------------------------------------------------------------------------
0981a08… noreply 15
0981a08… noreply 16
def test_source_file_creation():
    """A SourceFile built from just name and id has no size or MIME type."""
    source_file = SourceFile(id="abc123", name="test.mp4")

    # Required fields round-trip unchanged.
    assert (source_file.name, source_file.id) == ("test.mp4", "abc123")
    # Fields that were not supplied are reported as None.
    assert source_file.size_bytes is None
    assert source_file.mime_type is None
0981a08… noreply 23
0981a08… noreply 24
def test_source_file_with_all_fields():
    """A SourceFile built with every field keeps the supplied size and path."""
    fields = {
        "name": "video.mp4",
        "id": "v1",
        "size_bytes": 1024,
        "mime_type": "video/mp4",
        "modified_at": "2025-01-01",
        "path": "folder/video.mp4",
    }
    source_file = SourceFile(**fields)

    assert source_file.size_bytes == 1024
    assert source_file.path == "folder/video.mp4"
0981a08… noreply 36
0981a08… noreply 37
0981a08… noreply 38 # ---------------------------------------------------------------------------
0981a08… noreply 39 # YouTubeSource
0981a08… noreply 40 # ---------------------------------------------------------------------------
0981a08… noreply 41
0981a08… noreply 42
class TestYouTubeSource:
    """Tests for YouTubeSource: import, URL parsing, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.youtube_source import YouTubeSource

        assert YouTubeSource is not None

    def test_constructor(self):
        """A watch URL yields its video id, and audio_only is False when not given."""
        from video_processor.sources.youtube_source import YouTubeSource

        source = YouTubeSource(url="https://www.youtube.com/watch?v=dQw4w9WgXcQ")
        assert source.video_id == "dQw4w9WgXcQ"
        assert source.audio_only is False

    def test_constructor_audio_only(self):
        """The audio_only flag passed to the constructor is kept on the instance."""
        from video_processor.sources.youtube_source import YouTubeSource

        source = YouTubeSource(audio_only=True, url="https://youtu.be/dQw4w9WgXcQ")
        assert source.audio_only is True

    def test_constructor_shorts_url(self):
        """A /shorts/ URL yields the same video id."""
        from video_processor.sources.youtube_source import YouTubeSource

        source = YouTubeSource(url="https://youtube.com/shorts/dQw4w9WgXcQ")
        assert source.video_id == "dQw4w9WgXcQ"

    def test_constructor_invalid_url(self):
        """A non-YouTube URL is rejected with a "Could not extract" ValueError."""
        from video_processor.sources.youtube_source import YouTubeSource

        with pytest.raises(ValueError, match="Could not extract"):
            YouTubeSource(url="https://example.com/not-youtube")

    @patch.dict(os.environ, {}, clear=False)
    def test_authenticate_no_ytdlp(self):
        """authenticate() returns a bool while yt_dlp is masked out of sys.modules."""
        from video_processor.sources.youtube_source import YouTubeSource

        source = YouTubeSource(url="https://youtube.com/watch?v=dQw4w9WgXcQ")
        # A None entry in sys.modules makes `import yt_dlp` raise ImportError.
        with patch.dict("sys.modules", {"yt_dlp": None}):
            outcome = source.authenticate()
        # Only the return type is pinned here, not its value.
        assert isinstance(outcome, bool)

    def test_list_videos(self):
        """list_videos() returns one entry named after the extracted title."""
        from video_processor.sources.youtube_source import YouTubeSource

        # Stand-in for a YoutubeDL instance used as a context manager.
        ydl = MagicMock()
        ydl.__enter__ = MagicMock(return_value=ydl)
        ydl.__exit__ = MagicMock(return_value=False)
        ydl.extract_info.return_value = {"title": "Test Video", "filesize": 1000}

        # Stand-in for the yt_dlp module itself.
        fake_yt_dlp = MagicMock()
        fake_yt_dlp.YoutubeDL = MagicMock(return_value=ydl)

        with patch.dict("sys.modules", {"yt_dlp": fake_yt_dlp}):
            source = YouTubeSource(url="https://youtube.com/watch?v=dQw4w9WgXcQ")
            listed = source.list_videos()

        assert isinstance(listed, list)
        assert len(listed) == 1
        assert listed[0].name == "Test Video"
0981a08… noreply 105
0981a08… noreply 106
0981a08… noreply 107 # ---------------------------------------------------------------------------
0981a08… noreply 108 # WebSource
0981a08… noreply 109 # ---------------------------------------------------------------------------
0981a08… noreply 110
0981a08… noreply 111
class TestWebSource:
    """Tests for WebSource: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.web_source import WebSource

        assert WebSource is not None

    def test_constructor(self):
        """The URL passed to the constructor is exposed as .url."""
        from video_processor.sources.web_source import WebSource

        page_url = "https://example.com/page"
        source = WebSource(url=page_url)
        assert source.url == page_url

    def test_authenticate(self):
        """authenticate() returns True."""
        from video_processor.sources.web_source import WebSource

        assert WebSource(url="https://example.com").authenticate() is True

    def test_list_videos(self):
        """list_videos() returns a single text/html entry."""
        from video_processor.sources.web_source import WebSource

        source = WebSource(url="https://example.com/article")
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) == 1
        assert listed[0].mime_type == "text/html"
0981a08… noreply 138
0981a08… noreply 139
0981a08… noreply 140 # ---------------------------------------------------------------------------
0981a08… noreply 141 # GitHubSource
0981a08… noreply 142 # ---------------------------------------------------------------------------
0981a08… noreply 143
0981a08… noreply 144
class TestGitHubSource:
    """Tests for GitHubSource: import, constructor, token auth, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.github_source import GitHubSource

        assert GitHubSource is not None

    def test_constructor(self):
        """A source built from only a repo includes both issues and PRs."""
        from video_processor.sources.github_source import GitHubSource

        source = GitHubSource(repo="owner/repo")
        assert source.repo == "owner/repo"
        assert source.include_issues is True
        assert source.include_prs is True

    @patch.dict(os.environ, {"GITHUB_TOKEN": "ghp_test123"})
    def test_authenticate_with_env_token(self):
        """authenticate() succeeds and stores the token from GITHUB_TOKEN."""
        from video_processor.sources.github_source import GitHubSource

        source = GitHubSource(repo="owner/repo")
        outcome = source.authenticate()
        assert outcome is True
        assert source._token == "ghp_test123"

    @patch("requests.get")
    @patch.dict(os.environ, {"GITHUB_TOKEN": "ghp_test123"})
    def test_list_videos(self, mock_get):
        """list_videos() returns README + issues (minus PR-flagged ones) + PRs."""
        from video_processor.sources.github_source import GitHubSource

        # One canned response per expected request: readme, issues, then PRs.
        readme_reply = MagicMock(ok=True)

        issues_reply = MagicMock(ok=True)
        issues_reply.json.return_value = [
            {"number": 1, "title": "Bug report", "id": 1},
            # Carries a "pull_request" key, so it should not count as an issue.
            {"number": 2, "title": "Feature request", "id": 2, "pull_request": {}},
        ]

        pulls_reply = MagicMock(ok=True)
        pulls_reply.json.return_value = [{"number": 3, "title": "Fix bug"}]

        mock_get.side_effect = [readme_reply, issues_reply, pulls_reply]

        source = GitHubSource(repo="owner/repo")
        source.authenticate()
        listed = source.list_videos()
        assert isinstance(listed, list)
        # README + 1 issue (one filtered as PR) + 1 PR = 3
        assert len(listed) == 3
0981a08… noreply 198
0981a08… noreply 199
0981a08… noreply 200 # ---------------------------------------------------------------------------
0981a08… noreply 201 # RedditSource
0981a08… noreply 202 # ---------------------------------------------------------------------------
0981a08… noreply 203
0981a08… noreply 204
class TestRedditSource:
    """Tests for RedditSource: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.reddit_source import RedditSource

        assert RedditSource is not None

    def test_constructor(self):
        """The trailing slash of the given URL is absent from .url."""
        from video_processor.sources.reddit_source import RedditSource

        source = RedditSource(url="https://reddit.com/r/python/comments/abc123/test/")
        assert source.url == "https://reddit.com/r/python/comments/abc123/test"

    def test_authenticate(self):
        """authenticate() returns True."""
        from video_processor.sources.reddit_source import RedditSource

        assert RedditSource(url="https://reddit.com/r/test").authenticate() is True

    def test_list_videos(self):
        """list_videos() returns a single text/plain entry."""
        from video_processor.sources.reddit_source import RedditSource

        source = RedditSource(url="https://reddit.com/r/python/comments/abc/post")
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) == 1
        assert listed[0].mime_type == "text/plain"
0981a08… noreply 231
0981a08… noreply 232
0981a08… noreply 233 # ---------------------------------------------------------------------------
0981a08… noreply 234 # HackerNewsSource
0981a08… noreply 235 # ---------------------------------------------------------------------------
0981a08… noreply 236
0981a08… noreply 237
class TestHackerNewsSource:
    """Tests for HackerNewsSource: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        assert HackerNewsSource is not None

    def test_constructor(self):
        """The item id is kept and max_comments is 200 when not given."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        source = HackerNewsSource(item_id=12345678)
        assert source.item_id == 12345678
        assert source.max_comments == 200

    def test_authenticate(self):
        """authenticate() returns True."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        assert HackerNewsSource(item_id=12345678).authenticate() is True

    def test_list_videos(self):
        """list_videos() returns one entry whose id is the item id as a string."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        source = HackerNewsSource(item_id=99999)
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) == 1
        assert listed[0].id == "99999"
0981a08… noreply 265
0981a08… noreply 266
0981a08… noreply 267 # ---------------------------------------------------------------------------
0981a08… noreply 268 # RSSSource
0981a08… noreply 269 # ---------------------------------------------------------------------------
0981a08… noreply 270
0981a08… noreply 271
class TestRSSSource:
    """Tests for RSSSource: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.rss_source import RSSSource

        assert RSSSource is not None

    def test_constructor(self):
        """The feed URL and max_entries passed in are kept on the instance."""
        from video_processor.sources.rss_source import RSSSource

        source = RSSSource(max_entries=20, url="https://example.com/feed.xml")
        assert source.url == "https://example.com/feed.xml"
        assert source.max_entries == 20

    def test_authenticate(self):
        """authenticate() returns True."""
        from video_processor.sources.rss_source import RSSSource

        assert RSSSource(url="https://example.com/feed.xml").authenticate() is True

    @patch("requests.get")
    def test_list_videos(self, mock_get):
        """list_videos() returns at least one entry for a one-item RSS feed."""
        from video_processor.sources.rss_source import RSSSource

        feed_body = """<?xml version="1.0"?>
<rss version="2.0">
<channel>
<item>
<title>Entry 1</title>
<link>https://example.com/1</link>
<description>First entry</description>
<pubDate>Mon, 01 Jan 2025 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>"""
        # Canned HTTP response carrying the feed; raise_for_status is a no-op mock.
        reply = MagicMock(text=feed_body)
        reply.raise_for_status = MagicMock()
        mock_get.return_value = reply

        source = RSSSource(url="https://example.com/feed.xml")
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) >= 1
0981a08… noreply 315
0981a08… noreply 316
0981a08… noreply 317 # ---------------------------------------------------------------------------
0981a08… noreply 318 # PodcastSource
0981a08… noreply 319 # ---------------------------------------------------------------------------
0981a08… noreply 320
0981a08… noreply 321
class TestPodcastSource:
    """Tests for PodcastSource: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.podcast_source import PodcastSource

        assert PodcastSource is not None

    def test_constructor(self):
        """The feed URL and max_episodes passed in are kept on the instance."""
        from video_processor.sources.podcast_source import PodcastSource

        source = PodcastSource(max_episodes=5, feed_url="https://example.com/podcast.xml")
        assert source.feed_url == "https://example.com/podcast.xml"
        assert source.max_episodes == 5

    def test_authenticate(self):
        """authenticate() returns True."""
        from video_processor.sources.podcast_source import PodcastSource

        source = PodcastSource(feed_url="https://example.com/podcast.xml")
        assert source.authenticate() is True

    @patch("requests.get")
    def test_list_videos(self, mock_get):
        """list_videos() returns one audio/mpeg entry for a one-episode feed."""
        from video_processor.sources.podcast_source import PodcastSource

        feed_body = """<?xml version="1.0"?>
<rss version="2.0">
<channel>
<item>
<title>Episode 1</title>
<enclosure url="https://example.com/ep1.mp3" type="audio/mpeg" />
<pubDate>Mon, 01 Jan 2025 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>"""
        # Canned HTTP response carrying the feed; raise_for_status is a no-op mock.
        reply = MagicMock(text=feed_body)
        reply.raise_for_status = MagicMock()
        mock_get.return_value = reply

        source = PodcastSource(feed_url="https://example.com/podcast.xml")
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) == 1
        assert listed[0].mime_type == "audio/mpeg"
0981a08… noreply 365
0981a08… noreply 366
0981a08… noreply 367 # ---------------------------------------------------------------------------
0981a08… noreply 368 # TwitterSource
0981a08… noreply 369 # ---------------------------------------------------------------------------
0981a08… noreply 370
0981a08… noreply 371
class TestTwitterSource:
    """Tests for TwitterSource: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.twitter_source import TwitterSource

        assert TwitterSource is not None

    def test_constructor(self):
        """The URL passed to the constructor is exposed as .url."""
        from video_processor.sources.twitter_source import TwitterSource

        status_url = "https://twitter.com/user/status/123456"
        source = TwitterSource(url=status_url)
        assert source.url == status_url

    @patch.dict(os.environ, {"TWITTER_BEARER_TOKEN": "test_token"})
    def test_authenticate_with_bearer_token(self):
        """authenticate() returns True when TWITTER_BEARER_TOKEN is set."""
        from video_processor.sources.twitter_source import TwitterSource

        source = TwitterSource(url="https://twitter.com/user/status/123456")
        assert source.authenticate() is True

    @patch.dict(os.environ, {}, clear=True)
    def test_authenticate_no_token_no_gallery_dl(self):
        """authenticate() returns a bool with an empty env and gallery_dl masked."""
        from video_processor.sources.twitter_source import TwitterSource

        source = TwitterSource(url="https://twitter.com/user/status/123456")
        # A None entry in sys.modules makes `import gallery_dl` raise ImportError.
        with patch.dict("sys.modules", {"gallery_dl": None}):
            outcome = source.authenticate()
        # Only the return type is pinned here, not its value.
        assert isinstance(outcome, bool)

    def test_list_videos(self):
        """list_videos() returns a single-entry list."""
        from video_processor.sources.twitter_source import TwitterSource

        source = TwitterSource(url="https://twitter.com/user/status/123456")
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) == 1
0981a08… noreply 407
0981a08… noreply 408
0981a08… noreply 409 # ---------------------------------------------------------------------------
0981a08… noreply 410 # ArxivSource
0981a08… noreply 411 # ---------------------------------------------------------------------------
0981a08… noreply 412
0981a08… noreply 413
class TestArxivSource:
    """Tests for ArxivSource: import, id parsing, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.arxiv_source import ArxivSource

        assert ArxivSource is not None

    def test_constructor(self):
        """A bare arXiv id is kept as-is."""
        from video_processor.sources.arxiv_source import ArxivSource

        source = ArxivSource(url_or_id="2301.07041")
        assert source.arxiv_id == "2301.07041"

    def test_constructor_from_url(self):
        """An /abs/ URL yields the id including its version suffix."""
        from video_processor.sources.arxiv_source import ArxivSource

        source = ArxivSource(url_or_id="https://arxiv.org/abs/2301.07041v2")
        assert source.arxiv_id == "2301.07041v2"

    def test_constructor_invalid(self):
        """An unparseable identifier is rejected with a "Could not extract" ValueError."""
        from video_processor.sources.arxiv_source import ArxivSource

        with pytest.raises(ValueError, match="Could not extract"):
            ArxivSource(url_or_id="not-an-arxiv-id")

    def test_authenticate(self):
        """authenticate() returns True."""
        from video_processor.sources.arxiv_source import ArxivSource

        assert ArxivSource(url_or_id="2301.07041").authenticate() is True

    @patch("requests.get")
    def test_list_videos(self, mock_get):
        """list_videos() returns two entries for a one-entry Atom response."""
        from video_processor.sources.arxiv_source import ArxivSource

        atom_body = """<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:arxiv="http://arxiv.org/schemas/atom">
<entry>
<title>Test Paper</title>
<summary>Abstract text here.</summary>
<author><name>Author One</name></author>
<published>2023-01-15T00:00:00Z</published>
</entry>
</feed>"""
        # Canned HTTP response carrying the feed; raise_for_status is a no-op mock.
        reply = MagicMock(text=atom_body)
        reply.raise_for_status = MagicMock()
        mock_get.return_value = reply

        source = ArxivSource(url_or_id="2301.07041")
        listed = source.list_videos()
        assert isinstance(listed, list)
        assert len(listed) == 2  # metadata + pdf
0981a08… noreply 467
0981a08… noreply 468
0981a08… noreply 469 # ---------------------------------------------------------------------------
0981a08… noreply 470 # S3Source
0981a08… noreply 471 # ---------------------------------------------------------------------------
0981a08… noreply 472
0981a08… noreply 473
class TestS3Source:
    """Tests for S3Source: import, constructor, authenticate, list_videos."""

    def test_import(self):
        """The connector class is importable."""
        from video_processor.sources.s3_source import S3Source

        assert S3Source is not None

    def test_constructor(self):
        """Bucket, prefix and region passed in are kept on the instance."""
        from video_processor.sources.s3_source import S3Source

        source = S3Source(bucket="my-bucket", prefix="videos/", region="us-east-1")
        assert source.bucket == "my-bucket"
        assert source.prefix == "videos/"
        assert source.region == "us-east-1"

    def test_authenticate_success(self):
        """authenticate() returns True when head_bucket succeeds."""
        from video_processor.sources.s3_source import S3Source

        s3_client = MagicMock()
        s3_client.head_bucket.return_value = {}
        # Stand-in for the boto3 module; its client() hands back the fake client.
        fake_boto3 = MagicMock()
        fake_boto3.client.return_value = s3_client

        with patch.dict("sys.modules", {"boto3": fake_boto3}):
            source = S3Source(bucket="my-bucket")
            assert source.authenticate() is True

    def test_authenticate_failure(self):
        """authenticate() returns False when head_bucket raises."""
        from video_processor.sources.s3_source import S3Source

        s3_client = MagicMock()
        s3_client.head_bucket.side_effect = Exception("Access Denied")
        fake_boto3 = MagicMock()
        fake_boto3.client.return_value = s3_client

        with patch.dict("sys.modules", {"boto3": fake_boto3}):
            source = S3Source(bucket="bad-bucket")
            assert source.authenticate() is False

    def test_list_videos(self):
        """list_videos() keeps the .mp4 and .mkv keys and drops the .txt key."""
        from video_processor.sources.s3_source import S3Source

        # A single listing page with two video keys and one text file.
        listing_page = {
            "Contents": [
                {"Key": "videos/clip.mp4", "Size": 5000},
                {"Key": "videos/notes.txt", "Size": 100},
                {"Key": "videos/movie.mkv", "Size": 90000},
            ]
        }
        pager = MagicMock()
        pager.paginate.return_value = [listing_page]

        s3_client = MagicMock()
        s3_client.head_bucket.return_value = {}
        s3_client.get_paginator.return_value = pager

        fake_boto3 = MagicMock()
        fake_boto3.client.return_value = s3_client

        with patch.dict("sys.modules", {"boto3": fake_boto3}):
            source = S3Source(bucket="my-bucket")
            source.authenticate()
            listed = source.list_videos()

        assert isinstance(listed, list)
        # Only .mp4 and .mkv are video extensions
        assert len(listed) == 2
        listed_names = [entry.name for entry in listed]
        assert "clip.mp4" in listed_names
        assert "movie.mkv" in listed_names
0981a08… noreply 541
0981a08… noreply 542
0981a08… noreply 543 # ---------------------------------------------------------------------------
0981a08… noreply 544 # GWSSource
0981a08… noreply 545 # ---------------------------------------------------------------------------
0981a08… noreply 546
0981a08… noreply 547
0981a08… noreply 548 class TestGWSSource:
def test_import(self):
    """The connector class is importable."""
    from video_processor.sources.gws_source import GWSSource

    assert GWSSource is not None
0981a08… noreply 553
def test_constructor_defaults(self):
    """With no arguments: folder_id and query are None, doc_ids is an empty list."""
    from video_processor.sources.gws_source import GWSSource

    source = GWSSource()
    assert source.folder_id is None
    assert source.query is None
    assert source.doc_ids == []
0981a08… noreply 561
def test_constructor_with_folder(self):
    """The folder id and query passed in are kept on the instance."""
    from video_processor.sources.gws_source import GWSSource

    source = GWSSource(query="name contains 'spec'", folder_id="1abc")
    assert source.folder_id == "1abc"
    assert source.query == "name contains 'spec'"
0981a08… noreply 568
def test_constructor_with_doc_ids(self):
    """The doc_ids list passed in is kept on the instance."""
    from video_processor.sources.gws_source import GWSSource

    wanted_ids = ["doc1", "doc2"]
    source = GWSSource(doc_ids=wanted_ids)
    assert source.doc_ids == ["doc1", "doc2"]
0981a08… noreply 574
@patch("shutil.which", return_value=None)
def test_authenticate_no_gws(self, _mock_which):
    """authenticate() returns False when shutil.which finds nothing."""
    from video_processor.sources.gws_source import GWSSource

    assert GWSSource().authenticate() is False
0981a08… noreply 581
@patch("video_processor.sources.gws_source._run_gws")
@patch("shutil.which", return_value="/usr/local/bin/gws")
def test_authenticate_success(self, _mock_which, mock_run):
    """authenticate() returns True when the binary is found and _run_gws replies."""
    from video_processor.sources.gws_source import GWSSource

    # NOTE(review): this value looks like an e-mail address mangled by page
    # scraping — confirm the literal against the repository.
    mock_run.return_value = {"connectedAs": "[email protected]"}
    assert GWSSource().authenticate() is True
0981a08… noreply 590
@patch("video_processor.sources.gws_source._run_gws")
@patch("shutil.which", return_value="/usr/local/bin/gws")
def test_list_videos(self, _mock_which, mock_run):
    """list_videos() returns one entry per listed file, in listing order."""
    from video_processor.sources.gws_source import GWSSource

    spec_doc = {
        "id": "doc123",
        "name": "Project Spec",
        "mimeType": "application/vnd.google-apps.document",
        "modifiedTime": "2026-01-01T00:00:00Z",
    }
    budget_sheet = {
        "id": "sheet456",
        "name": "Budget",
        "mimeType": "application/vnd.google-apps.spreadsheet",
    }
    mock_run.return_value = {"files": [spec_doc, budget_sheet]}

    source = GWSSource(folder_id="folder1")
    listed = source.list_videos()
    assert len(listed) == 2
    assert listed[0].name == "Project Spec"
    assert listed[1].id == "sheet456"
0981a08… noreply 616
@patch("video_processor.sources.gws_source._run_gws")
@patch("shutil.which", return_value="/usr/local/bin/gws")
def test_list_videos_with_doc_ids(self, _mock_which, mock_run):
    """With explicit doc_ids, list_videos() returns one entry for the one document."""
    from video_processor.sources.gws_source import GWSSource

    doc_metadata = {
        "id": "doc123",
        "name": "My Doc",
        "mimeType": "application/vnd.google-apps.document",
    }
    mock_run.return_value = doc_metadata

    source = GWSSource(doc_ids=["doc123"])
    listed = source.list_videos()
    assert len(listed) == 1
    assert listed[0].name == "My Doc"
0981a08… noreply 631
def test_result_to_source_file(self):
    """_result_to_source_file maps name/id/mimeType and yields size_bytes as an int."""
    from video_processor.sources.gws_source import _result_to_source_file

    raw_result = {
        "id": "abc",
        "name": "Test Doc",
        "mimeType": "text/plain",
        "size": "1024",  # given as a string; the int 1024 is expected back
        "modifiedTime": "2026-03-01",
    }
    converted = _result_to_source_file(raw_result)

    assert converted.name == "Test Doc"
    assert converted.id == "abc"
    assert converted.size_bytes == 1024
    assert converted.mime_type == "text/plain"
0981a08… noreply 648
@patch("video_processor.sources.gws_source._run_gws")
def test_get_doc_text(self, mock_run):
    """Text runs from each paragraph of the mocked document show up in the result."""
    from video_processor.sources.gws_source import GWSSource

    def paragraph(text):
        # One paragraph element holding a single text run.
        return {"paragraph": {"elements": [{"textRun": {"content": text}}]}}

    mock_run.return_value = {
        "body": {
            "content": [
                paragraph("Hello world\n"),
                paragraph("Second paragraph\n"),
            ]
        }
    }

    extracted = GWSSource()._get_doc_text("doc123")

    for expected in ("Hello world", "Second paragraph"):
        assert expected in extracted
0981a08… noreply 677
@patch("video_processor.sources.gws_source._run_gws")
def test_collate(self, mock_run):
    """collate() output includes both the document name and its exported content."""
    from video_processor.sources.gws_source import GWSSource

    listing_response = {
        "files": [
            {
                "id": "d1",
                "name": "Doc A",
                "mimeType": "application/vnd.google-apps.document",
            },
        ]
    }
    export_response = {"raw": "Content of Doc A"}
    # Responses are consumed in call order: first the file listing, then the export.
    mock_run.side_effect = [listing_response, export_response]

    collated = GWSSource(folder_id="f1").collate()

    assert "Doc A" in collated
    assert "Content of Doc A" in collated
0981a08… noreply 699
0981a08… noreply 700
# ---------------------------------------------------------------------------
# M365Source
# ---------------------------------------------------------------------------
0981a08… noreply 704
0981a08… noreply 705
class TestM365Source:
    """Tests for the M365Source connector and the m365_source module helpers."""

    def test_import(self):
        """The connector class can be imported."""
        from video_processor.sources.m365_source import M365Source

        assert M365Source is not None

    def test_constructor(self):
        """Given URLs are stored; file_ids and recursive are [] and False."""
        from video_processor.sources.m365_source import M365Source

        site_url = "https://contoso.sharepoint.com/sites/proj"
        docs_folder = "/sites/proj/Shared Documents"
        source = M365Source(web_url=site_url, folder_url=docs_folder)

        assert source.web_url == site_url
        assert source.folder_url == docs_folder
        assert source.file_ids == []
        assert source.recursive is False

    def test_constructor_with_file_ids(self):
        """Explicit file IDs passed to the constructor are kept on the instance."""
        from video_processor.sources.m365_source import M365Source

        source = M365Source(
            web_url="https://contoso.sharepoint.com",
            file_ids=["id1", "id2"],
        )

        assert source.file_ids == ["id1", "id2"]

    @patch("shutil.which", return_value=None)
    def test_authenticate_no_m365(self, _mock_which):
        """authenticate() is False when shutil.which finds no executable."""
        from video_processor.sources.m365_source import M365Source

        source = M365Source(web_url="https://contoso.sharepoint.com")

        assert source.authenticate() is False

    @patch("video_processor.sources.m365_source._run_m365")
    @patch("shutil.which", return_value="/usr/local/bin/m365")
    def test_authenticate_logged_in(self, _mock_which, mock_run):
        """authenticate() is True when the mocked response has a connectedAs value."""
        from video_processor.sources.m365_source import M365Source

        mock_run.return_value = {"connectedAs": "[email protected]"}
        source = M365Source(web_url="https://contoso.sharepoint.com")

        assert source.authenticate() is True

    @patch("video_processor.sources.m365_source._run_m365")
    @patch("shutil.which", return_value="/usr/local/bin/m365")
    def test_authenticate_not_logged_in(self, _mock_which, mock_run):
        """authenticate() is False when the mocked response is an empty dict."""
        from video_processor.sources.m365_source import M365Source

        mock_run.return_value = {}
        source = M365Source(web_url="https://contoso.sharepoint.com")

        assert source.authenticate() is False

    @patch("video_processor.sources.m365_source._run_m365")
    @patch("shutil.which", return_value="/usr/local/bin/m365")
    def test_list_videos(self, _mock_which, mock_run):
        """Of three listed files, only the .docx and .xlsx entries are returned."""
        from video_processor.sources.m365_source import M365Source

        auth_response = {"connectedAs": "[email protected]"}
        folder_listing = [
            {
                "Name": file_name,
                "UniqueId": unique_id,
                "Length": length,
                "ServerRelativeUrl": f"/sites/proj/docs/{file_name}",
            }
            for file_name, unique_id, length in (
                ("spec.docx", "uid-1", "20480"),
                ("budget.xlsx", "uid-2", "10240"),
                ("image.png", "uid-3", "5000"),
            )
        ]
        # Consumed in call order: authenticate() first, then the folder listing.
        mock_run.side_effect = [auth_response, folder_listing]

        source = M365Source(
            web_url="https://contoso.sharepoint.com/sites/proj",
            folder_url="/sites/proj/docs",
        )
        source.authenticate()
        listed = source.list_videos()

        # Only .docx and .xlsx match _DOC_EXTENSIONS, not .png
        assert len(listed) == 2
        listed_names = [entry.name for entry in listed]
        for expected_name in ("spec.docx", "budget.xlsx"):
            assert expected_name in listed_names

    @patch("video_processor.sources.m365_source._run_m365")
    def test_list_videos_with_file_ids(self, mock_run):
        """With explicit file_ids, the mocked lookup becomes one listed file."""
        from video_processor.sources.m365_source import M365Source

        file_metadata = {
            "Name": "report.pdf",
            "UniqueId": "uid-1",
            "Length": "50000",
            "ServerRelativeUrl": "/sites/proj/docs/report.pdf",
        }
        mock_run.return_value = file_metadata

        source = M365Source(
            web_url="https://contoso.sharepoint.com",
            file_ids=["uid-1"],
        )
        listed = source.list_videos()

        assert len(listed) == 1
        assert listed[0].name == "report.pdf"

    def test_result_to_source_file(self):
        """A raw result dict is converted into a SourceFile with matching fields."""
        from video_processor.sources.m365_source import _result_to_source_file

        raw_result = {
            "Name": "notes.txt",
            "UniqueId": "abc-123",
            "Length": "512",
            "ServerRelativeUrl": "/sites/proj/notes.txt",
            "TimeLastModified": "2026-03-01T12:00:00Z",
        }
        converted = _result_to_source_file(raw_result)

        assert converted.name == "notes.txt"
        assert converted.id == "abc-123"
        # The length is supplied as a string and must come back as an integer.
        assert converted.size_bytes == 512
        assert converted.path == "/sites/proj/notes.txt"
        assert converted.modified_at == "2026-03-01T12:00:00Z"

    def test_extract_text_txt(self, tmp_path):
        """A .txt file's content is returned unchanged."""
        from video_processor.sources.m365_source import _extract_text

        text_file = tmp_path / "test.txt"
        text_file.write_text("Hello from a text file")

        assert _extract_text(text_file) == "Hello from a text file"

    def test_extract_text_md(self, tmp_path):
        """Heading and body text of a .md file both appear in the extracted text."""
        from video_processor.sources.m365_source import _extract_text

        markdown_file = tmp_path / "readme.md"
        markdown_file.write_text("# Title\n\nSome content")
        extracted = _extract_text(markdown_file)

        for expected in ("Title", "Some content"):
            assert expected in extracted

    def test_extract_text_unsupported(self, tmp_path):
        """An unknown extension produces text containing "Unsupported"."""
        from video_processor.sources.m365_source import _extract_text

        binary_file = tmp_path / "data.bin"
        binary_file.write_bytes(b"\x00\x01\x02")

        assert "Unsupported" in _extract_text(binary_file)

    def test_list_no_folder_url(self):
        """list_videos() returns an empty list when only web_url was given."""
        from video_processor.sources.m365_source import M365Source

        source = M365Source(web_url="https://contoso.sharepoint.com")

        assert source.list_videos() == []
0981a08… noreply 865
0981a08… noreply 866
# ---------------------------------------------------------------------------
# ObsidianSource
# ---------------------------------------------------------------------------
0981a08… noreply 870
0981a08… noreply 871
class TestObsidianSource:
    """Tests for ObsidianSource and the obsidian_source parsing helpers."""

    def test_import(self):
        """The connector class can be imported."""
        from video_processor.sources.obsidian_source import ObsidianSource

        assert ObsidianSource is not None

    def test_constructor(self, tmp_path):
        """A string vault path compares equal to the original Path on the instance."""
        from video_processor.sources.obsidian_source import ObsidianSource

        source = ObsidianSource(vault_path=str(tmp_path))

        assert source.vault_path == tmp_path

    def test_authenticate_with_vault(self, tmp_path):
        """authenticate() is True for a directory holding a markdown note."""
        from video_processor.sources.obsidian_source import ObsidianSource

        (tmp_path / "note.md").write_text("# Hello")
        source = ObsidianSource(vault_path=str(tmp_path))

        assert source.authenticate() is True

    def test_authenticate_empty_dir(self, tmp_path):
        """authenticate() is False for an existing but empty directory."""
        from video_processor.sources.obsidian_source import ObsidianSource

        source = ObsidianSource(vault_path=str(tmp_path))

        assert source.authenticate() is False

    def test_authenticate_nonexistent(self, tmp_path):
        """authenticate() is False when the vault path does not exist."""
        from video_processor.sources.obsidian_source import ObsidianSource

        missing_vault = tmp_path / "nonexistent"
        source = ObsidianSource(vault_path=str(missing_vault))

        assert source.authenticate() is False

    def test_parse_note(self, tmp_path):
        """Frontmatter, wiki links, inline tags, headings and body are all parsed."""
        from video_processor.sources.obsidian_source import parse_note

        note_lines = [
            "---",
            "title: Test Note",
            "tags: [python, testing]",
            "---",
            "# Heading One",
            "",
            "Some text with a [[Wiki Link]] and [[Another Page|alias]].",
            "",
            "Also has #tag1 and #tag2 inline tags.",
            "",
            "## Sub Heading",
            "",
            "More content here.",
        ]
        note_path = tmp_path / "test_note.md"
        note_path.write_text("\n".join(note_lines) + "\n")

        parsed = parse_note(note_path)
        frontmatter = parsed["frontmatter"]
        headings = parsed["headings"]

        assert frontmatter["title"] == "Test Note"
        assert isinstance(frontmatter["tags"], list)
        assert "python" in frontmatter["tags"]
        # The aliased link is reported under its target name, not the alias.
        for link_target in ("Wiki Link", "Another Page"):
            assert link_target in parsed["links"]
        for inline_tag in ("tag1", "tag2"):
            assert inline_tag in parsed["tags"]
        assert len(headings) == 2
        assert headings[0]["level"] == 1
        assert headings[0]["text"] == "Heading One"
        assert "Some text" in parsed["body"]

    def test_ingest_vault(self, tmp_path):
        """Ingesting two cross-linked notes yields two notes and three links."""
        from video_processor.sources.obsidian_source import ingest_vault

        vault_notes = {
            "note_a.md": "# A\n\nLinks to [[B]].\n",
            "note_b.md": "# B\n\nLinks to [[A]] and [[C]].\n",
        }
        for file_name, content in vault_notes.items():
            (tmp_path / file_name).write_text(content)

        ingested = ingest_vault(tmp_path)

        assert len(ingested["notes"]) == 2
        note_names = [note["name"] for note in ingested["notes"]]
        for expected_name in ("note_a", "note_b"):
            assert expected_name in note_names
        # note_a links to B, note_b links to A and C => 3 links
        assert len(ingested["links"]) == 3

    def test_list_videos(self, tmp_path):
        """Top-level and nested notes are both listed, each as text/markdown."""
        from video_processor.sources.obsidian_source import ObsidianSource

        nested_dir = tmp_path / "subdir"
        (tmp_path / "note1.md").write_text("# Note 1")
        nested_dir.mkdir()
        (nested_dir / "note2.md").write_text("# Note 2")

        listed = ObsidianSource(vault_path=str(tmp_path)).list_videos()

        assert len(listed) == 2
        assert all(entry.mime_type == "text/markdown" for entry in listed)
0981a08… noreply 961
0981a08… noreply 962
# ---------------------------------------------------------------------------
# LogseqSource
# ---------------------------------------------------------------------------
0981a08… noreply 966
0981a08… noreply 967
class TestLogseqSource:
    """Tests for LogseqSource and the logseq_source parsing helpers."""

    def test_import(self):
        """The connector class can be imported."""
        from video_processor.sources.logseq_source import LogseqSource

        assert LogseqSource is not None

    def test_constructor(self, tmp_path):
        """A string graph path compares equal to the original Path on the instance."""
        from video_processor.sources.logseq_source import LogseqSource

        source = LogseqSource(graph_path=str(tmp_path))

        assert source.graph_path == tmp_path

    def test_authenticate_with_pages(self, tmp_path):
        """authenticate() is True when the graph has a pages directory."""
        from video_processor.sources.logseq_source import LogseqSource

        (tmp_path / "pages").mkdir()
        source = LogseqSource(graph_path=str(tmp_path))

        assert source.authenticate() is True

    def test_authenticate_no_pages_or_journals(self, tmp_path):
        """authenticate() is False for a directory with no subdirectories."""
        from video_processor.sources.logseq_source import LogseqSource

        source = LogseqSource(graph_path=str(tmp_path))

        assert source.authenticate() is False

    def test_authenticate_nonexistent(self, tmp_path):
        """authenticate() is False when the graph path does not exist."""
        from video_processor.sources.logseq_source import LogseqSource

        missing_graph = tmp_path / "nonexistent"
        source = LogseqSource(graph_path=str(missing_graph))

        assert source.authenticate() is False

    def test_parse_page(self, tmp_path):
        """Properties, links, tags, block refs and body are extracted from a page."""
        from video_processor.sources.logseq_source import parse_page

        page_text = "".join(
            [
                "title:: My Page\n",
                "tags:: #project #important\n",
                "- Some block content\n",
                " - Nested with [[Another Page]] link\n",
                " - And a #todo tag\n",
                " - Block ref ((abc12345-6789-0abc-def0-123456789abc))\n",
            ]
        )
        page_path = tmp_path / "my_page.md"
        page_path.write_text(page_text)

        parsed = parse_page(page_path)

        assert parsed["properties"]["title"] == "My Page"
        assert "Another Page" in parsed["links"]
        assert "todo" in parsed["tags"]
        assert "abc12345-6789-0abc-def0-123456789abc" in parsed["block_refs"]
        assert "Some block content" in parsed["body"]

    def test_ingest_graph(self, tmp_path):
        """Two linked pages plus one journal entry give three notes and two links."""
        from video_processor.sources.logseq_source import ingest_graph

        pages = tmp_path / "pages"
        pages.mkdir()
        for file_name, linked_page in (("page_a.md", "Page B"), ("page_b.md", "Page A")):
            (pages / file_name).write_text(f"- Content linking [[{linked_page}]]\n")

        journals = tmp_path / "journals"
        journals.mkdir()
        (journals / "2026_03_07.md").write_text("- Journal entry\n")

        ingested = ingest_graph(tmp_path)

        assert len(ingested["notes"]) == 3
        assert len(ingested["links"]) == 2

    def test_list_videos(self, tmp_path):
        """A single page under pages/ is listed as one text/markdown file."""
        from video_processor.sources.logseq_source import LogseqSource

        pages = tmp_path / "pages"
        pages.mkdir()
        (pages / "page1.md").write_text("- content")

        listed = LogseqSource(graph_path=str(tmp_path)).list_videos()

        assert len(listed) == 1
        assert listed[0].mime_type == "text/markdown"
0981a08… noreply 1049
0981a08… noreply 1050
# ---------------------------------------------------------------------------
# NotionSource
# ---------------------------------------------------------------------------
0981a08… noreply 1054
0981a08… noreply 1055
class TestNotionSource:
    """Tests for the NotionSource connector using mocked HTTP calls."""

    def test_import(self):
        """The connector class can be imported."""
        from video_processor.sources.notion_source import NotionSource

        assert NotionSource is not None

    def test_constructor(self):
        """Token and database ID are stored; page_ids is an empty list."""
        from video_processor.sources.notion_source import NotionSource

        source = NotionSource(token="ntn_test123", database_id="db-1")

        assert source.token == "ntn_test123"
        assert source.database_id == "db-1"
        assert source.page_ids == []

    @patch.dict(os.environ, {}, clear=True)
    def test_authenticate_no_token(self):
        """authenticate() is False with an empty token and a cleared environment."""
        from video_processor.sources.notion_source import NotionSource

        source = NotionSource(token="")

        assert source.authenticate() is False

    @patch("requests.get")
    def test_authenticate_with_mock(self, mock_get):
        """authenticate() is True when the patched GET returns a successful response."""
        from video_processor.sources.notion_source import NotionSource

        fake_response = MagicMock()
        fake_response.raise_for_status = MagicMock()
        fake_response.json.return_value = {"name": "Test Bot"}
        mock_get.return_value = fake_response

        source = NotionSource(token="ntn_test123")

        assert source.authenticate() is True

    @patch("requests.post")
    def test_list_videos_database(self, mock_post):
        """A one-page database query result is listed with its title and page ID."""
        from video_processor.sources.notion_source import NotionSource

        page_entry = {
            "id": "page-1",
            "last_edited_time": "2026-03-01T00:00:00Z",
            "properties": {
                "Name": {
                    "type": "title",
                    "title": [{"plain_text": "Meeting Notes"}],
                }
            },
        }
        fake_response = MagicMock()
        fake_response.raise_for_status = MagicMock()
        # has_more is False, so the payload describes a single page of results.
        fake_response.json.return_value = {
            "results": [page_entry],
            "has_more": False,
        }
        mock_post.return_value = fake_response

        source = NotionSource(token="ntn_test", database_id="db-1")
        listed = source.list_videos()

        assert len(listed) == 1
        first = listed[0]
        assert first.name == "Meeting Notes"
        assert first.id == "page-1"

    def test_blocks_to_text(self):
        """Heading, paragraph, bullet and divider blocks render as markdown-like text."""
        from video_processor.sources.notion_source import NotionSource

        def rich_block(block_type, text):
            # A block whose payload is a single plain-text rich_text item.
            return {
                "type": block_type,
                block_type: {"rich_text": [{"plain_text": text}]},
            }

        source = NotionSource(token="test")
        blocks = [
            rich_block("heading_1", "Title"),
            rich_block("paragraph", "Some paragraph text."),
            rich_block("bulleted_list_item", "A bullet point"),
            {"type": "divider", "divider": {}},
        ]

        rendered = source._blocks_to_text(blocks)

        for expected in ("# Title", "Some paragraph text.", "- A bullet point", "---"):
            assert expected in rendered
0981a08… noreply 1151
0981a08… noreply 1152
# ---------------------------------------------------------------------------
# AppleNotesSource
# ---------------------------------------------------------------------------
0981a08… noreply 1156
0981a08… noreply 1157
class TestAppleNotesSource:
    """Tests for the AppleNotesSource connector and its HTML-to-text helper."""

    def test_import(self):
        """The connector class can be imported."""
        from video_processor.sources.apple_notes_source import AppleNotesSource

        assert AppleNotesSource is not None

    def test_constructor(self):
        """A folder name passed to the constructor is kept on the instance."""
        from video_processor.sources.apple_notes_source import AppleNotesSource

        source = AppleNotesSource(folder="Work")

        assert source.folder == "Work"

    def test_constructor_default(self):
        """folder is None when no folder is passed."""
        from video_processor.sources.apple_notes_source import AppleNotesSource

        source = AppleNotesSource()

        assert source.folder is None

    def test_authenticate_platform(self):
        """authenticate() is True on darwin and False on every other platform."""
        import sys

        from video_processor.sources.apple_notes_source import AppleNotesSource

        expected = sys.platform == "darwin"
        outcome = AppleNotesSource().authenticate()

        assert outcome is expected

    def test_html_to_text(self):
        """Tags are stripped and &amp; is decoded while the visible text is kept."""
        from video_processor.sources.apple_notes_source import AppleNotesSource

        note_html = "".join(
            [
                "<div>Hello <b>World</b></div>",
                "<p>Paragraph one.</p>",
                "<p>Paragraph two with &amp; entity.</p>",
                "<br/>",
                "<ul><li>Item 1</li><li>Item 2</li></ul>",
            ]
        )
        plain = AppleNotesSource._html_to_text(note_html)

        for expected in (
            "Hello World",
            "Paragraph one.",
            "Paragraph two with & entity.",
            "Item 1",
        ):
            assert expected in plain

    def test_html_to_text_empty(self):
        """An empty input string converts to an empty string."""
        from video_processor.sources.apple_notes_source import AppleNotesSource

        assert AppleNotesSource._html_to_text("") == ""

    def test_html_to_text_entities(self):
        """&lt;/&gt;, &quot; and &#39; entities are decoded to their characters."""
        from video_processor.sources.apple_notes_source import AppleNotesSource

        encoded = "&lt;code&gt; &quot;test&quot; &#39;single&#39; &nbsp;space"
        decoded = AppleNotesSource._html_to_text(encoded)

        for expected in ("<code>", '"test"', "'single'"):
            assert expected in decoded
0981a08… noreply 1217
0981a08… noreply 1218
# ---------------------------------------------------------------------------
# GoogleKeepSource
# ---------------------------------------------------------------------------
0981a08… noreply 1222
0981a08… noreply 1223
class TestGoogleKeepSource:
    """Tests for the GoogleKeepSource connector and the _note_to_text helper."""

    def test_import(self):
        """The connector class can be imported."""
        from video_processor.sources.google_keep_source import GoogleKeepSource

        assert GoogleKeepSource is not None

    def test_constructor(self):
        """A label passed to the constructor is kept on the instance."""
        from video_processor.sources.google_keep_source import GoogleKeepSource

        source = GoogleKeepSource(label="meetings")

        assert source.label == "meetings"

    def test_constructor_default(self):
        """label is None when no label is passed."""
        from video_processor.sources.google_keep_source import GoogleKeepSource

        source = GoogleKeepSource()

        assert source.label is None

    @patch("shutil.which", return_value=None)
    def test_authenticate_no_gws(self, _mock_which):
        """authenticate() is False when shutil.which finds no executable."""
        from video_processor.sources.google_keep_source import GoogleKeepSource

        source = GoogleKeepSource()

        assert source.authenticate() is False

    def test_note_to_text(self):
        """Title, body and checklist items are all rendered into the note text."""
        from video_processor.sources.google_keep_source import _note_to_text

        checklist = [
            {"text": "Milk", "checked": True},
            {"text": "Bread", "checked": False},
            {"text": "", "checked": False},
        ]
        note = {
            "title": "Shopping List",
            "body": "Remember to buy groceries",
            "listContent": checklist,
        }

        rendered = _note_to_text(note)

        for expected in (
            "Shopping List",
            "Remember to buy groceries",
            "- [x] Milk",
            "- [ ] Bread",
        ):
            assert expected in rendered

    def test_note_to_text_empty(self):
        """An empty note dict renders to an empty string."""
        from video_processor.sources.google_keep_source import _note_to_text

        assert _note_to_text({}) == ""

    def test_note_to_text_text_content(self):
        """A note carrying textContent renders both its title and that text."""
        from video_processor.sources.google_keep_source import _note_to_text

        rendered = _note_to_text({"title": "Simple", "textContent": "Just a plain note"})

        for expected in ("Simple", "Just a plain note"):
            assert expected in rendered
0981a08… noreply 1279
0981a08… noreply 1280
# ---------------------------------------------------------------------------
# OneNoteSource
# ---------------------------------------------------------------------------
0981a08… noreply 1284
0981a08… noreply 1285
0981a08… noreply 1286 class TestOneNoteSource:
def test_import(self):
    """The connector class can be imported."""
    from video_processor.sources.onenote_source import OneNoteSource

    assert OneNoteSource is not None
0981a08… noreply 1291
def test_constructor(self):
    """Notebook and section names passed to the constructor are kept on the instance."""
    from video_processor.sources.onenote_source import OneNoteSource

    source = OneNoteSource(notebook_name="Work Notes", section_name="Meetings")

    assert source.notebook_name == "Work Notes"
    assert source.section_name == "Meetings"
0981a08… noreply 1298
def test_constructor_default(self):
    """Both notebook_name and section_name are None when nothing is passed."""
    from video_processor.sources.onenote_source import OneNoteSource

    source = OneNoteSource()

    assert source.notebook_name is None
    assert source.section_name is None
0981a08… noreply 1305
0981a08… noreply 1306 @patch("shutil.which", return_value=None)
0981a08… noreply 1307 def test_authenticate_no_m365(self, _mock_which):
0981a08… noreply 1308 from video_processor.sources.onenote_source import OneNoteSource
0981a08… noreply 1309
0981a08… noreply 1310 src = OneNoteSource()
0981a08… noreply 1311 assert src.authenticate() is False
0981a08… noreply 1312
0981a08… noreply 1313 def test_html_to_text(self):
0981a08… noreply 1314 from video_processor.sources.onenote_source import _html_to_text
0981a08… noreply 1315
0981a08… noreply 1316 html = (
0981a08… noreply 1317 "<html><body>"
0981a08… noreply 1318 "<h1>Meeting Notes</h1>"
0981a08… noreply 1319 "<p>Discussed the &amp; project.</p>"
0981a08… noreply 1320 "<script>var x = 1;</script>"
0981a08… noreply 1321 "<style>.foo { color: red; }</style>"
0981a08… noreply 1322 "<ul><li>Action item 1</li><li>Action item 2</li></ul>"
0981a08… noreply 1323 "<p>Entity &#x41; and &#65; decoded.</p>"
0981a08… noreply 1324 "</body></html>"
0981a08… noreply 1325 )
0981a08… noreply 1326 result = _html_to_text(html)
0981a08… noreply 1327 assert "Meeting Notes" in result
0981a08… noreply 1328 assert "Discussed the & project." in result
0981a08… noreply 1329 assert "var x" not in result
0981a08… noreply 1330 assert ".foo" not in result
0981a08… noreply 1331 assert "Action item 1" in result
0981a08… noreply 1332 assert "Entity A and A decoded." in result
0981a08… noreply 1333
0981a08… noreply 1334 def test_html_to_text_empty(self):
0981a08… noreply 1335 from video_processor.sources.onenote_source import _html_to_text
0981a08… noreply 1336
0981a08… noreply 1337 assert _html_to_text("") == ""
0981a08… noreply 1338
0981a08… noreply 1339 def test_html_to_text_entities(self):
0981a08… noreply 1340 from video_processor.sources.onenote_source import _html_to_text
0981a08… noreply 1341
0981a08… noreply 1342 html = "&lt;tag&gt; &quot;quoted&quot; &apos;apos&apos; &nbsp;space"
0981a08… noreply 1343 result = _html_to_text(html)
0981a08… noreply 1344 assert "<tag>" in result
0981a08… noreply 1345 assert '"quoted"' in result
0981a08… noreply 1346 assert "'apos'" in result
0981a08… noreply 1347
0981a08… noreply 1348
0981a08… noreply 1349 # ---------------------------------------------------------------------------
0981a08… noreply 1350 # ZoomSource
0981a08… noreply 1351 # ---------------------------------------------------------------------------
0981a08… noreply 1352
0981a08… noreply 1353
class TestZoomSource:
    """Basic ``ZoomSource`` checks: import, construction, and unauthenticated use."""

    def test_import(self):
        """The connector class can be imported from its module."""
        from video_processor.sources.zoom_source import ZoomSource

        assert ZoomSource is not None

    def test_constructor_defaults(self):
        """A default-constructed source carries no access token."""
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource()
        # client_id is allowed to be a string here; presumably it can be
        # picked up from the environment -- confirm against ZoomSource.
        assert src.client_id is None or isinstance(src.client_id, str)
        assert src._access_token is None

    def test_constructor_explicit(self):
        """Explicit credentials are stored on the instance unchanged."""
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource(
            client_id="cid",
            client_secret="csec",
            account_id="aid",
        )
        assert src.client_id == "cid"
        assert src.client_secret == "csec"
        assert src.account_id == "aid"

    def test_authenticate_no_credentials(self, tmp_path):
        """``authenticate()`` returns ``False`` when nothing can authenticate.

        The environment is cleared and the token file is pointed at an empty
        temporary directory, so credentials or a cached token on the machine
        running the tests cannot change the outcome.
        """
        from video_processor.sources.zoom_source import ZoomSource

        with patch.dict("os.environ", {}, clear=True):
            src = ZoomSource(
                client_id=None,
                client_secret=None,
                account_id=None,
                token_path=tmp_path / "token.json",
            )
            # No saved token, no account_id, no client_id → should fail
            assert src.authenticate() is False

    def test_list_videos_not_authenticated(self):
        """Listing before authenticating raises ``RuntimeError``."""
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            src.list_videos()

    def test_download_not_authenticated(self, tmp_path):
        """Downloading before authenticating raises ``RuntimeError``."""
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource()
        sf = SourceFile(name="test.mp4", id="123")
        # A Path under tmp_path keeps the test off the shared /tmp directory.
        with pytest.raises(RuntimeError, match="Not authenticated"):
            src.download(sf, tmp_path / "test.mp4")

    def test_fetch_transcript_not_authenticated(self):
        """Fetching a transcript before authenticating raises ``RuntimeError``."""
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            src.fetch_transcript("meeting123")

    def test_mime_types_mapping(self):
        """Spot-check three entries of the ``_MIME_TYPES`` table."""
        from video_processor.sources.zoom_source import _MIME_TYPES

        assert _MIME_TYPES["MP4"] == "video/mp4"
        assert _MIME_TYPES["TRANSCRIPT"] == "text/vtt"
        assert _MIME_TYPES["M4A"] == "audio/mp4"
0981a08… noreply 1414
0981a08… noreply 1415
0981a08… noreply 1416 # ---------------------------------------------------------------------------
0981a08… noreply 1417 # TeamsRecordingSource
0981a08… noreply 1418 # ---------------------------------------------------------------------------
0981a08… noreply 1419
0981a08… noreply 1420
class TestTeamsRecordingSource:
    """Covers ``TeamsRecordingSource`` construction, failed auth, and VTT helpers."""

    def test_import(self):
        """The connector class can be imported from its module."""
        from video_processor.sources.teams_recording_source import (
            TeamsRecordingSource,
        )

        imported = TeamsRecordingSource
        assert imported is not None

    def test_constructor_default(self):
        """The default ``user_id`` is ``"me"``."""
        from video_processor.sources.teams_recording_source import (
            TeamsRecordingSource,
        )

        source = TeamsRecordingSource()
        assert source.user_id == "me"

    def test_constructor_custom_user(self):
        """An explicit ``user_id`` is stored unchanged."""
        from video_processor.sources.teams_recording_source import (
            TeamsRecordingSource,
        )

        source = TeamsRecordingSource(user_id="[email protected]")
        assert source.user_id == "[email protected]"

    def test_authenticate_no_m365(self):
        """``authenticate()`` returns ``False`` while ``shutil.which`` finds nothing."""
        with patch("shutil.which", return_value=None):
            from video_processor.sources.teams_recording_source import (
                TeamsRecordingSource,
            )

            outcome = TeamsRecordingSource().authenticate()
            assert outcome is False

    def test_vtt_to_text(self):
        """Cue text is kept; the header and timing lines are dropped."""
        from video_processor.sources.teams_recording_source import (
            _vtt_to_text,
        )

        pieces = [
            "WEBVTT\n\n",
            "1\n",
            "00:00:01.000 --> 00:00:05.000\n",
            "<v Speaker1>Hello everyone\n\n",
            "2\n",
            "00:00:05.000 --> 00:00:10.000\n",
            "<v Speaker2>Welcome to the meeting\n",
        ]
        text = _vtt_to_text("".join(pieces))

        for spoken in ("Hello everyone", "Welcome to the meeting"):
            assert spoken in text
        for markup in ("WEBVTT", "-->"):
            assert markup not in text

    def test_vtt_to_text_empty(self):
        """Empty input converts to an empty string."""
        from video_processor.sources.teams_recording_source import (
            _vtt_to_text,
        )

        converted = _vtt_to_text("")
        assert converted == ""

    def test_vtt_to_text_deduplicates(self):
        """A line repeated in consecutive cues appears only once in the output."""
        from video_processor.sources.teams_recording_source import (
            _vtt_to_text,
        )

        pieces = [
            "WEBVTT\n\n",
            "00:00:01.000 --> 00:00:03.000\n",
            "Same line\n\n",
            "00:00:03.000 --> 00:00:05.000\n",
            "Same line\n",
        ]
        text = _vtt_to_text("".join(pieces))
        occurrences = text.count("Same line")
        assert occurrences == 1

    def test_extract_meetings_list_dict(self):
        """A dict payload with a ``value`` list yields that list's single entry."""
        from video_processor.sources.teams_recording_source import (
            TeamsRecordingSource,
        )

        payload = {"value": [{"id": "m1"}]}
        meetings = TeamsRecordingSource()._extract_meetings_list(payload)
        assert len(meetings) == 1

    def test_extract_meetings_list_list(self):
        """A bare list payload is accepted and yields its single entry."""
        from video_processor.sources.teams_recording_source import (
            TeamsRecordingSource,
        )

        payload = [{"id": "m1"}]
        meetings = TeamsRecordingSource()._extract_meetings_list(payload)
        assert len(meetings) == 1
0981a08… noreply 1513
0981a08… noreply 1514
0981a08… noreply 1515 # ---------------------------------------------------------------------------
0981a08… noreply 1516 # MeetRecordingSource
0981a08… noreply 1517 # ---------------------------------------------------------------------------
0981a08… noreply 1518
0981a08… noreply 1519
class TestMeetRecordingSource:
    """``MeetRecordingSource`` checks plus attribute-access checks on ``sources``."""

    def test_import(self):
        """The connector class can be imported from its module."""
        from video_processor.sources.meet_recording_source import (
            MeetRecordingSource,
        )

        imported = MeetRecordingSource
        assert imported is not None

    def test_constructor_default(self):
        """With no arguments ``drive_folder_id`` is ``None``."""
        from video_processor.sources.meet_recording_source import (
            MeetRecordingSource,
        )

        source = MeetRecordingSource()
        assert source.drive_folder_id is None

    def test_constructor_with_folder(self):
        """An explicit ``drive_folder_id`` is stored unchanged."""
        from video_processor.sources.meet_recording_source import (
            MeetRecordingSource,
        )

        source = MeetRecordingSource(drive_folder_id="folder123")
        assert source.drive_folder_id == "folder123"

    def test_authenticate_no_gws(self):
        """``authenticate()`` returns ``False`` while ``shutil.which`` finds nothing."""
        with patch("shutil.which", return_value=None):
            from video_processor.sources.meet_recording_source import (
                MeetRecordingSource,
            )

            outcome = MeetRecordingSource().authenticate()
            assert outcome is False

    def test_find_matching_transcript_date_extraction(self):
        """A ``YYYY-MM-DD`` date can be pulled out of a recording name.

        Only the standard-library regex is exercised here; no project code
        is called by this test.
        """
        import re

        date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")
        found = date_pattern.search("Meet Recording 2026-03-07T14:30:00")
        assert found is not None
        assert found.group(0) == "2026-03-07"

    def test_lazy_import(self):
        """``MeetRecordingSource`` is reachable from the ``sources`` package."""
        from video_processor.sources import MeetRecordingSource

        exported = MeetRecordingSource
        assert exported is not None

    def test_teams_lazy_import(self):
        """``TeamsRecordingSource`` is reachable from the ``sources`` package."""
        from video_processor.sources import TeamsRecordingSource

        exported = TeamsRecordingSource
        assert exported is not None

    def test_zoom_lazy_import(self):
        """``ZoomSource`` is reachable from the ``sources`` package."""
        from video_processor.sources import ZoomSource

        exported = ZoomSource
        assert exported is not None

    def test_invalid_lazy_import(self):
        """Looking up an unknown name on the package raises ``AttributeError``."""
        from video_processor import sources

        with pytest.raises(AttributeError):
            getattr(sources, "NonexistentSource")
ffef14a… noreply 1581
ffef14a… noreply 1582
ffef14a… noreply 1583 # ---------------------------------------------------------------------------
ffef14a… noreply 1584 # BaseSource.download_all
ffef14a… noreply 1585 # ---------------------------------------------------------------------------
ffef14a… noreply 1586
ffef14a… noreply 1587
class TestBaseSourceDownloadAll:
    """Exercises ``BaseSource.download_all`` through small in-test subclasses."""

    def test_download_all_success(self, tmp_path):
        """download_all should download all files using path when available."""

        class _WritingSource(BaseSource):
            """Source whose ``download`` writes a marker string to the target."""

            def authenticate(self):
                return True

            def list_videos(self, **kwargs):
                return []

            def download(self, file, destination):
                marker = f"content:{file.name}"
                destination.parent.mkdir(parents=True, exist_ok=True)
                destination.write_text(marker)
                return destination

        wanted = [
            SourceFile(name="a.mp4", id="1"),
            SourceFile(name="b.mp4", id="2", path="subdir/b.mp4"),
        ]
        downloaded = _WritingSource().download_all(wanted, tmp_path)
        assert len(downloaded) == 2

        # The first file has no path and lands under its name; the second
        # lands under its relative path.
        expected_contents = {
            tmp_path / "a.mp4": "content:a.mp4",
            tmp_path / "subdir" / "b.mp4": "content:b.mp4",
        }
        for location, content in expected_contents.items():
            assert location.read_text() == content

    def test_download_all_partial_failure(self, tmp_path):
        """download_all should continue past failures and return successful paths."""

        class _FlakySource(BaseSource):
            """Source whose ``download`` raises for the file with id ``"bad"``."""

            def authenticate(self):
                return True

            def list_videos(self, **kwargs):
                return []

            def download(self, file, destination):
                if file.id != "bad":
                    destination.parent.mkdir(parents=True, exist_ok=True)
                    destination.write_text("ok")
                    return destination
                raise RuntimeError("download failed")

        wanted = [
            SourceFile(name=name, id=identifier)
            for name, identifier in (
                ("good.mp4", "good"),
                ("bad.mp4", "bad"),
                ("also_good.mp4", "good2"),
            )
        ]
        downloaded = _FlakySource().download_all(wanted, tmp_path)
        assert len(downloaded) == 2
ffef14a… noreply 1639
ffef14a… noreply 1640
ffef14a… noreply 1641 # ---------------------------------------------------------------------------
ffef14a… noreply 1642 # Download & error handling tests
ffef14a… noreply 1643 # ---------------------------------------------------------------------------
ffef14a… noreply 1644
ffef14a… noreply 1645
class TestRSSSourceDownload:
    """``RSSSource`` download tests with HTTP mocked and ``feedparser`` unimportable."""

    @patch("requests.get")
    def test_download_entry(self, mock_get, tmp_path):
        """A listed entry is written to disk with its title and summary."""
        from video_processor.sources.rss_source import RSSSource

        feed_xml = "".join(
            [
                "<rss><channel><item><title>Post 1</title>",
                "<link>https://example.com/1</link>",
                "<description>Summary here</description>",
                "<pubDate>Mon, 01 Jan 2025</pubDate></item></channel></rss>",
            ]
        )
        response = MagicMock(text=feed_xml, status_code=200)
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = RSSSource(url="https://example.com/feed.xml")
        # A None entry in sys.modules makes ``import feedparser`` fail, so the
        # listing cannot use that package.
        with patch.dict("sys.modules", {"feedparser": None}):
            entries = source.list_videos()
            assert len(entries) == 1

        target = tmp_path / "entry.txt"
        written = source.download(entries[0], target)
        assert written.exists()
        body = written.read_text()
        for expected in ("Post 1", "Summary here"):
            assert expected in body

    @patch("requests.get")
    def test_download_not_found(self, mock_get, tmp_path):
        """Downloading an id that was never listed raises ``ValueError``."""
        from video_processor.sources.rss_source import RSSSource

        response = MagicMock(text="<rss><channel></channel></rss>", status_code=200)
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = RSSSource(url="https://example.com/feed.xml")
        with patch.dict("sys.modules", {"feedparser": None}):
            source.list_videos()

        unknown = SourceFile(name="missing", id="nonexistent")
        with pytest.raises(ValueError, match="Entry not found"):
            source.download(unknown, tmp_path / "out.txt")
ffef14a… noreply 1687
ffef14a… noreply 1688
class TestWebSourceDownload:
    """``WebSource`` download test plus a check of the ``_strip_html_tags`` helper."""

    @patch("requests.get")
    def test_download_saves_text(self, mock_get, tmp_path):
        """The page's paragraph text ends up in the downloaded file."""
        from video_processor.sources.web_source import WebSource

        response = MagicMock(
            text="<html><body><p>Page content</p></body></html>", status_code=200
        )
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = WebSource(url="https://example.com/page")
        # A None entry in sys.modules makes ``import bs4`` fail while the
        # page is listed and downloaded.
        with patch.dict("sys.modules", {"bs4": None}):
            page = source.list_videos()[0]
            target = tmp_path / "page.txt"
            written = source.download(page, target)
            assert written.exists()
            assert "Page content" in written.read_text()

    def test_strip_html_tags(self):
        """Paragraph text is kept and script contents are removed."""
        from video_processor.sources.web_source import _strip_html_tags

        markup = "<p>Hello</p><script>evil()</script><style>.x{}</style>"
        stripped = _strip_html_tags(markup)
        assert "Hello" in stripped
        assert "evil" not in stripped
ffef14a… noreply 1714
ffef14a… noreply 1715
class TestHackerNewsSourceDownload:
    """``HackerNewsSource`` tests driven by a fake ``requests.get``."""

    @patch("requests.get")
    def test_download(self, mock_get, tmp_path):
        """A story without comments downloads to a file containing its title."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        story = {"title": "Story", "by": "user", "score": 1, "kids": []}

        def fake_get(url, timeout=10):
            # Every URL answers with the story payload.
            response = MagicMock()
            response.raise_for_status = MagicMock()
            response.json.return_value = story
            return response

        mock_get.side_effect = fake_get

        source = HackerNewsSource(item_id=12345)
        listed = source.list_videos()[0]
        target = tmp_path / "hn.txt"
        written = source.download(listed, target)
        assert written.exists()
        assert "Story" in written.read_text()

    @patch("requests.get")
    def test_max_comments(self, mock_get):
        """Only ``max_comments`` of the ten child comments are rendered."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        story = {"title": "Big", "by": "u", "score": 1, "kids": list(range(100, 110))}
        comment = {"by": "c", "text": "hi", "kids": []}

        def fake_get(url, timeout=10):
            # The story URL gets the story; anything else gets the comment.
            response = MagicMock()
            response.raise_for_status = MagicMock()
            response.json.return_value = story if "/12345.json" in url else comment
            return response

        mock_get.side_effect = fake_get

        source = HackerNewsSource(item_id=12345, max_comments=3)
        rendered = source.fetch_text()
        assert rendered.count("**c**") == 3

    @patch("requests.get")
    def test_deleted_comments_skipped(self, mock_get):
        """A deleted child comment is omitted while a live one is rendered."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        story = {"title": "Story", "by": "u", "score": 1, "kids": [200, 201]}

        def fake_get(url, timeout=10):
            response = MagicMock()
            response.raise_for_status = MagicMock()
            payload = None
            if "/12345.json" in url:
                payload = story
            elif "/200.json" in url:
                payload = {"deleted": True}
            elif "/201.json" in url:
                payload = {"by": "alive", "text": "here", "dead": False}
            # Unrecognised URLs keep the MagicMock's default json() result.
            if payload is not None:
                response.json.return_value = payload
            return response

        mock_get.side_effect = fake_get

        source = HackerNewsSource(item_id=12345)
        rendered = source.fetch_text()
        assert "alive" in rendered
        assert rendered.count("**") == 2  # only the alive comment
ffef14a… noreply 1783
ffef14a… noreply 1784
class TestRedditSourceDownload:
    """``RedditSource`` download test with the JSON endpoint mocked."""

    @patch("requests.get")
    def test_download(self, mock_get, tmp_path):
        """A post with no comments downloads to a file containing its title."""
        from video_processor.sources.reddit_source import RedditSource

        # Two listings: the first holds the post, the second has no children.
        post_listing = {
            "data": {"children": [{"data": {"title": "Post", "author": "u", "score": 1}}]}
        }
        comment_listing = {"data": {"children": []}}

        response = MagicMock(status_code=200)
        response.raise_for_status = MagicMock()
        response.json.return_value = [post_listing, comment_listing]
        mock_get.return_value = response

        source = RedditSource(url="https://reddit.com/r/test/comments/abc/post")
        listed = source.list_videos()[0]
        target = tmp_path / "reddit.txt"
        written = source.download(listed, target)
        assert written.exists()
        assert "Post" in written.read_text()
ffef14a… noreply 1803
ffef14a… noreply 1804
class TestArxivSourceDownload:
    """``ArxivSource`` metadata download test with the Atom feed mocked."""

    @patch("requests.get")
    def test_download_metadata(self, mock_get, tmp_path):
        """The metadata file contains the paper's title, author and abstract."""
        from video_processor.sources.arxiv_source import ArxivSource

        atom_feed = (
            '<?xml version="1.0"?>\n'
            '<feed xmlns="http://www.w3.org/2005/Atom">\n'
            "<entry>\n"
            "<title>Paper Title</title>\n"
            "<summary>Abstract text</summary>\n"
            "<author><name>Alice</name></author>\n"
            "<published>2023-01-01</published>\n"
            "</entry>\n"
            "</feed>"
        )

        response = MagicMock(text=atom_feed, status_code=200)
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = ArxivSource("2301.12345")
        listed = source.list_videos()
        # Pick the first listed file whose id carries the "meta:" prefix.
        metadata_files = [item for item in listed if item.id.startswith("meta:")]
        metadata = metadata_files[0]
        target = tmp_path / "paper.txt"
        written = source.download(metadata, target)
        assert written.exists()
        body = written.read_text()
        for expected in ("Paper Title", "Alice", "Abstract text"):
            assert expected in body
ffef14a… noreply 1833
ffef14a… noreply 1834
class TestPodcastSourceDownload:
    """``PodcastSource`` listing test with the feed mocked."""

    @patch("requests.get")
    def test_max_episodes(self, mock_get):
        """A 20-item feed is cut down to ``max_episodes`` entries."""
        from video_processor.sources.podcast_source import PodcastSource

        episode_items = [
            f"<item><title>Ep {i}</title>"
            f'<enclosure url="https://example.com/ep{i}.mp3" type="audio/mpeg"/></item>'
            for i in range(20)
        ]
        items = "".join(episode_items)
        feed_xml = f"<rss><channel>{items}</channel></rss>"

        response = MagicMock(text=feed_xml, status_code=200)
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = PodcastSource(feed_url="https://example.com/feed.xml", max_episodes=5)
        # A None entry in sys.modules makes ``import feedparser`` fail.
        with patch.dict("sys.modules", {"feedparser": None}):
            episodes = source.list_videos()
        assert len(episodes) == 5
ffef14a… noreply 1854
ffef14a… noreply 1855
ffef14a… noreply 1856 # ---------------------------------------------------------------------------
ffef14a… noreply 1857 # Auth edge cases
ffef14a… noreply 1858 # ---------------------------------------------------------------------------
ffef14a… noreply 1859
ffef14a… noreply 1860
class TestZoomSourceAuth:
    """Edge cases of the ``ZoomSource`` auth helpers and download validation."""

    def test_saved_token_valid(self, tmp_path):
        """A saved token whose ``expires_at`` is in the future is accepted."""
        import time

        from video_processor.sources.zoom_source import ZoomSource

        token_path = tmp_path / "token.json"

        token_path.write_text(
            json.dumps({"access_token": "valid", "expires_at": time.time() + 3600})
        )
        src = ZoomSource(token_path=token_path)
        assert src._auth_saved_token() is True
        assert src._access_token == "valid"

    def test_saved_token_expired_no_refresh(self, tmp_path):
        """An expired saved token without a refresh token is rejected."""
        from video_processor.sources.zoom_source import ZoomSource

        token_path = tmp_path / "token.json"
        token_path.write_text(json.dumps({"access_token": "old", "expires_at": 0}))
        src = ZoomSource(token_path=token_path)
        assert src._auth_saved_token() is False

    @patch("video_processor.sources.zoom_source.requests")
    def test_server_to_server_success(self, mock_requests, tmp_path):
        """A successful token response stores the returned access token."""
        from video_processor.sources.zoom_source import ZoomSource

        mock_requests.post.return_value = MagicMock(status_code=200)
        mock_requests.post.return_value.raise_for_status = MagicMock()
        mock_requests.post.return_value.json.return_value = {
            "access_token": "s2s_tok",
            "expires_in": 3600,
        }

        src = ZoomSource(
            client_id="cid",
            client_secret="csec",
            account_id="aid",
            token_path=tmp_path / "token.json",
        )
        assert src._auth_server_to_server() is True
        assert src._access_token == "s2s_tok"

    def test_server_to_server_no_creds(self, tmp_path):
        """Server-to-server auth fails when only ``account_id`` is known.

        The environment is cleared so that a client id or secret configured
        on the machine running the tests cannot complete the credentials and
        trigger a real token request; the token file lives in a temporary
        directory for the same reason.
        """
        from video_processor.sources.zoom_source import ZoomSource

        with patch.dict("os.environ", {}, clear=True):
            src = ZoomSource(account_id="aid", token_path=tmp_path / "token.json")
            assert src._auth_server_to_server() is False

    def test_download_no_url_raises(self):
        """Downloading a file that carries no download URL raises ``ValueError``."""
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource()
        # Set the token directly so the test reaches the download-URL check.
        src._access_token = "tok"
        file = SourceFile(name="meeting.mp4", id="123")
        with pytest.raises(ValueError, match="No download URL"):
            src.download(file, Path("/tmp/out.mp4"))
ffef14a… noreply 1918
ffef14a… noreply 1919
class TestGoogleDriveSourceAuth:
    """``GoogleDriveSource`` credential-type detection and unauthenticated download."""

    def test_is_service_account_true(self, tmp_path):
        """A credentials file of type ``service_account`` is detected as such."""
        from video_processor.sources.google_drive import GoogleDriveSource

        credentials_file = tmp_path / "sa.json"
        credentials_file.write_text(json.dumps({"type": "service_account"}))
        source = GoogleDriveSource(credentials_path=str(credentials_file))
        assert source._is_service_account() is True

    def test_is_service_account_false(self, tmp_path):
        """A credentials file of type ``authorized_user`` is not a service account."""
        from video_processor.sources.google_drive import GoogleDriveSource

        credentials_file = tmp_path / "oauth.json"
        credentials_file.write_text(json.dumps({"type": "authorized_user"}))
        source = GoogleDriveSource(credentials_path=str(credentials_file))
        assert source._is_service_account() is False

    def test_is_service_account_no_file(self):
        """With no credentials path at all the check returns ``False``."""
        from video_processor.sources.google_drive import GoogleDriveSource

        # The environment is emptied for the duration of the check.
        with patch.dict("os.environ", {}, clear=True):
            source = GoogleDriveSource(credentials_path=None)
            source.credentials_path = None
            verdict = source._is_service_account()
            assert verdict is False

    def test_download_not_authed(self):
        """Downloading before authenticating raises ``RuntimeError``."""
        from video_processor.sources.google_drive import GoogleDriveSource

        source = GoogleDriveSource()
        placeholder = SourceFile(name="x", id="y")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            source.download(placeholder, Path("/tmp/x"))
ffef14a… noreply 1951
ffef14a… noreply 1952
class TestDropboxSourceAuth:
    """Environment-based configuration and unauthenticated-use checks for DropboxSource."""

    def test_init_from_env(self):
        """The constructor picks up the access token and app key from the environment."""
        from video_processor.sources.dropbox_source import DropboxSource

        fake_env = {"DROPBOX_ACCESS_TOKEN": "tok", "DROPBOX_APP_KEY": "key"}
        with patch.dict("os.environ", fake_env):
            dropbox = DropboxSource()
            assert dropbox.access_token == "tok"
            assert dropbox.app_key == "key"

    def test_not_authed_list(self):
        """list_videos() before authentication raises RuntimeError."""
        from video_processor.sources.dropbox_source import DropboxSource

        dropbox = DropboxSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            dropbox.list_videos()

    def test_not_authed_download(self):
        """download() before authentication raises RuntimeError."""
        from video_processor.sources.dropbox_source import DropboxSource

        dropbox = DropboxSource()
        placeholder = SourceFile(name="x", id="y")
        target = Path("/tmp/x")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            dropbox.download(placeholder, target)
ffef14a… noreply 1978
ffef14a… noreply 1979
class TestNotionSourceAuth:
    """Authentication outcomes and property-value extraction for the Notion connector."""

    def test_no_token(self):
        """authenticate() returns False when the token is empty and the environment is cleared."""
        from video_processor.sources.notion_source import NotionSource

        with patch.dict("os.environ", {}, clear=True):
            notion = NotionSource(token="")
            assert notion.authenticate() is False

    @patch("video_processor.sources.notion_source.requests")
    def test_auth_success(self, mock_requests):
        """authenticate() returns True when the mocked GET succeeds."""
        from video_processor.sources.notion_source import NotionSource

        # Build the canned 200 response first, then attach it to the patched module.
        response = MagicMock(status_code=200)
        response.raise_for_status = MagicMock()
        response.json.return_value = {"name": "Bot"}
        mock_requests.get.return_value = response
        mock_requests.RequestException = Exception

        notion = NotionSource(token="ntn_valid")
        assert notion.authenticate() is True

    @patch("video_processor.sources.notion_source.requests")
    def test_auth_failure(self, mock_requests):
        """authenticate() returns False when raise_for_status() raises."""
        from video_processor.sources.notion_source import NotionSource

        failing_response = mock_requests.get.return_value
        failing_response.raise_for_status.side_effect = Exception("401")
        # The patched module needs a real exception class for any `except` clause to use.
        mock_requests.RequestException = Exception

        notion = NotionSource(token="ntn_bad")
        assert notion.authenticate() is False

    def test_extract_property_values(self):
        """_extract_property_value renders each property payload as the expected string."""
        from video_processor.sources.notion_source import _extract_property_value

        # (property payload, expected string) pairs, checked in order.
        cases = [
            ({"type": "number", "number": 42}, "42"),
            ({"type": "number", "number": None}, ""),
            ({"type": "select", "select": {"name": "High"}}, "High"),
            ({"type": "select", "select": None}, ""),
            ({"type": "checkbox", "checkbox": True}, "True"),
            ({"type": "url", "url": "https://ex.com"}, "https://ex.com"),
            ({"type": "unknown"}, ""),
        ]
        for prop, expected in cases:
            assert _extract_property_value(prop) == expected
ffef14a… noreply 2020
ffef14a… noreply 2021
class TestGitHubSourceAuth:
    """Token-less authentication and issue/PR filtering for GitHubSource.

    Both tests build the source and call authenticate() with the environment
    cleared and ``subprocess.run`` patched to raise ``FileNotFoundError``, so
    the outcome does not depend on the host's variables or installed tools.
    """

    def test_authenticate_no_token(self):
        """authenticate() still returns True when no token can be found."""
        from video_processor.sources.github_source import GitHubSource

        # Construct inside the patches as well, in case the constructor
        # consults the environment (not visible from this file).
        with patch.dict("os.environ", {}, clear=True), patch(
            "subprocess.run", side_effect=FileNotFoundError
        ):
            src = GitHubSource(repo="owner/repo")
            result = src.authenticate()
        assert result is True  # works for public repos

    @patch("requests.get")
    def test_list_excludes_pr_from_issues(self, mock_get):
        """Entries from /issues that carry a "pull_request" key are not listed as issues."""
        from video_processor.sources.github_source import GitHubSource

        def side_effect(url, **kwargs):
            # Route on the URL fragment; an unmatched URL keeps MagicMock's default json().
            resp = MagicMock()
            resp.ok = True
            if "/readme" in url:
                resp.json.return_value = {}
            elif "/issues" in url:
                resp.json.return_value = [
                    {"number": 1, "title": "Bug"},
                    {"number": 2, "title": "PR as issue", "pull_request": {}},
                ]
            elif "/pulls" in url:
                resp.json.return_value = []
            return resp

        mock_get.side_effect = side_effect

        # Same isolation as test_authenticate_no_token: authenticate() must not
        # pick up a real token or run an external command on the test machine.
        with patch.dict("os.environ", {}, clear=True), patch(
            "subprocess.run", side_effect=FileNotFoundError
        ):
            src = GitHubSource(repo="o/r")
            src.authenticate()
        files = src.list_videos()
        ids = [f.id for f in files]
        assert "issue:1" in ids
        assert "issue:2" not in ids  # excluded because it has pull_request key
ffef14a… noreply 2058
ffef14a… noreply 2059
class TestS3SourceErrors:
    """Using S3Source before authentication raises RuntimeError."""

    def test_not_authed_list(self):
        """list_videos() on a freshly constructed source raises RuntimeError."""
        from video_processor.sources.s3_source import S3Source

        bucket_source = S3Source(bucket="test")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            bucket_source.list_videos()

    def test_not_authed_download(self):
        """download() on a freshly constructed source raises RuntimeError."""
        from video_processor.sources.s3_source import S3Source

        bucket_source = S3Source(bucket="test")
        placeholder = SourceFile(name="x", id="x")
        target = Path("/tmp/x")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            bucket_source.download(placeholder, target)

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button