PlanOpticon

Merge pull request #117 from ConflictHQ/test/source-connector-tests — test: source connector download and auth tests

noreply 2026-03-08 00:54 trunk merge
Commit ffef14a510103b189b9d7710cbe46205cea6c3a5d2c6a834cacf155d1e74aa0f
1 file changed +502 -1
--- tests/test_sources.py
+++ tests/test_sources.py
@@ -1,13 +1,15 @@
11
"""Tests for all source connectors: import, instantiation, authenticate, list_videos."""
22
3
+import json
34
import os
5
+from pathlib import Path
46
from unittest.mock import MagicMock, patch
57
68
import pytest
79
8
-from video_processor.sources.base import SourceFile
10
+from video_processor.sources.base import BaseSource, SourceFile
911
1012
# ---------------------------------------------------------------------------
1113
# SourceFile model
1214
# ---------------------------------------------------------------------------
1315
@@ -1568,5 +1570,504 @@
15681570
15691571
def test_zoom_lazy_import(self):
15701572
from video_processor.sources import ZoomSource
15711573
15721574
assert ZoomSource is not None
1575
+
1576
+ def test_invalid_lazy_import(self):
1577
+ from video_processor import sources
1578
+
1579
+ with pytest.raises(AttributeError):
1580
+ _ = sources.NonexistentSource
1581
+
1582
+
1583
+# ---------------------------------------------------------------------------
1584
+# BaseSource.download_all
1585
+# ---------------------------------------------------------------------------
1586
+
1587
+
1588
+class TestBaseSourceDownloadAll:
1589
+ def test_download_all_success(self, tmp_path):
1590
+ """download_all should download all files using path when available."""
1591
+
1592
+ class FakeSource(BaseSource):
1593
+ def authenticate(self):
1594
+ return True
1595
+
1596
+ def list_videos(self, **kwargs):
1597
+ return []
1598
+
1599
+ def download(self, file, destination):
1600
+ destination.parent.mkdir(parents=True, exist_ok=True)
1601
+ destination.write_text(f"content:{file.name}")
1602
+ return destination
1603
+
1604
+ src = FakeSource()
1605
+ files = [
1606
+ SourceFile(name="a.mp4", id="1"),
1607
+ SourceFile(name="b.mp4", id="2", path="subdir/b.mp4"),
1608
+ ]
1609
+ paths = src.download_all(files, tmp_path)
1610
+ assert len(paths) == 2
1611
+ assert (tmp_path / "a.mp4").read_text() == "content:a.mp4"
1612
+ assert (tmp_path / "subdir" / "b.mp4").read_text() == "content:b.mp4"
1613
+
1614
+ def test_download_all_partial_failure(self, tmp_path):
1615
+ """download_all should continue past failures and return successful paths."""
1616
+
1617
+ class PartialFail(BaseSource):
1618
+ def authenticate(self):
1619
+ return True
1620
+
1621
+ def list_videos(self, **kwargs):
1622
+ return []
1623
+
1624
+ def download(self, file, destination):
1625
+ if file.id == "bad":
1626
+ raise RuntimeError("download failed")
1627
+ destination.parent.mkdir(parents=True, exist_ok=True)
1628
+ destination.write_text("ok")
1629
+ return destination
1630
+
1631
+ src = PartialFail()
1632
+ files = [
1633
+ SourceFile(name="good.mp4", id="good"),
1634
+ SourceFile(name="bad.mp4", id="bad"),
1635
+ SourceFile(name="also_good.mp4", id="good2"),
1636
+ ]
1637
+ paths = src.download_all(files, tmp_path)
1638
+ assert len(paths) == 2
1639
+
1640
+
1641
+# ---------------------------------------------------------------------------
1642
+# Download & error handling tests
1643
+# ---------------------------------------------------------------------------
1644
+
1645
+
1646
+class TestRSSSourceDownload:
1647
+ @patch("requests.get")
1648
+ def test_download_entry(self, mock_get, tmp_path):
1649
+ from video_processor.sources.rss_source import RSSSource
1650
+
1651
+ xml = (
1652
+ "<rss><channel><item><title>Post 1</title>"
1653
+ "<link>https://example.com/1</link>"
1654
+ "<description>Summary here</description>"
1655
+ "<pubDate>Mon, 01 Jan 2025</pubDate></item></channel></rss>"
1656
+ )
1657
+ mock_get.return_value = MagicMock(text=xml, status_code=200)
1658
+ mock_get.return_value.raise_for_status = MagicMock()
1659
+
1660
+ src = RSSSource(url="https://example.com/feed.xml")
1661
+ with patch.dict("sys.modules", {"feedparser": None}):
1662
+ files = src.list_videos()
1663
+ assert len(files) == 1
1664
+
1665
+ dest = tmp_path / "entry.txt"
1666
+ result = src.download(files[0], dest)
1667
+ assert result.exists()
1668
+ content = result.read_text()
1669
+ assert "Post 1" in content
1670
+ assert "Summary here" in content
1671
+
1672
+ @patch("requests.get")
1673
+ def test_download_not_found(self, mock_get, tmp_path):
1674
+ from video_processor.sources.rss_source import RSSSource
1675
+
1676
+ xml = "<rss><channel></channel></rss>"
1677
+ mock_get.return_value = MagicMock(text=xml, status_code=200)
1678
+ mock_get.return_value.raise_for_status = MagicMock()
1679
+
1680
+ src = RSSSource(url="https://example.com/feed.xml")
1681
+ with patch.dict("sys.modules", {"feedparser": None}):
1682
+ src.list_videos()
1683
+
1684
+ fake = SourceFile(name="missing", id="nonexistent")
1685
+ with pytest.raises(ValueError, match="Entry not found"):
1686
+ src.download(fake, tmp_path / "out.txt")
1687
+
1688
+
1689
+class TestWebSourceDownload:
1690
+ @patch("requests.get")
1691
+ def test_download_saves_text(self, mock_get, tmp_path):
1692
+ from video_processor.sources.web_source import WebSource
1693
+
1694
+ mock_get.return_value = MagicMock(
1695
+ text="<html><body><p>Page content</p></body></html>", status_code=200
1696
+ )
1697
+ mock_get.return_value.raise_for_status = MagicMock()
1698
+
1699
+ src = WebSource(url="https://example.com/page")
1700
+ with patch.dict("sys.modules", {"bs4": None}):
1701
+ file = src.list_videos()[0]
1702
+ dest = tmp_path / "page.txt"
1703
+ result = src.download(file, dest)
1704
+ assert result.exists()
1705
+ assert "Page content" in result.read_text()
1706
+
1707
+ def test_strip_html_tags(self):
1708
+ from video_processor.sources.web_source import _strip_html_tags
1709
+
1710
+ html = "<p>Hello</p><script>evil()</script><style>.x{}</style>"
1711
+ text = _strip_html_tags(html)
1712
+ assert "Hello" in text
1713
+ assert "evil" not in text
1714
+
1715
+
1716
+class TestHackerNewsSourceDownload:
1717
+ @patch("requests.get")
1718
+ def test_download(self, mock_get, tmp_path):
1719
+ from video_processor.sources.hackernews_source import HackerNewsSource
1720
+
1721
+ story = {"title": "Story", "by": "user", "score": 1, "kids": []}
1722
+
1723
+ def side_effect(url, timeout=10):
1724
+ resp = MagicMock()
1725
+ resp.raise_for_status = MagicMock()
1726
+ resp.json.return_value = story
1727
+ return resp
1728
+
1729
+ mock_get.side_effect = side_effect
1730
+
1731
+ src = HackerNewsSource(item_id=12345)
1732
+ file = src.list_videos()[0]
1733
+ dest = tmp_path / "hn.txt"
1734
+ result = src.download(file, dest)
1735
+ assert result.exists()
1736
+ assert "Story" in result.read_text()
1737
+
1738
+ @patch("requests.get")
1739
+ def test_max_comments(self, mock_get):
1740
+ from video_processor.sources.hackernews_source import HackerNewsSource
1741
+
1742
+ story = {"title": "Big", "by": "u", "score": 1, "kids": list(range(100, 110))}
1743
+ comment = {"by": "c", "text": "hi", "kids": []}
1744
+
1745
+ def side_effect(url, timeout=10):
1746
+ resp = MagicMock()
1747
+ resp.raise_for_status = MagicMock()
1748
+ if "/12345.json" in url:
1749
+ resp.json.return_value = story
1750
+ else:
1751
+ resp.json.return_value = comment
1752
+ return resp
1753
+
1754
+ mock_get.side_effect = side_effect
1755
+
1756
+ src = HackerNewsSource(item_id=12345, max_comments=3)
1757
+ text = src.fetch_text()
1758
+ assert text.count("**c**") == 3
1759
+
1760
+ @patch("requests.get")
1761
+ def test_deleted_comments_skipped(self, mock_get):
1762
+ from video_processor.sources.hackernews_source import HackerNewsSource
1763
+
1764
+ story = {"title": "Story", "by": "u", "score": 1, "kids": [200, 201]}
1765
+
1766
+ def side_effect(url, timeout=10):
1767
+ resp = MagicMock()
1768
+ resp.raise_for_status = MagicMock()
1769
+ if "/12345.json" in url:
1770
+ resp.json.return_value = story
1771
+ elif "/200.json" in url:
1772
+ resp.json.return_value = {"deleted": True}
1773
+ elif "/201.json" in url:
1774
+ resp.json.return_value = {"by": "alive", "text": "here", "dead": False}
1775
+ return resp
1776
+
1777
+ mock_get.side_effect = side_effect
1778
+
1779
+ src = HackerNewsSource(item_id=12345)
1780
+ text = src.fetch_text()
1781
+ assert "alive" in text
1782
+ assert text.count("**") == 2 # only the alive comment
1783
+
1784
+
1785
+class TestRedditSourceDownload:
1786
+ @patch("requests.get")
1787
+ def test_download(self, mock_get, tmp_path):
1788
+ from video_processor.sources.reddit_source import RedditSource
1789
+
1790
+ mock_get.return_value = MagicMock(status_code=200)
1791
+ mock_get.return_value.raise_for_status = MagicMock()
1792
+ mock_get.return_value.json.return_value = [
1793
+ {"data": {"children": [{"data": {"title": "Post", "author": "u", "score": 1}}]}},
1794
+ {"data": {"children": []}},
1795
+ ]
1796
+
1797
+ src = RedditSource(url="https://reddit.com/r/test/comments/abc/post")
1798
+ file = src.list_videos()[0]
1799
+ dest = tmp_path / "reddit.txt"
1800
+ result = src.download(file, dest)
1801
+ assert result.exists()
1802
+ assert "Post" in result.read_text()
1803
+
1804
+
1805
+class TestArxivSourceDownload:
1806
+ @patch("requests.get")
1807
+ def test_download_metadata(self, mock_get, tmp_path):
1808
+ from video_processor.sources.arxiv_source import ArxivSource
1809
+
1810
+ xml = """<?xml version="1.0"?>
1811
+ <feed xmlns="http://www.w3.org/2005/Atom">
1812
+ <entry>
1813
+ <title>Paper Title</title>
1814
+ <summary>Abstract text</summary>
1815
+ <author><name>Alice</name></author>
1816
+ <published>2023-01-01</published>
1817
+ </entry>
1818
+ </feed>"""
1819
+
1820
+ mock_get.return_value = MagicMock(text=xml, status_code=200)
1821
+ mock_get.return_value.raise_for_status = MagicMock()
1822
+
1823
+ src = ArxivSource("2301.12345")
1824
+ files = src.list_videos()
1825
+ meta = [f for f in files if f.id.startswith("meta:")][0]
1826
+ dest = tmp_path / "paper.txt"
1827
+ result = src.download(meta, dest)
1828
+ assert result.exists()
1829
+ content = result.read_text()
1830
+ assert "Paper Title" in content
1831
+ assert "Alice" in content
1832
+ assert "Abstract text" in content
1833
+
1834
+
1835
+class TestPodcastSourceDownload:
1836
+ @patch("requests.get")
1837
+ def test_max_episodes(self, mock_get):
1838
+ from video_processor.sources.podcast_source import PodcastSource
1839
+
1840
+ items = "".join(
1841
+ f"<item><title>Ep {i}</title>"
1842
+ f'<enclosure url="https://example.com/ep{i}.mp3" type="audio/mpeg"/></item>'
1843
+ for i in range(20)
1844
+ )
1845
+ xml = f"<rss><channel>{items}</channel></rss>"
1846
+
1847
+ mock_get.return_value = MagicMock(text=xml, status_code=200)
1848
+ mock_get.return_value.raise_for_status = MagicMock()
1849
+
1850
+ src = PodcastSource(feed_url="https://example.com/feed.xml", max_episodes=5)
1851
+ with patch.dict("sys.modules", {"feedparser": None}):
1852
+ files = src.list_videos()
1853
+ assert len(files) == 5
1854
+
1855
+
1856
+# ---------------------------------------------------------------------------
1857
+# Auth edge cases
1858
+# ---------------------------------------------------------------------------
1859
+
1860
+
1861
+class TestZoomSourceAuth:
1862
+ def test_saved_token_valid(self, tmp_path):
1863
+ import time
1864
+
1865
+ from video_processor.sources.zoom_source import ZoomSource
1866
+
1867
+ token_path = tmp_path / "token.json"
1868
+
1869
+ token_path.write_text(
1870
+ json.dumps({"access_token": "valid", "expires_at": time.time() + 3600})
1871
+ )
1872
+ src = ZoomSource(token_path=token_path)
1873
+ assert src._auth_saved_token() is True
1874
+ assert src._access_token == "valid"
1875
+
1876
+ def test_saved_token_expired_no_refresh(self, tmp_path):
1877
+ from video_processor.sources.zoom_source import ZoomSource
1878
+
1879
+ token_path = tmp_path / "token.json"
1880
+ token_path.write_text(json.dumps({"access_token": "old", "expires_at": 0}))
1881
+ src = ZoomSource(token_path=token_path)
1882
+ assert src._auth_saved_token() is False
1883
+
1884
+ @patch("video_processor.sources.zoom_source.requests")
1885
+ def test_server_to_server_success(self, mock_requests, tmp_path):
1886
+ from video_processor.sources.zoom_source import ZoomSource
1887
+
1888
+ mock_requests.post.return_value = MagicMock(status_code=200)
1889
+ mock_requests.post.return_value.raise_for_status = MagicMock()
1890
+ mock_requests.post.return_value.json.return_value = {
1891
+ "access_token": "s2s_tok",
1892
+ "expires_in": 3600,
1893
+ }
1894
+
1895
+ src = ZoomSource(
1896
+ client_id="cid",
1897
+ client_secret="csec",
1898
+ account_id="aid",
1899
+ token_path=tmp_path / "token.json",
1900
+ )
1901
+ assert src._auth_server_to_server() is True
1902
+ assert src._access_token == "s2s_tok"
1903
+
1904
+ def test_server_to_server_no_creds(self):
1905
+ from video_processor.sources.zoom_source import ZoomSource
1906
+
1907
+ src = ZoomSource(account_id="aid")
1908
+ assert src._auth_server_to_server() is False
1909
+
1910
+ def test_download_no_url_raises(self):
1911
+ from video_processor.sources.zoom_source import ZoomSource
1912
+
1913
+ src = ZoomSource()
1914
+ src._access_token = "tok"
1915
+ file = SourceFile(name="meeting.mp4", id="123")
1916
+ with pytest.raises(ValueError, match="No download URL"):
1917
+ src.download(file, Path("/tmp/out.mp4"))
1918
+
1919
+
1920
+class TestGoogleDriveSourceAuth:
1921
+ def test_is_service_account_true(self, tmp_path):
1922
+ from video_processor.sources.google_drive import GoogleDriveSource
1923
+
1924
+ creds = tmp_path / "sa.json"
1925
+ creds.write_text(json.dumps({"type": "service_account"}))
1926
+ src = GoogleDriveSource(credentials_path=str(creds))
1927
+ assert src._is_service_account() is True
1928
+
1929
+ def test_is_service_account_false(self, tmp_path):
1930
+ from video_processor.sources.google_drive import GoogleDriveSource
1931
+
1932
+ creds = tmp_path / "oauth.json"
1933
+ creds.write_text(json.dumps({"type": "authorized_user"}))
1934
+ src = GoogleDriveSource(credentials_path=str(creds))
1935
+ assert src._is_service_account() is False
1936
+
1937
+ def test_is_service_account_no_file(self):
1938
+ from video_processor.sources.google_drive import GoogleDriveSource
1939
+
1940
+ with patch.dict("os.environ", {}, clear=True):
1941
+ src = GoogleDriveSource(credentials_path=None)
1942
+ src.credentials_path = None
1943
+ assert src._is_service_account() is False
1944
+
1945
+ def test_download_not_authed(self):
1946
+ from video_processor.sources.google_drive import GoogleDriveSource
1947
+
1948
+ src = GoogleDriveSource()
1949
+ with pytest.raises(RuntimeError, match="Not authenticated"):
1950
+ src.download(SourceFile(name="x", id="y"), Path("/tmp/x"))
1951
+
1952
+
1953
+class TestDropboxSourceAuth:
1954
+ def test_init_from_env(self):
1955
+ from video_processor.sources.dropbox_source import DropboxSource
1956
+
1957
+ with patch.dict(
1958
+ "os.environ",
1959
+ {"DROPBOX_ACCESS_TOKEN": "tok", "DROPBOX_APP_KEY": "key"},
1960
+ ):
1961
+ src = DropboxSource()
1962
+ assert src.access_token == "tok"
1963
+ assert src.app_key == "key"
1964
+
1965
+ def test_not_authed_list(self):
1966
+ from video_processor.sources.dropbox_source import DropboxSource
1967
+
1968
+ src = DropboxSource()
1969
+ with pytest.raises(RuntimeError, match="Not authenticated"):
1970
+ src.list_videos()
1971
+
1972
+ def test_not_authed_download(self):
1973
+ from video_processor.sources.dropbox_source import DropboxSource
1974
+
1975
+ src = DropboxSource()
1976
+ with pytest.raises(RuntimeError, match="Not authenticated"):
1977
+ src.download(SourceFile(name="x", id="y"), Path("/tmp/x"))
1978
+
1979
+
1980
+class TestNotionSourceAuth:
1981
+ def test_no_token(self):
1982
+ from video_processor.sources.notion_source import NotionSource
1983
+
1984
+ with patch.dict("os.environ", {}, clear=True):
1985
+ src = NotionSource(token="")
1986
+ assert src.authenticate() is False
1987
+
1988
+ @patch("video_processor.sources.notion_source.requests")
1989
+ def test_auth_success(self, mock_requests):
1990
+ from video_processor.sources.notion_source import NotionSource
1991
+
1992
+ mock_requests.get.return_value = MagicMock(status_code=200)
1993
+ mock_requests.get.return_value.raise_for_status = MagicMock()
1994
+ mock_requests.get.return_value.json.return_value = {"name": "Bot"}
1995
+ mock_requests.RequestException = Exception
1996
+
1997
+ src = NotionSource(token="ntn_valid")
1998
+ assert src.authenticate() is True
1999
+
2000
+ @patch("video_processor.sources.notion_source.requests")
2001
+ def test_auth_failure(self, mock_requests):
2002
+ from video_processor.sources.notion_source import NotionSource
2003
+
2004
+ mock_requests.get.return_value.raise_for_status.side_effect = Exception("401")
2005
+ mock_requests.RequestException = Exception
2006
+
2007
+ src = NotionSource(token="ntn_bad")
2008
+ assert src.authenticate() is False
2009
+
2010
+ def test_extract_property_values(self):
2011
+ from video_processor.sources.notion_source import _extract_property_value
2012
+
2013
+ assert _extract_property_value({"type": "number", "number": 42}) == "42"
2014
+ assert _extract_property_value({"type": "number", "number": None}) == ""
2015
+ assert _extract_property_value({"type": "select", "select": {"name": "High"}}) == "High"
2016
+ assert _extract_property_value({"type": "select", "select": None}) == ""
2017
+ assert _extract_property_value({"type": "checkbox", "checkbox": True}) == "True"
2018
+ assert _extract_property_value({"type": "url", "url": "https://ex.com"}) == "https://ex.com"
2019
+ assert _extract_property_value({"type": "unknown"}) == ""
2020
+
2021
+
2022
+class TestGitHubSourceAuth:
2023
+ def test_authenticate_no_token(self):
2024
+ from video_processor.sources.github_source import GitHubSource
2025
+
2026
+ src = GitHubSource(repo="owner/repo")
2027
+ with patch.dict("os.environ", {}, clear=True):
2028
+ with patch("subprocess.run", side_effect=FileNotFoundError):
2029
+ result = src.authenticate()
2030
+ assert result is True # works for public repos
2031
+
2032
+ @patch("requests.get")
2033
+ def test_list_excludes_pr_from_issues(self, mock_get):
2034
+ from video_processor.sources.github_source import GitHubSource
2035
+
2036
+ def side_effect(url, **kwargs):
2037
+ resp = MagicMock()
2038
+ resp.ok = True
2039
+ if "/readme" in url:
2040
+ resp.json.return_value = {}
2041
+ elif "/issues" in url:
2042
+ resp.json.return_value = [
2043
+ {"number": 1, "title": "Bug"},
2044
+ {"number": 2, "title": "PR as issue", "pull_request": {}},
2045
+ ]
2046
+ elif "/pulls" in url:
2047
+ resp.json.return_value = []
2048
+ return resp
2049
+
2050
+ mock_get.side_effect = side_effect
2051
+
2052
+ src = GitHubSource(repo="o/r")
2053
+ src.authenticate()
2054
+ files = src.list_videos()
2055
+ ids = [f.id for f in files]
2056
+ assert "issue:1" in ids
2057
+ assert "issue:2" not in ids # excluded because it has pull_request key
2058
+
2059
+
2060
+class TestS3SourceErrors:
2061
+ def test_not_authed_list(self):
2062
+ from video_processor.sources.s3_source import S3Source
2063
+
2064
+ src = S3Source(bucket="test")
2065
+ with pytest.raises(RuntimeError, match="Not authenticated"):
2066
+ src.list_videos()
2067
+
2068
+ def test_not_authed_download(self):
2069
+ from video_processor.sources.s3_source import S3Source
2070
+
2071
+ src = S3Source(bucket="test")
2072
+ with pytest.raises(RuntimeError, match="Not authenticated"):
2073
+ src.download(SourceFile(name="x", id="x"), Path("/tmp/x"))
15732074
--- tests/test_sources.py
+++ tests/test_sources.py
@@ -1,13 +1,15 @@
1 """Tests for all source connectors: import, instantiation, authenticate, list_videos."""
2
 
3 import os
 
4 from unittest.mock import MagicMock, patch
5
6 import pytest
7
8 from video_processor.sources.base import SourceFile
9
10 # ---------------------------------------------------------------------------
11 # SourceFile model
12 # ---------------------------------------------------------------------------
13
@@ -1568,5 +1570,504 @@
1568
1569 def test_zoom_lazy_import(self):
1570 from video_processor.sources import ZoomSource
1571
1572 assert ZoomSource is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1573
--- tests/test_sources.py
+++ tests/test_sources.py
@@ -1,13 +1,15 @@
1 """Tests for all source connectors: import, instantiation, authenticate, list_videos."""
2
3 import json
4 import os
5 from pathlib import Path
6 from unittest.mock import MagicMock, patch
7
8 import pytest
9
10 from video_processor.sources.base import BaseSource, SourceFile
11
12 # ---------------------------------------------------------------------------
13 # SourceFile model
14 # ---------------------------------------------------------------------------
15
@@ -1568,5 +1570,504 @@
1570
def test_zoom_lazy_import(self):
    """ZoomSource can be imported from the sources package and is not None."""
    from video_processor.sources import ZoomSource as zoom_cls

    assert zoom_cls is not None
1575
def test_invalid_lazy_import(self):
    """Looking up an unknown name on the sources package raises AttributeError."""
    from video_processor import sources as sources_pkg

    with pytest.raises(AttributeError):
        _ = sources_pkg.NonexistentSource
1581
1582
1583 # ---------------------------------------------------------------------------
1584 # BaseSource.download_all
1585 # ---------------------------------------------------------------------------
1586
1587
class TestBaseSourceDownloadAll:
    """Exercise BaseSource.download_all through minimal in-test subclasses."""

    def test_download_all_success(self, tmp_path):
        """Both files are fetched; a file's ``path`` decides where it lands."""

        class _WritingSource(BaseSource):
            """Source whose download writes a marker string to the destination."""

            def download(self, file, destination):
                destination.parent.mkdir(parents=True, exist_ok=True)
                destination.write_text(f"content:{file.name}")
                return destination

            def list_videos(self, **kwargs):
                return []

            def authenticate(self):
                return True

        wanted = [
            SourceFile(name="a.mp4", id="1"),
            SourceFile(name="b.mp4", id="2", path="subdir/b.mp4"),
        ]
        source = _WritingSource()
        downloaded = source.download_all(wanted, tmp_path)

        assert len(downloaded) == 2
        # The file without ``path`` lands directly under tmp_path; the one
        # with ``path`` keeps its sub-directory.
        flat_copy = tmp_path / "a.mp4"
        nested_copy = tmp_path / "subdir" / "b.mp4"
        assert flat_copy.read_text() == "content:a.mp4"
        assert nested_copy.read_text() == "content:b.mp4"

    def test_download_all_partial_failure(self, tmp_path):
        """One failing download does not stop the others from being returned."""

        class _FlakySource(BaseSource):
            """Source that raises for the file whose id is ``"bad"``."""

            def download(self, file, destination):
                if file.id != "bad":
                    destination.parent.mkdir(parents=True, exist_ok=True)
                    destination.write_text("ok")
                    return destination
                raise RuntimeError("download failed")

            def list_videos(self, **kwargs):
                return []

            def authenticate(self):
                return True

        wanted = [
            SourceFile(name="good.mp4", id="good"),
            SourceFile(name="bad.mp4", id="bad"),
            SourceFile(name="also_good.mp4", id="good2"),
        ]
        source = _FlakySource()
        downloaded = source.download_all(wanted, tmp_path)

        # Three requested, one raised: two paths come back.
        assert len(downloaded) == 2
1639
1640
1641 # ---------------------------------------------------------------------------
1642 # Download & error handling tests
1643 # ---------------------------------------------------------------------------
1644
1645
class TestRSSSourceDownload:
    """Download behaviour of RSSSource against a patched ``requests.get``."""

    @staticmethod
    def _serve(mock_get, body):
        """Make the patched ``requests.get`` return *body* with status 200."""
        reply = MagicMock(text=body, status_code=200)
        reply.raise_for_status = MagicMock()
        mock_get.return_value = reply

    @patch("requests.get")
    def test_download_entry(self, mock_get, tmp_path):
        """A listed entry downloads to a file containing its title and summary."""
        from video_processor.sources.rss_source import RSSSource

        feed_xml = "".join(
            [
                "<rss><channel><item><title>Post 1</title>",
                "<link>https://example.com/1</link>",
                "<description>Summary here</description>",
                "<pubDate>Mon, 01 Jan 2025</pubDate></item></channel></rss>",
            ]
        )
        self._serve(mock_get, feed_xml)

        feed = RSSSource(url="https://example.com/feed.xml")
        # A None entry in sys.modules makes ``import feedparser`` raise
        # ImportError -- presumably steering the source onto its
        # non-feedparser parsing path (confirm against rss_source).
        with patch.dict("sys.modules", {"feedparser": None}):
            entries = feed.list_videos()
            assert len(entries) == 1

        target = tmp_path / "entry.txt"
        written = feed.download(entries[0], target)
        assert written.exists()
        body = written.read_text()
        assert "Post 1" in body
        assert "Summary here" in body

    @patch("requests.get")
    def test_download_not_found(self, mock_get, tmp_path):
        """Downloading an id that was never listed raises ValueError."""
        from video_processor.sources.rss_source import RSSSource

        self._serve(mock_get, "<rss><channel></channel></rss>")

        feed = RSSSource(url="https://example.com/feed.xml")
        with patch.dict("sys.modules", {"feedparser": None}):
            feed.list_videos()

        unknown = SourceFile(name="missing", id="nonexistent")
        with pytest.raises(ValueError, match="Entry not found"):
            feed.download(unknown, tmp_path / "out.txt")
1687
1688
class TestWebSourceDownload:
    """WebSource download and HTML-stripping checks."""

    @patch("requests.get")
    def test_download_saves_text(self, mock_get, tmp_path):
        """The page's paragraph text ends up in the downloaded file."""
        from video_processor.sources.web_source import WebSource

        page = MagicMock(
            text="<html><body><p>Page content</p></body></html>", status_code=200
        )
        page.raise_for_status = MagicMock()
        mock_get.return_value = page

        web = WebSource(url="https://example.com/page")
        # A None entry in sys.modules makes ``import bs4`` raise ImportError
        # for the duration of the block.
        with patch.dict("sys.modules", {"bs4": None}):
            first = web.list_videos()[0]
            target = tmp_path / "page.txt"
            saved = web.download(first, target)
            assert saved.exists()
            assert "Page content" in saved.read_text()

    def test_strip_html_tags(self):
        """Paragraph text survives stripping; script content does not."""
        from video_processor.sources.web_source import _strip_html_tags

        markup = "<p>Hello</p><script>evil()</script><style>.x{}</style>"
        stripped = _strip_html_tags(markup)
        assert "Hello" in stripped
        assert "evil" not in stripped
1714
1715
class TestHackerNewsSourceDownload:
    """HackerNewsSource download and comment handling with a faked item API."""

    @staticmethod
    def _json_reply(payload):
        """Build a mock response whose ``json()`` yields *payload*."""
        reply = MagicMock()
        reply.raise_for_status = MagicMock()
        reply.json.return_value = payload
        return reply

    @patch("requests.get")
    def test_download(self, mock_get, tmp_path):
        """Downloading the first listed file writes the story title to disk."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        story = {"title": "Story", "by": "user", "score": 1, "kids": []}

        def fake_get(url, timeout=10):
            # Every request is answered with the same story payload.
            return self._json_reply(story)

        mock_get.side_effect = fake_get

        hn = HackerNewsSource(item_id=12345)
        first = hn.list_videos()[0]
        target = tmp_path / "hn.txt"
        saved = hn.download(first, target)
        assert saved.exists()
        assert "Story" in saved.read_text()

    @patch("requests.get")
    def test_max_comments(self, mock_get):
        """With ``max_comments=3`` only three of the ten child comments appear."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        story = {"title": "Big", "by": "u", "score": 1, "kids": list(range(100, 110))}
        comment = {"by": "c", "text": "hi", "kids": []}

        def fake_get(url, timeout=10):
            # The story URL gets the story; any other item gets the comment.
            payload = story if "/12345.json" in url else comment
            return self._json_reply(payload)

        mock_get.side_effect = fake_get

        hn = HackerNewsSource(item_id=12345, max_comments=3)
        rendered = hn.fetch_text()
        assert rendered.count("**c**") == 3

    @patch("requests.get")
    def test_deleted_comments_skipped(self, mock_get):
        """A comment flagged ``deleted`` is left out of the rendered text."""
        from video_processor.sources.hackernews_source import HackerNewsSource

        story = {"title": "Story", "by": "u", "score": 1, "kids": [200, 201]}

        def fake_get(url, timeout=10):
            reply = MagicMock()
            reply.raise_for_status = MagicMock()
            # First matching URL fragment wins; an unmatched URL leaves
            # ``json()`` as the default MagicMock return value.
            routes = (
                ("/12345.json", story),
                ("/200.json", {"deleted": True}),
                ("/201.json", {"by": "alive", "text": "here", "dead": False}),
            )
            for fragment, payload in routes:
                if fragment in url:
                    reply.json.return_value = payload
                    break
            return reply

        mock_get.side_effect = fake_get

        hn = HackerNewsSource(item_id=12345)
        rendered = hn.fetch_text()
        assert "alive" in rendered
        assert rendered.count("**") == 2  # only the alive comment
1783
1784
class TestRedditSourceDownload:
    """RedditSource download against a patched ``requests.get``."""

    @patch("requests.get")
    def test_download(self, mock_get, tmp_path):
        """The post title from the mocked JSON ends up in the downloaded file."""
        from video_processor.sources.reddit_source import RedditSource

        # Two-element payload: a listing holding the post, then an empty
        # listing of children.
        post_listing = {
            "data": {"children": [{"data": {"title": "Post", "author": "u", "score": 1}}]}
        }
        empty_listing = {"data": {"children": []}}

        reply = MagicMock(status_code=200)
        reply.raise_for_status = MagicMock()
        reply.json.return_value = [post_listing, empty_listing]
        mock_get.return_value = reply

        reddit = RedditSource(url="https://reddit.com/r/test/comments/abc/post")
        first = reddit.list_videos()[0]
        target = tmp_path / "reddit.txt"
        saved = reddit.download(first, target)
        assert saved.exists()
        assert "Post" in saved.read_text()
1803
1804
class TestArxivSourceDownload:
    """ArxivSource metadata download against a patched ``requests.get``."""

    @patch("requests.get")
    def test_download_metadata(self, mock_get, tmp_path):
        """The ``meta:`` file contains the entry's title, author and abstract."""
        from video_processor.sources.arxiv_source import ArxivSource

        atom_feed = """<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>Paper Title</title>
<summary>Abstract text</summary>
<author><name>Alice</name></author>
<published>2023-01-01</published>
</entry>
</feed>"""

        reply = MagicMock(text=atom_feed, status_code=200)
        reply.raise_for_status = MagicMock()
        mock_get.return_value = reply

        arxiv = ArxivSource("2301.12345")
        listed = arxiv.list_videos()
        # Pick the first listed file whose id carries the "meta:" prefix.
        meta_files = [f for f in listed if f.id.startswith("meta:")]
        meta = meta_files[0]

        target = tmp_path / "paper.txt"
        saved = arxiv.download(meta, target)
        assert saved.exists()
        body = saved.read_text()
        assert "Paper Title" in body
        assert "Alice" in body
        assert "Abstract text" in body
1833
1834
class TestPodcastSourceDownload:
    """PodcastSource listing limits against a patched ``requests.get``."""

    @patch("requests.get")
    def test_max_episodes(self, mock_get):
        """A 20-item feed listed with ``max_episodes=5`` yields five files."""
        from video_processor.sources.podcast_source import PodcastSource

        episodes = []
        for i in range(20):
            episodes.append(
                f"<item><title>Ep {i}</title>"
                f'<enclosure url="https://example.com/ep{i}.mp3" type="audio/mpeg"/></item>'
            )
        feed_xml = "<rss><channel>" + "".join(episodes) + "</channel></rss>"

        reply = MagicMock(text=feed_xml, status_code=200)
        reply.raise_for_status = MagicMock()
        mock_get.return_value = reply

        podcast = PodcastSource(feed_url="https://example.com/feed.xml", max_episodes=5)
        # A None entry in sys.modules makes ``import feedparser`` raise
        # ImportError while the feed is listed.
        with patch.dict("sys.modules", {"feedparser": None}):
            listed = podcast.list_videos()
            assert len(listed) == 5
1854
1855
1856 # ---------------------------------------------------------------------------
1857 # Auth edge cases
1858 # ---------------------------------------------------------------------------
1859
1860
class TestZoomSourceAuth:
    """Saved-token, server-to-server and download-guard checks for ZoomSource."""

    def test_saved_token_valid(self, tmp_path):
        """A saved token that expires an hour from now is accepted and loaded."""
        import time

        from video_processor.sources.zoom_source import ZoomSource

        token_file = tmp_path / "token.json"
        payload = {"access_token": "valid", "expires_at": time.time() + 3600}
        token_file.write_text(json.dumps(payload))

        zoom = ZoomSource(token_path=token_file)
        assert zoom._auth_saved_token() is True
        assert zoom._access_token == "valid"

    def test_saved_token_expired_no_refresh(self, tmp_path):
        """A saved token with ``expires_at`` 0 and no refresh token is rejected."""
        from video_processor.sources.zoom_source import ZoomSource

        token_file = tmp_path / "token.json"
        payload = {"access_token": "old", "expires_at": 0}
        token_file.write_text(json.dumps(payload))

        zoom = ZoomSource(token_path=token_file)
        assert zoom._auth_saved_token() is False

    @patch("video_processor.sources.zoom_source.requests")
    def test_server_to_server_success(self, mock_requests, tmp_path):
        """With full credentials, the token from the mocked POST is stored."""
        from video_processor.sources.zoom_source import ZoomSource

        reply = MagicMock(status_code=200)
        reply.raise_for_status = MagicMock()
        reply.json.return_value = {
            "access_token": "s2s_tok",
            "expires_in": 3600,
        }
        mock_requests.post.return_value = reply

        zoom = ZoomSource(
            client_id="cid",
            client_secret="csec",
            account_id="aid",
            token_path=tmp_path / "token.json",
        )
        assert zoom._auth_server_to_server() is True
        assert zoom._access_token == "s2s_tok"

    def test_server_to_server_no_creds(self):
        """Only an account id, no client id/secret: server-to-server auth fails."""
        from video_processor.sources.zoom_source import ZoomSource

        zoom = ZoomSource(account_id="aid")
        assert zoom._auth_server_to_server() is False

    def test_download_no_url_raises(self):
        """Downloading a file that carries no download URL raises ValueError."""
        from video_processor.sources.zoom_source import ZoomSource

        zoom = ZoomSource()
        zoom._access_token = "tok"
        recording = SourceFile(name="meeting.mp4", id="123")
        with pytest.raises(ValueError, match="No download URL"):
            zoom.download(recording, Path("/tmp/out.mp4"))
1918
1919
class TestGoogleDriveSourceAuth:
    """Credential-type detection and download guard for GoogleDriveSource."""

    @staticmethod
    def _source_with_credentials(directory, filename, cred_type):
        """Write a credentials file of *cred_type* and return a source using it."""
        from video_processor.sources.google_drive import GoogleDriveSource

        cred_file = directory / filename
        cred_file.write_text(json.dumps({"type": cred_type}))
        return GoogleDriveSource(credentials_path=str(cred_file))

    def test_is_service_account_true(self, tmp_path):
        """A credentials file typed ``service_account`` is detected as such."""
        drive = self._source_with_credentials(tmp_path, "sa.json", "service_account")
        assert drive._is_service_account() is True

    def test_is_service_account_false(self, tmp_path):
        """A credentials file typed ``authorized_user`` is not a service account."""
        drive = self._source_with_credentials(tmp_path, "oauth.json", "authorized_user")
        assert drive._is_service_account() is False

    def test_is_service_account_no_file(self):
        """With no credentials path at all, the check returns False."""
        from video_processor.sources.google_drive import GoogleDriveSource

        # The environment is emptied for the block; credentials_path is then
        # forced to None regardless of what the constructor chose.
        with patch.dict("os.environ", {}, clear=True):
            drive = GoogleDriveSource(credentials_path=None)
            drive.credentials_path = None
            assert drive._is_service_account() is False

    def test_download_not_authed(self):
        """Downloading before authenticating raises RuntimeError."""
        from video_processor.sources.google_drive import GoogleDriveSource

        drive = GoogleDriveSource()
        placeholder = SourceFile(name="x", id="y")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            drive.download(placeholder, Path("/tmp/x"))
1951
1952
class TestDropboxSourceAuth:
    """Environment-based setup and not-authenticated guards for DropboxSource."""

    def test_init_from_env(self):
        """Access token and app key are picked up from the environment."""
        from video_processor.sources.dropbox_source import DropboxSource

        env = {"DROPBOX_ACCESS_TOKEN": "tok", "DROPBOX_APP_KEY": "key"}
        with patch.dict("os.environ", env):
            box = DropboxSource()
            assert box.access_token == "tok"
            assert box.app_key == "key"

    def test_not_authed_list(self):
        """Listing before authenticating raises RuntimeError."""
        from video_processor.sources.dropbox_source import DropboxSource

        box = DropboxSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            box.list_videos()

    def test_not_authed_download(self):
        """Downloading before authenticating raises RuntimeError."""
        from video_processor.sources.dropbox_source import DropboxSource

        box = DropboxSource()
        placeholder = SourceFile(name="x", id="y")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            box.download(placeholder, Path("/tmp/x"))
1978
1979
class TestNotionSourceAuth:
    """Authentication outcomes and property extraction for NotionSource."""

    def test_no_token(self):
        """An empty token with an empty environment fails authentication."""
        from video_processor.sources.notion_source import NotionSource

        with patch.dict("os.environ", {}, clear=True):
            notion = NotionSource(token="")
            assert notion.authenticate() is False

    @patch("video_processor.sources.notion_source.requests")
    def test_auth_success(self, mock_requests):
        """A 200 reply from the mocked GET makes authenticate() return True."""
        from video_processor.sources.notion_source import NotionSource

        reply = MagicMock(status_code=200)
        reply.raise_for_status = MagicMock()
        reply.json.return_value = {"name": "Bot"}
        mock_requests.get.return_value = reply
        # The whole ``requests`` module is mocked, so give it a real exception
        # class for any ``except requests.RequestException`` clause.
        mock_requests.RequestException = Exception

        notion = NotionSource(token="ntn_valid")
        assert notion.authenticate() is True

    @patch("video_processor.sources.notion_source.requests")
    def test_auth_failure(self, mock_requests):
        """An exception from raise_for_status makes authenticate() return False."""
        from video_processor.sources.notion_source import NotionSource

        mock_requests.RequestException = Exception
        mock_requests.get.return_value.raise_for_status.side_effect = Exception("401")

        notion = NotionSource(token="ntn_bad")
        assert notion.authenticate() is False

    def test_extract_property_values(self):
        """Each property type is rendered to the expected string form."""
        from video_processor.sources.notion_source import _extract_property_value

        expectations = [
            ({"type": "number", "number": 42}, "42"),
            ({"type": "number", "number": None}, ""),
            ({"type": "select", "select": {"name": "High"}}, "High"),
            ({"type": "select", "select": None}, ""),
            ({"type": "checkbox", "checkbox": True}, "True"),
            ({"type": "url", "url": "https://ex.com"}, "https://ex.com"),
            ({"type": "unknown"}, ""),
        ]
        for prop, expected in expectations:
            assert _extract_property_value(prop) == expected
2020
2021
class TestGitHubSourceAuth:
    """Token-less authentication and issue listing for GitHubSource."""

    def test_authenticate_no_token(self):
        """Without an env token or a runnable subprocess, authenticate() is True."""
        from video_processor.sources.github_source import GitHubSource

        src = GitHubSource(repo="owner/repo")
        # Empty environment plus a subprocess.run that raises FileNotFoundError:
        # no token can come from either place.
        with patch.dict("os.environ", {}, clear=True):
            with patch("subprocess.run", side_effect=FileNotFoundError):
                result = src.authenticate()
        assert result is True  # works for public repos

    @patch("requests.get")
    def test_list_excludes_pr_from_issues(self, mock_get):
        """Issue entries carrying a ``pull_request`` key are left out of the listing."""
        from video_processor.sources.github_source import GitHubSource

        def side_effect(url, **kwargs):
            resp = MagicMock()
            resp.ok = True
            if "/readme" in url:
                resp.json.return_value = {}
            elif "/issues" in url:
                resp.json.return_value = [
                    {"number": 1, "title": "Bug"},
                    {"number": 2, "title": "PR as issue", "pull_request": {}},
                ]
            elif "/pulls" in url:
                resp.json.return_value = []
            return resp

        mock_get.side_effect = side_effect

        src = GitHubSource(repo="o/r")
        # Authenticate under the same isolation as test_authenticate_no_token,
        # so this test neither reads the developer's real environment nor
        # spawns a real subprocess.
        with patch.dict("os.environ", {}, clear=True):
            with patch("subprocess.run", side_effect=FileNotFoundError):
                src.authenticate()
        files = src.list_videos()
        ids = [f.id for f in files]
        assert "issue:1" in ids
        assert "issue:2" not in ids  # excluded because it has pull_request key
2058
2059
class TestS3SourceErrors:
    """Not-authenticated guards for S3Source."""

    def test_not_authed_list(self):
        """Listing before authenticating raises RuntimeError."""
        from video_processor.sources.s3_source import S3Source

        bucket_source = S3Source(bucket="test")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            bucket_source.list_videos()

    def test_not_authed_download(self):
        """Downloading before authenticating raises RuntimeError."""
        from video_processor.sources.s3_source import S3Source

        bucket_source = S3Source(bucket="test")
        placeholder = SourceFile(name="x", id="x")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            bucket_source.download(placeholder, Path("/tmp/x"))
2074

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button