PlanOpticon
test(sources): add download, auth edge case, and error handling tests Adds 33 new tests covering downloads, auth edge cases, error handling, lazy imports, and base class download_all behavior across all source connectors. 174 source tests total, 861 tests overall. Closes #111
Commit
4bed44e2044825313e0482a822660aad3fd31e55ac75d10e68dabe1f6d7b78f8
Parent
42e9b20bb7830d9…
1 file changed
+502
-1
+502
-1
| --- tests/test_sources.py | ||
| +++ tests/test_sources.py | ||
| @@ -1,13 +1,15 @@ | ||
| 1 | 1 | """Tests for all source connectors: import, instantiation, authenticate, list_videos.""" |
| 2 | 2 | |
| 3 | +import json | |
| 3 | 4 | import os |
| 5 | +from pathlib import Path | |
| 4 | 6 | from unittest.mock import MagicMock, patch |
| 5 | 7 | |
| 6 | 8 | import pytest |
| 7 | 9 | |
| 8 | -from video_processor.sources.base import SourceFile | |
| 10 | +from video_processor.sources.base import BaseSource, SourceFile | |
| 9 | 11 | |
| 10 | 12 | # --------------------------------------------------------------------------- |
| 11 | 13 | # SourceFile model |
| 12 | 14 | # --------------------------------------------------------------------------- |
| 13 | 15 | |
| @@ -1568,5 +1570,504 @@ | ||
| 1568 | 1570 | |
| 1569 | 1571 | def test_zoom_lazy_import(self): |
| 1570 | 1572 | from video_processor.sources import ZoomSource |
| 1571 | 1573 | |
| 1572 | 1574 | assert ZoomSource is not None |
| 1575 | + | |
| 1576 | + def test_invalid_lazy_import(self): | |
| 1577 | + from video_processor import sources | |
| 1578 | + | |
| 1579 | + with pytest.raises(AttributeError): | |
| 1580 | + _ = sources.NonexistentSource | |
| 1581 | + | |
| 1582 | + | |
| 1583 | +# --------------------------------------------------------------------------- | |
| 1584 | +# BaseSource.download_all | |
| 1585 | +# --------------------------------------------------------------------------- | |
| 1586 | + | |
| 1587 | + | |
| 1588 | +class TestBaseSourceDownloadAll: | |
| 1589 | + def test_download_all_success(self, tmp_path): | |
| 1590 | + """download_all should download all files using path when available.""" | |
| 1591 | + | |
| 1592 | + class FakeSource(BaseSource): | |
| 1593 | + def authenticate(self): | |
| 1594 | + return True | |
| 1595 | + | |
| 1596 | + def list_videos(self, **kwargs): | |
| 1597 | + return [] | |
| 1598 | + | |
| 1599 | + def download(self, file, destination): | |
| 1600 | + destination.parent.mkdir(parents=True, exist_ok=True) | |
| 1601 | + destination.write_text(f"content:{file.name}") | |
| 1602 | + return destination | |
| 1603 | + | |
| 1604 | + src = FakeSource() | |
| 1605 | + files = [ | |
| 1606 | + SourceFile(name="a.mp4", id="1"), | |
| 1607 | + SourceFile(name="b.mp4", id="2", path="subdir/b.mp4"), | |
| 1608 | + ] | |
| 1609 | + paths = src.download_all(files, tmp_path) | |
| 1610 | + assert len(paths) == 2 | |
| 1611 | + assert (tmp_path / "a.mp4").read_text() == "content:a.mp4" | |
| 1612 | + assert (tmp_path / "subdir" / "b.mp4").read_text() == "content:b.mp4" | |
| 1613 | + | |
| 1614 | + def test_download_all_partial_failure(self, tmp_path): | |
| 1615 | + """download_all should continue past failures and return successful paths.""" | |
| 1616 | + | |
| 1617 | + class PartialFail(BaseSource): | |
| 1618 | + def authenticate(self): | |
| 1619 | + return True | |
| 1620 | + | |
| 1621 | + def list_videos(self, **kwargs): | |
| 1622 | + return [] | |
| 1623 | + | |
| 1624 | + def download(self, file, destination): | |
| 1625 | + if file.id == "bad": | |
| 1626 | + raise RuntimeError("download failed") | |
| 1627 | + destination.parent.mkdir(parents=True, exist_ok=True) | |
| 1628 | + destination.write_text("ok") | |
| 1629 | + return destination | |
| 1630 | + | |
| 1631 | + src = PartialFail() | |
| 1632 | + files = [ | |
| 1633 | + SourceFile(name="good.mp4", id="good"), | |
| 1634 | + SourceFile(name="bad.mp4", id="bad"), | |
| 1635 | + SourceFile(name="also_good.mp4", id="good2"), | |
| 1636 | + ] | |
| 1637 | + paths = src.download_all(files, tmp_path) | |
| 1638 | + assert len(paths) == 2 | |
| 1639 | + | |
| 1640 | + | |
| 1641 | +# --------------------------------------------------------------------------- | |
| 1642 | +# Download & error handling tests | |
| 1643 | +# --------------------------------------------------------------------------- | |
| 1644 | + | |
| 1645 | + | |
| 1646 | +class TestRSSSourceDownload: | |
| 1647 | + @patch("requests.get") | |
| 1648 | + def test_download_entry(self, mock_get, tmp_path): | |
| 1649 | + from video_processor.sources.rss_source import RSSSource | |
| 1650 | + | |
| 1651 | + xml = ( | |
| 1652 | + "<rss><channel><item><title>Post 1</title>" | |
| 1653 | + "<link>https://example.com/1</link>" | |
| 1654 | + "<description>Summary here</description>" | |
| 1655 | + "<pubDate>Mon, 01 Jan 2025</pubDate></item></channel></rss>" | |
| 1656 | + ) | |
| 1657 | + mock_get.return_value = MagicMock(text=xml, status_code=200) | |
| 1658 | + mock_get.return_value.raise_for_status = MagicMock() | |
| 1659 | + | |
| 1660 | + src = RSSSource(url="https://example.com/feed.xml") | |
| 1661 | + with patch.dict("sys.modules", {"feedparser": None}): | |
| 1662 | + files = src.list_videos() | |
| 1663 | + assert len(files) == 1 | |
| 1664 | + | |
| 1665 | + dest = tmp_path / "entry.txt" | |
| 1666 | + result = src.download(files[0], dest) | |
| 1667 | + assert result.exists() | |
| 1668 | + content = result.read_text() | |
| 1669 | + assert "Post 1" in content | |
| 1670 | + assert "Summary here" in content | |
| 1671 | + | |
| 1672 | + @patch("requests.get") | |
| 1673 | + def test_download_not_found(self, mock_get, tmp_path): | |
| 1674 | + from video_processor.sources.rss_source import RSSSource | |
| 1675 | + | |
| 1676 | + xml = "<rss><channel></channel></rss>" | |
| 1677 | + mock_get.return_value = MagicMock(text=xml, status_code=200) | |
| 1678 | + mock_get.return_value.raise_for_status = MagicMock() | |
| 1679 | + | |
| 1680 | + src = RSSSource(url="https://example.com/feed.xml") | |
| 1681 | + with patch.dict("sys.modules", {"feedparser": None}): | |
| 1682 | + src.list_videos() | |
| 1683 | + | |
| 1684 | + fake = SourceFile(name="missing", id="nonexistent") | |
| 1685 | + with pytest.raises(ValueError, match="Entry not found"): | |
| 1686 | + src.download(fake, tmp_path / "out.txt") | |
| 1687 | + | |
| 1688 | + | |
| 1689 | +class TestWebSourceDownload: | |
| 1690 | + @patch("requests.get") | |
| 1691 | + def test_download_saves_text(self, mock_get, tmp_path): | |
| 1692 | + from video_processor.sources.web_source import WebSource | |
| 1693 | + | |
| 1694 | + mock_get.return_value = MagicMock( | |
| 1695 | + text="<html><body><p>Page content</p></body></html>", status_code=200 | |
| 1696 | + ) | |
| 1697 | + mock_get.return_value.raise_for_status = MagicMock() | |
| 1698 | + | |
| 1699 | + src = WebSource(url="https://example.com/page") | |
| 1700 | + with patch.dict("sys.modules", {"bs4": None}): | |
| 1701 | + file = src.list_videos()[0] | |
| 1702 | + dest = tmp_path / "page.txt" | |
| 1703 | + result = src.download(file, dest) | |
| 1704 | + assert result.exists() | |
| 1705 | + assert "Page content" in result.read_text() | |
| 1706 | + | |
| 1707 | + def test_strip_html_tags(self): | |
| 1708 | + from video_processor.sources.web_source import _strip_html_tags | |
| 1709 | + | |
| 1710 | + html = "<p>Hello</p><script>evil()</script><style>.x{}</style>" | |
| 1711 | + text = _strip_html_tags(html) | |
| 1712 | + assert "Hello" in text | |
| 1713 | + assert "evil" not in text | |
| 1714 | + | |
| 1715 | + | |
| 1716 | +class TestHackerNewsSourceDownload: | |
| 1717 | + @patch("requests.get") | |
| 1718 | + def test_download(self, mock_get, tmp_path): | |
| 1719 | + from video_processor.sources.hackernews_source import HackerNewsSource | |
| 1720 | + | |
| 1721 | + story = {"title": "Story", "by": "user", "score": 1, "kids": []} | |
| 1722 | + | |
| 1723 | + def side_effect(url, timeout=10): | |
| 1724 | + resp = MagicMock() | |
| 1725 | + resp.raise_for_status = MagicMock() | |
| 1726 | + resp.json.return_value = story | |
| 1727 | + return resp | |
| 1728 | + | |
| 1729 | + mock_get.side_effect = side_effect | |
| 1730 | + | |
| 1731 | + src = HackerNewsSource(item_id=12345) | |
| 1732 | + file = src.list_videos()[0] | |
| 1733 | + dest = tmp_path / "hn.txt" | |
| 1734 | + result = src.download(file, dest) | |
| 1735 | + assert result.exists() | |
| 1736 | + assert "Story" in result.read_text() | |
| 1737 | + | |
| 1738 | + @patch("requests.get") | |
| 1739 | + def test_max_comments(self, mock_get): | |
| 1740 | + from video_processor.sources.hackernews_source import HackerNewsSource | |
| 1741 | + | |
| 1742 | + story = {"title": "Big", "by": "u", "score": 1, "kids": list(range(100, 110))} | |
| 1743 | + comment = {"by": "c", "text": "hi", "kids": []} | |
| 1744 | + | |
| 1745 | + def side_effect(url, timeout=10): | |
| 1746 | + resp = MagicMock() | |
| 1747 | + resp.raise_for_status = MagicMock() | |
| 1748 | + if "/12345.json" in url: | |
| 1749 | + resp.json.return_value = story | |
| 1750 | + else: | |
| 1751 | + resp.json.return_value = comment | |
| 1752 | + return resp | |
| 1753 | + | |
| 1754 | + mock_get.side_effect = side_effect | |
| 1755 | + | |
| 1756 | + src = HackerNewsSource(item_id=12345, max_comments=3) | |
| 1757 | + text = src.fetch_text() | |
| 1758 | + assert text.count("**c**") == 3 | |
| 1759 | + | |
| 1760 | + @patch("requests.get") | |
| 1761 | + def test_deleted_comments_skipped(self, mock_get): | |
| 1762 | + from video_processor.sources.hackernews_source import HackerNewsSource | |
| 1763 | + | |
| 1764 | + story = {"title": "Story", "by": "u", "score": 1, "kids": [200, 201]} | |
| 1765 | + | |
| 1766 | + def side_effect(url, timeout=10): | |
| 1767 | + resp = MagicMock() | |
| 1768 | + resp.raise_for_status = MagicMock() | |
| 1769 | + if "/12345.json" in url: | |
| 1770 | + resp.json.return_value = story | |
| 1771 | + elif "/200.json" in url: | |
| 1772 | + resp.json.return_value = {"deleted": True} | |
| 1773 | + elif "/201.json" in url: | |
| 1774 | + resp.json.return_value = {"by": "alive", "text": "here", "dead": False} | |
| 1775 | + return resp | |
| 1776 | + | |
| 1777 | + mock_get.side_effect = side_effect | |
| 1778 | + | |
| 1779 | + src = HackerNewsSource(item_id=12345) | |
| 1780 | + text = src.fetch_text() | |
| 1781 | + assert "alive" in text | |
| 1782 | + assert text.count("**") == 2 # only the alive comment | |
| 1783 | + | |
| 1784 | + | |
| 1785 | +class TestRedditSourceDownload: | |
| 1786 | + @patch("requests.get") | |
| 1787 | + def test_download(self, mock_get, tmp_path): | |
| 1788 | + from video_processor.sources.reddit_source import RedditSource | |
| 1789 | + | |
| 1790 | + mock_get.return_value = MagicMock(status_code=200) | |
| 1791 | + mock_get.return_value.raise_for_status = MagicMock() | |
| 1792 | + mock_get.return_value.json.return_value = [ | |
| 1793 | + {"data": {"children": [{"data": {"title": "Post", "author": "u", "score": 1}}]}}, | |
| 1794 | + {"data": {"children": []}}, | |
| 1795 | + ] | |
| 1796 | + | |
| 1797 | + src = RedditSource(url="https://reddit.com/r/test/comments/abc/post") | |
| 1798 | + file = src.list_videos()[0] | |
| 1799 | + dest = tmp_path / "reddit.txt" | |
| 1800 | + result = src.download(file, dest) | |
| 1801 | + assert result.exists() | |
| 1802 | + assert "Post" in result.read_text() | |
| 1803 | + | |
| 1804 | + | |
| 1805 | +class TestArxivSourceDownload: | |
| 1806 | + @patch("requests.get") | |
| 1807 | + def test_download_metadata(self, mock_get, tmp_path): | |
| 1808 | + from video_processor.sources.arxiv_source import ArxivSource | |
| 1809 | + | |
| 1810 | + xml = """<?xml version="1.0"?> | |
| 1811 | + <feed xmlns="http://www.w3.org/2005/Atom"> | |
| 1812 | + <entry> | |
| 1813 | + <title>Paper Title</title> | |
| 1814 | + <summary>Abstract text</summary> | |
| 1815 | + <author><name>Alice</name></author> | |
| 1816 | + <published>2023-01-01</published> | |
| 1817 | + </entry> | |
| 1818 | + </feed>""" | |
| 1819 | + | |
| 1820 | + mock_get.return_value = MagicMock(text=xml, status_code=200) | |
| 1821 | + mock_get.return_value.raise_for_status = MagicMock() | |
| 1822 | + | |
| 1823 | + src = ArxivSource("2301.12345") | |
| 1824 | + files = src.list_videos() | |
| 1825 | + meta = [f for f in files if f.id.startswith("meta:")][0] | |
| 1826 | + dest = tmp_path / "paper.txt" | |
| 1827 | + result = src.download(meta, dest) | |
| 1828 | + assert result.exists() | |
| 1829 | + content = result.read_text() | |
| 1830 | + assert "Paper Title" in content | |
| 1831 | + assert "Alice" in content | |
| 1832 | + assert "Abstract text" in content | |
| 1833 | + | |
| 1834 | + | |
| 1835 | +class TestPodcastSourceDownload: | |
| 1836 | + @patch("requests.get") | |
| 1837 | + def test_max_episodes(self, mock_get): | |
| 1838 | + from video_processor.sources.podcast_source import PodcastSource | |
| 1839 | + | |
| 1840 | + items = "".join( | |
| 1841 | + f"<item><title>Ep {i}</title>" | |
| 1842 | + f'<enclosure url="https://example.com/ep{i}.mp3" type="audio/mpeg"/></item>' | |
| 1843 | + for i in range(20) | |
| 1844 | + ) | |
| 1845 | + xml = f"<rss><channel>{items}</channel></rss>" | |
| 1846 | + | |
| 1847 | + mock_get.return_value = MagicMock(text=xml, status_code=200) | |
| 1848 | + mock_get.return_value.raise_for_status = MagicMock() | |
| 1849 | + | |
| 1850 | + src = PodcastSource(feed_url="https://example.com/feed.xml", max_episodes=5) | |
| 1851 | + with patch.dict("sys.modules", {"feedparser": None}): | |
| 1852 | + files = src.list_videos() | |
| 1853 | + assert len(files) == 5 | |
| 1854 | + | |
| 1855 | + | |
| 1856 | +# --------------------------------------------------------------------------- | |
| 1857 | +# Auth edge cases | |
| 1858 | +# --------------------------------------------------------------------------- | |
| 1859 | + | |
| 1860 | + | |
| 1861 | +class TestZoomSourceAuth: | |
| 1862 | + def test_saved_token_valid(self, tmp_path): | |
| 1863 | + import time | |
| 1864 | + | |
| 1865 | + from video_processor.sources.zoom_source import ZoomSource | |
| 1866 | + | |
| 1867 | + token_path = tmp_path / "token.json" | |
| 1868 | + | |
| 1869 | + token_path.write_text( | |
| 1870 | + json.dumps({"access_token": "valid", "expires_at": time.time() + 3600}) | |
| 1871 | + ) | |
| 1872 | + src = ZoomSource(token_path=token_path) | |
| 1873 | + assert src._auth_saved_token() is True | |
| 1874 | + assert src._access_token == "valid" | |
| 1875 | + | |
| 1876 | + def test_saved_token_expired_no_refresh(self, tmp_path): | |
| 1877 | + from video_processor.sources.zoom_source import ZoomSource | |
| 1878 | + | |
| 1879 | + token_path = tmp_path / "token.json" | |
| 1880 | + token_path.write_text(json.dumps({"access_token": "old", "expires_at": 0})) | |
| 1881 | + src = ZoomSource(token_path=token_path) | |
| 1882 | + assert src._auth_saved_token() is False | |
| 1883 | + | |
| 1884 | + @patch("video_processor.sources.zoom_source.requests") | |
| 1885 | + def test_server_to_server_success(self, mock_requests, tmp_path): | |
| 1886 | + from video_processor.sources.zoom_source import ZoomSource | |
| 1887 | + | |
| 1888 | + mock_requests.post.return_value = MagicMock(status_code=200) | |
| 1889 | + mock_requests.post.return_value.raise_for_status = MagicMock() | |
| 1890 | + mock_requests.post.return_value.json.return_value = { | |
| 1891 | + "access_token": "s2s_tok", | |
| 1892 | + "expires_in": 3600, | |
| 1893 | + } | |
| 1894 | + | |
| 1895 | + src = ZoomSource( | |
| 1896 | + client_id="cid", | |
| 1897 | + client_secret="csec", | |
| 1898 | + account_id="aid", | |
| 1899 | + token_path=tmp_path / "token.json", | |
| 1900 | + ) | |
| 1901 | + assert src._auth_server_to_server() is True | |
| 1902 | + assert src._access_token == "s2s_tok" | |
| 1903 | + | |
| 1904 | + def test_server_to_server_no_creds(self): | |
| 1905 | + from video_processor.sources.zoom_source import ZoomSource | |
| 1906 | + | |
| 1907 | + src = ZoomSource(account_id="aid") | |
| 1908 | + assert src._auth_server_to_server() is False | |
| 1909 | + | |
| 1910 | + def test_download_no_url_raises(self): | |
| 1911 | + from video_processor.sources.zoom_source import ZoomSource | |
| 1912 | + | |
| 1913 | + src = ZoomSource() | |
| 1914 | + src._access_token = "tok" | |
| 1915 | + file = SourceFile(name="meeting.mp4", id="123") | |
| 1916 | + with pytest.raises(ValueError, match="No download URL"): | |
| 1917 | + src.download(file, Path("/tmp/out.mp4")) | |
| 1918 | + | |
| 1919 | + | |
| 1920 | +class TestGoogleDriveSourceAuth: | |
| 1921 | + def test_is_service_account_true(self, tmp_path): | |
| 1922 | + from video_processor.sources.google_drive import GoogleDriveSource | |
| 1923 | + | |
| 1924 | + creds = tmp_path / "sa.json" | |
| 1925 | + creds.write_text(json.dumps({"type": "service_account"})) | |
| 1926 | + src = GoogleDriveSource(credentials_path=str(creds)) | |
| 1927 | + assert src._is_service_account() is True | |
| 1928 | + | |
| 1929 | + def test_is_service_account_false(self, tmp_path): | |
| 1930 | + from video_processor.sources.google_drive import GoogleDriveSource | |
| 1931 | + | |
| 1932 | + creds = tmp_path / "oauth.json" | |
| 1933 | + creds.write_text(json.dumps({"type": "authorized_user"})) | |
| 1934 | + src = GoogleDriveSource(credentials_path=str(creds)) | |
| 1935 | + assert src._is_service_account() is False | |
| 1936 | + | |
| 1937 | + def test_is_service_account_no_file(self): | |
| 1938 | + from video_processor.sources.google_drive import GoogleDriveSource | |
| 1939 | + | |
| 1940 | + with patch.dict("os.environ", {}, clear=True): | |
| 1941 | + src = GoogleDriveSource(credentials_path=None) | |
| 1942 | + src.credentials_path = None | |
| 1943 | + assert src._is_service_account() is False | |
| 1944 | + | |
| 1945 | + def test_download_not_authed(self): | |
| 1946 | + from video_processor.sources.google_drive import GoogleDriveSource | |
| 1947 | + | |
| 1948 | + src = GoogleDriveSource() | |
| 1949 | + with pytest.raises(RuntimeError, match="Not authenticated"): | |
| 1950 | + src.download(SourceFile(name="x", id="y"), Path("/tmp/x")) | |
| 1951 | + | |
| 1952 | + | |
| 1953 | +class TestDropboxSourceAuth: | |
| 1954 | + def test_init_from_env(self): | |
| 1955 | + from video_processor.sources.dropbox_source import DropboxSource | |
| 1956 | + | |
| 1957 | + with patch.dict( | |
| 1958 | + "os.environ", | |
| 1959 | + {"DROPBOX_ACCESS_TOKEN": "tok", "DROPBOX_APP_KEY": "key"}, | |
| 1960 | + ): | |
| 1961 | + src = DropboxSource() | |
| 1962 | + assert src.access_token == "tok" | |
| 1963 | + assert src.app_key == "key" | |
| 1964 | + | |
| 1965 | + def test_not_authed_list(self): | |
| 1966 | + from video_processor.sources.dropbox_source import DropboxSource | |
| 1967 | + | |
| 1968 | + src = DropboxSource() | |
| 1969 | + with pytest.raises(RuntimeError, match="Not authenticated"): | |
| 1970 | + src.list_videos() | |
| 1971 | + | |
| 1972 | + def test_not_authed_download(self): | |
| 1973 | + from video_processor.sources.dropbox_source import DropboxSource | |
| 1974 | + | |
| 1975 | + src = DropboxSource() | |
| 1976 | + with pytest.raises(RuntimeError, match="Not authenticated"): | |
| 1977 | + src.download(SourceFile(name="x", id="y"), Path("/tmp/x")) | |
| 1978 | + | |
| 1979 | + | |
| 1980 | +class TestNotionSourceAuth: | |
| 1981 | + def test_no_token(self): | |
| 1982 | + from video_processor.sources.notion_source import NotionSource | |
| 1983 | + | |
| 1984 | + with patch.dict("os.environ", {}, clear=True): | |
| 1985 | + src = NotionSource(token="") | |
| 1986 | + assert src.authenticate() is False | |
| 1987 | + | |
| 1988 | + @patch("video_processor.sources.notion_source.requests") | |
| 1989 | + def test_auth_success(self, mock_requests): | |
| 1990 | + from video_processor.sources.notion_source import NotionSource | |
| 1991 | + | |
| 1992 | + mock_requests.get.return_value = MagicMock(status_code=200) | |
| 1993 | + mock_requests.get.return_value.raise_for_status = MagicMock() | |
| 1994 | + mock_requests.get.return_value.json.return_value = {"name": "Bot"} | |
| 1995 | + mock_requests.RequestException = Exception | |
| 1996 | + | |
| 1997 | + src = NotionSource(token="ntn_valid") | |
| 1998 | + assert src.authenticate() is True | |
| 1999 | + | |
| 2000 | + @patch("video_processor.sources.notion_source.requests") | |
| 2001 | + def test_auth_failure(self, mock_requests): | |
| 2002 | + from video_processor.sources.notion_source import NotionSource | |
| 2003 | + | |
| 2004 | + mock_requests.get.return_value.raise_for_status.side_effect = Exception("401") | |
| 2005 | + mock_requests.RequestException = Exception | |
| 2006 | + | |
| 2007 | + src = NotionSource(token="ntn_bad") | |
| 2008 | + assert src.authenticate() is False | |
| 2009 | + | |
| 2010 | + def test_extract_property_values(self): | |
| 2011 | + from video_processor.sources.notion_source import _extract_property_value | |
| 2012 | + | |
| 2013 | + assert _extract_property_value({"type": "number", "number": 42}) == "42" | |
| 2014 | + assert _extract_property_value({"type": "number", "number": None}) == "" | |
| 2015 | + assert _extract_property_value({"type": "select", "select": {"name": "High"}}) == "High" | |
| 2016 | + assert _extract_property_value({"type": "select", "select": None}) == "" | |
| 2017 | + assert _extract_property_value({"type": "checkbox", "checkbox": True}) == "True" | |
| 2018 | + assert _extract_property_value({"type": "url", "url": "https://ex.com"}) == "https://ex.com" | |
| 2019 | + assert _extract_property_value({"type": "unknown"}) == "" | |
| 2020 | + | |
| 2021 | + | |
| 2022 | +class TestGitHubSourceAuth: | |
| 2023 | + def test_authenticate_no_token(self): | |
| 2024 | + from video_processor.sources.github_source import GitHubSource | |
| 2025 | + | |
| 2026 | + src = GitHubSource(repo="owner/repo") | |
| 2027 | + with patch.dict("os.environ", {}, clear=True): | |
| 2028 | + with patch("subprocess.run", side_effect=FileNotFoundError): | |
| 2029 | + result = src.authenticate() | |
| 2030 | + assert result is True # works for public repos | |
| 2031 | + | |
| 2032 | + @patch("requests.get") | |
| 2033 | + def test_list_excludes_pr_from_issues(self, mock_get): | |
| 2034 | + from video_processor.sources.github_source import GitHubSource | |
| 2035 | + | |
| 2036 | + def side_effect(url, **kwargs): | |
| 2037 | + resp = MagicMock() | |
| 2038 | + resp.ok = True | |
| 2039 | + if "/readme" in url: | |
| 2040 | + resp.json.return_value = {} | |
| 2041 | + elif "/issues" in url: | |
| 2042 | + resp.json.return_value = [ | |
| 2043 | + {"number": 1, "title": "Bug"}, | |
| 2044 | + {"number": 2, "title": "PR as issue", "pull_request": {}}, | |
| 2045 | + ] | |
| 2046 | + elif "/pulls" in url: | |
| 2047 | + resp.json.return_value = [] | |
| 2048 | + return resp | |
| 2049 | + | |
| 2050 | + mock_get.side_effect = side_effect | |
| 2051 | + | |
| 2052 | + src = GitHubSource(repo="o/r") | |
| 2053 | + src.authenticate() | |
| 2054 | + files = src.list_videos() | |
| 2055 | + ids = [f.id for f in files] | |
| 2056 | + assert "issue:1" in ids | |
| 2057 | + assert "issue:2" not in ids # excluded because it has pull_request key | |
| 2058 | + | |
| 2059 | + | |
| 2060 | +class TestS3SourceErrors: | |
| 2061 | + def test_not_authed_list(self): | |
| 2062 | + from video_processor.sources.s3_source import S3Source | |
| 2063 | + | |
| 2064 | + src = S3Source(bucket="test") | |
| 2065 | + with pytest.raises(RuntimeError, match="Not authenticated"): | |
| 2066 | + src.list_videos() | |
| 2067 | + | |
| 2068 | + def test_not_authed_download(self): | |
| 2069 | + from video_processor.sources.s3_source import S3Source | |
| 2070 | + | |
| 2071 | + src = S3Source(bucket="test") | |
| 2072 | + with pytest.raises(RuntimeError, match="Not authenticated"): | |
| 2073 | + src.download(SourceFile(name="x", id="x"), Path("/tmp/x")) | |
| 1573 | 2074 |
| --- tests/test_sources.py | |
| +++ tests/test_sources.py | |
| @@ -1,13 +1,15 @@ | |
| 1 | """Tests for all source connectors: import, instantiation, authenticate, list_videos.""" |
| 2 | |
| 3 | import os |
| 4 | from unittest.mock import MagicMock, patch |
| 5 | |
| 6 | import pytest |
| 7 | |
| 8 | from video_processor.sources.base import SourceFile |
| 9 | |
| 10 | # --------------------------------------------------------------------------- |
| 11 | # SourceFile model |
| 12 | # --------------------------------------------------------------------------- |
| 13 | |
| @@ -1568,5 +1570,504 @@ | |
| 1568 | |
| 1569 | def test_zoom_lazy_import(self): |
| 1570 | from video_processor.sources import ZoomSource |
| 1571 | |
| 1572 | assert ZoomSource is not None |
| 1573 |
| --- tests/test_sources.py | |
| +++ tests/test_sources.py | |
| @@ -1,13 +1,15 @@ | |
| 1 | """Tests for all source connectors: import, instantiation, authenticate, list_videos.""" |
| 2 | |
| 3 | import json |
| 4 | import os |
| 5 | from pathlib import Path |
| 6 | from unittest.mock import MagicMock, patch |
| 7 | |
| 8 | import pytest |
| 9 | |
| 10 | from video_processor.sources.base import BaseSource, SourceFile |
| 11 | |
| 12 | # --------------------------------------------------------------------------- |
| 13 | # SourceFile model |
| 14 | # --------------------------------------------------------------------------- |
| 15 | |
| @@ -1568,5 +1570,504 @@ | |
| 1570 | |
| 1571 | def test_zoom_lazy_import(self): |
| 1572 | from video_processor.sources import ZoomSource |
| 1573 | |
| 1574 | assert ZoomSource is not None |
| 1575 | |
| 1576 | def test_invalid_lazy_import(self): |
| 1577 | from video_processor import sources |
| 1578 | |
| 1579 | with pytest.raises(AttributeError): |
| 1580 | _ = sources.NonexistentSource |
| 1581 | |
| 1582 | |
| 1583 | # --------------------------------------------------------------------------- |
| 1584 | # BaseSource.download_all |
| 1585 | # --------------------------------------------------------------------------- |
| 1586 | |
| 1587 | |
| 1588 | class TestBaseSourceDownloadAll: |
| 1589 | def test_download_all_success(self, tmp_path): |
| 1590 | """download_all should download all files using path when available.""" |
| 1591 | |
| 1592 | class FakeSource(BaseSource): |
| 1593 | def authenticate(self): |
| 1594 | return True |
| 1595 | |
| 1596 | def list_videos(self, **kwargs): |
| 1597 | return [] |
| 1598 | |
| 1599 | def download(self, file, destination): |
| 1600 | destination.parent.mkdir(parents=True, exist_ok=True) |
| 1601 | destination.write_text(f"content:{file.name}") |
| 1602 | return destination |
| 1603 | |
| 1604 | src = FakeSource() |
| 1605 | files = [ |
| 1606 | SourceFile(name="a.mp4", id="1"), |
| 1607 | SourceFile(name="b.mp4", id="2", path="subdir/b.mp4"), |
| 1608 | ] |
| 1609 | paths = src.download_all(files, tmp_path) |
| 1610 | assert len(paths) == 2 |
| 1611 | assert (tmp_path / "a.mp4").read_text() == "content:a.mp4" |
| 1612 | assert (tmp_path / "subdir" / "b.mp4").read_text() == "content:b.mp4" |
| 1613 | |
| 1614 | def test_download_all_partial_failure(self, tmp_path): |
| 1615 | """download_all should continue past failures and return successful paths.""" |
| 1616 | |
| 1617 | class PartialFail(BaseSource): |
| 1618 | def authenticate(self): |
| 1619 | return True |
| 1620 | |
| 1621 | def list_videos(self, **kwargs): |
| 1622 | return [] |
| 1623 | |
| 1624 | def download(self, file, destination): |
| 1625 | if file.id == "bad": |
| 1626 | raise RuntimeError("download failed") |
| 1627 | destination.parent.mkdir(parents=True, exist_ok=True) |
| 1628 | destination.write_text("ok") |
| 1629 | return destination |
| 1630 | |
| 1631 | src = PartialFail() |
| 1632 | files = [ |
| 1633 | SourceFile(name="good.mp4", id="good"), |
| 1634 | SourceFile(name="bad.mp4", id="bad"), |
| 1635 | SourceFile(name="also_good.mp4", id="good2"), |
| 1636 | ] |
| 1637 | paths = src.download_all(files, tmp_path) |
| 1638 | assert len(paths) == 2 |
| 1639 | |
| 1640 | |
| 1641 | # --------------------------------------------------------------------------- |
| 1642 | # Download & error handling tests |
| 1643 | # --------------------------------------------------------------------------- |
| 1644 | |
| 1645 | |
| 1646 | class TestRSSSourceDownload: |
| 1647 | @patch("requests.get") |
| 1648 | def test_download_entry(self, mock_get, tmp_path): |
| 1649 | from video_processor.sources.rss_source import RSSSource |
| 1650 | |
| 1651 | xml = ( |
| 1652 | "<rss><channel><item><title>Post 1</title>" |
| 1653 | "<link>https://example.com/1</link>" |
| 1654 | "<description>Summary here</description>" |
| 1655 | "<pubDate>Mon, 01 Jan 2025</pubDate></item></channel></rss>" |
| 1656 | ) |
| 1657 | mock_get.return_value = MagicMock(text=xml, status_code=200) |
| 1658 | mock_get.return_value.raise_for_status = MagicMock() |
| 1659 | |
| 1660 | src = RSSSource(url="https://example.com/feed.xml") |
| 1661 | with patch.dict("sys.modules", {"feedparser": None}): |
| 1662 | files = src.list_videos() |
| 1663 | assert len(files) == 1 |
| 1664 | |
| 1665 | dest = tmp_path / "entry.txt" |
| 1666 | result = src.download(files[0], dest) |
| 1667 | assert result.exists() |
| 1668 | content = result.read_text() |
| 1669 | assert "Post 1" in content |
| 1670 | assert "Summary here" in content |
| 1671 | |
| 1672 | @patch("requests.get") |
| 1673 | def test_download_not_found(self, mock_get, tmp_path): |
| 1674 | from video_processor.sources.rss_source import RSSSource |
| 1675 | |
| 1676 | xml = "<rss><channel></channel></rss>" |
| 1677 | mock_get.return_value = MagicMock(text=xml, status_code=200) |
| 1678 | mock_get.return_value.raise_for_status = MagicMock() |
| 1679 | |
| 1680 | src = RSSSource(url="https://example.com/feed.xml") |
| 1681 | with patch.dict("sys.modules", {"feedparser": None}): |
| 1682 | src.list_videos() |
| 1683 | |
| 1684 | fake = SourceFile(name="missing", id="nonexistent") |
| 1685 | with pytest.raises(ValueError, match="Entry not found"): |
| 1686 | src.download(fake, tmp_path / "out.txt") |
| 1687 | |
| 1688 | |
| 1689 | class TestWebSourceDownload: |
| 1690 | @patch("requests.get") |
| 1691 | def test_download_saves_text(self, mock_get, tmp_path): |
| 1692 | from video_processor.sources.web_source import WebSource |
| 1693 | |
| 1694 | mock_get.return_value = MagicMock( |
| 1695 | text="<html><body><p>Page content</p></body></html>", status_code=200 |
| 1696 | ) |
| 1697 | mock_get.return_value.raise_for_status = MagicMock() |
| 1698 | |
| 1699 | src = WebSource(url="https://example.com/page") |
| 1700 | with patch.dict("sys.modules", {"bs4": None}): |
| 1701 | file = src.list_videos()[0] |
| 1702 | dest = tmp_path / "page.txt" |
| 1703 | result = src.download(file, dest) |
| 1704 | assert result.exists() |
| 1705 | assert "Page content" in result.read_text() |
| 1706 | |
| 1707 | def test_strip_html_tags(self): |
| 1708 | from video_processor.sources.web_source import _strip_html_tags |
| 1709 | |
| 1710 | html = "<p>Hello</p><script>evil()</script><style>.x{}</style>" |
| 1711 | text = _strip_html_tags(html) |
| 1712 | assert "Hello" in text |
| 1713 | assert "evil" not in text |
| 1714 | |
| 1715 | |
class TestHackerNewsSourceDownload:
    """Download, comment-limit, and deleted-comment behavior of HackerNewsSource."""

    @patch("requests.get")
    def test_download(self, mock_get, tmp_path):
        from video_processor.sources.hackernews_source import HackerNewsSource

        story_payload = {"title": "Story", "by": "user", "score": 1, "kids": []}

        def fake_get(url, timeout=10):
            response = MagicMock()
            response.raise_for_status = MagicMock()
            response.json.return_value = story_payload
            return response

        mock_get.side_effect = fake_get

        source = HackerNewsSource(item_id=12345)
        entry = source.list_videos()[0]
        written = source.download(entry, tmp_path / "hn.txt")
        assert written.exists()
        assert "Story" in written.read_text()

    @patch("requests.get")
    def test_max_comments(self, mock_get):
        from video_processor.sources.hackernews_source import HackerNewsSource

        story_payload = {"title": "Big", "by": "u", "score": 1, "kids": list(range(100, 110))}
        comment_payload = {"by": "c", "text": "hi", "kids": []}

        def fake_get(url, timeout=10):
            response = MagicMock()
            response.raise_for_status = MagicMock()
            if "/12345.json" in url:
                response.json.return_value = story_payload
            else:
                response.json.return_value = comment_payload
            return response

        mock_get.side_effect = fake_get

        source = HackerNewsSource(item_id=12345, max_comments=3)
        rendered = source.fetch_text()
        # Only max_comments of the ten child comments may appear.
        assert rendered.count("**c**") == 3

    @patch("requests.get")
    def test_deleted_comments_skipped(self, mock_get):
        from video_processor.sources.hackernews_source import HackerNewsSource

        story_payload = {"title": "Story", "by": "u", "score": 1, "kids": [200, 201]}
        children = {
            "/200.json": {"deleted": True},
            "/201.json": {"by": "alive", "text": "here", "dead": False},
        }

        def fake_get(url, timeout=10):
            response = MagicMock()
            response.raise_for_status = MagicMock()
            if "/12345.json" in url:
                response.json.return_value = story_payload
            else:
                for suffix, payload in children.items():
                    if suffix in url:
                        response.json.return_value = payload
            return response

        mock_get.side_effect = fake_get

        source = HackerNewsSource(item_id=12345)
        rendered = source.fetch_text()
        assert "alive" in rendered
        assert rendered.count("**") == 2  # only the alive comment
| 1783 | |
| 1784 | |
class TestRedditSourceDownload:
    """Download behavior of RedditSource via the public JSON listing."""

    @patch("requests.get")
    def test_download(self, mock_get, tmp_path):
        from video_processor.sources.reddit_source import RedditSource

        # A Reddit thread endpoint returns a two-element listing:
        # [post listing, comment listing].
        listing = [
            {"data": {"children": [{"data": {"title": "Post", "author": "u", "score": 1}}]}},
            {"data": {"children": []}},
        ]
        response = MagicMock(status_code=200)
        response.raise_for_status = MagicMock()
        response.json.return_value = listing
        mock_get.return_value = response

        source = RedditSource(url="https://reddit.com/r/test/comments/abc/post")
        entry = source.list_videos()[0]
        written = source.download(entry, tmp_path / "reddit.txt")
        assert written.exists()
        assert "Post" in written.read_text()
| 1803 | |
| 1804 | |
class TestArxivSourceDownload:
    """Metadata-entry download behavior of ArxivSource."""

    @patch("requests.get")
    def test_download_metadata(self, mock_get, tmp_path):
        from video_processor.sources.arxiv_source import ArxivSource

        feed_xml = """<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>Paper Title</title>
<summary>Abstract text</summary>
<author><name>Alice</name></author>
<published>2023-01-01</published>
</entry>
</feed>"""

        response = MagicMock(status_code=200)
        response.text = feed_xml
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = ArxivSource("2301.12345")
        # Pick the metadata entry (id prefixed "meta:") out of the listing.
        meta_entry = next(f for f in source.list_videos() if f.id.startswith("meta:"))
        written = source.download(meta_entry, tmp_path / "paper.txt")
        assert written.exists()
        text = written.read_text()
        for fragment in ("Paper Title", "Alice", "Abstract text"):
            assert fragment in text
| 1833 | |
| 1834 | |
class TestPodcastSourceDownload:
    """Episode-count limiting in PodcastSource.list_videos."""

    @patch("requests.get")
    def test_max_episodes(self, mock_get):
        from video_processor.sources.podcast_source import PodcastSource

        item_xml = [
            f"<item><title>Ep {i}</title>"
            f'<enclosure url="https://example.com/ep{i}.mp3" type="audio/mpeg"/></item>'
            for i in range(20)
        ]
        feed_xml = f"<rss><channel>{''.join(item_xml)}</channel></rss>"

        response = MagicMock(status_code=200)
        response.text = feed_xml
        response.raise_for_status = MagicMock()
        mock_get.return_value = response

        source = PodcastSource(feed_url="https://example.com/feed.xml", max_episodes=5)
        # Force the built-in XML parsing path by making `import feedparser` fail.
        with patch.dict("sys.modules", {"feedparser": None}):
            episodes = source.list_videos()
        assert len(episodes) == 5
| 1854 | |
| 1855 | |
| 1856 | # --------------------------------------------------------------------------- |
| 1857 | # Auth edge cases |
| 1858 | # --------------------------------------------------------------------------- |
| 1859 | |
| 1860 | |
class TestZoomSourceAuth:
    """Auth edge cases for ZoomSource: saved tokens, S2S OAuth, download guard."""

    def test_saved_token_valid(self, tmp_path):
        import time

        from video_processor.sources.zoom_source import ZoomSource

        token_path = tmp_path / "token.json"
        # A token expiring an hour from now is still usable as-is.
        token_path.write_text(
            json.dumps({"access_token": "valid", "expires_at": time.time() + 3600})
        )
        src = ZoomSource(token_path=token_path)
        assert src._auth_saved_token() is True
        assert src._access_token == "valid"

    def test_saved_token_expired_no_refresh(self, tmp_path):
        from video_processor.sources.zoom_source import ZoomSource

        token_path = tmp_path / "token.json"
        # expires_at=0 is in the past and no refresh credentials exist.
        token_path.write_text(json.dumps({"access_token": "old", "expires_at": 0}))
        src = ZoomSource(token_path=token_path)
        assert src._auth_saved_token() is False

    @patch("video_processor.sources.zoom_source.requests")
    def test_server_to_server_success(self, mock_requests, tmp_path):
        from video_processor.sources.zoom_source import ZoomSource

        mock_requests.post.return_value = MagicMock(status_code=200)
        mock_requests.post.return_value.raise_for_status = MagicMock()
        mock_requests.post.return_value.json.return_value = {
            "access_token": "s2s_tok",
            "expires_in": 3600,
        }

        src = ZoomSource(
            client_id="cid",
            client_secret="csec",
            account_id="aid",
            token_path=tmp_path / "token.json",
        )
        assert src._auth_server_to_server() is True
        assert src._access_token == "s2s_tok"

    def test_server_to_server_no_creds(self):
        from video_processor.sources.zoom_source import ZoomSource

        # An account_id alone is not enough: client id/secret are required.
        src = ZoomSource(account_id="aid")
        assert src._auth_server_to_server() is False

    def test_download_no_url_raises(self, tmp_path):
        from video_processor.sources.zoom_source import ZoomSource

        src = ZoomSource()
        src._access_token = "tok"
        file = SourceFile(name="meeting.mp4", id="123")
        with pytest.raises(ValueError, match="No download URL"):
            # Use the tmp_path fixture instead of a hard-coded /tmp path so
            # the test is portable (Windows) and can never touch a real
            # filesystem location outside pytest's sandbox.
            src.download(file, tmp_path / "out.mp4")
| 1918 | |
| 1919 | |
class TestGoogleDriveSourceAuth:
    """Credential-type detection and unauthenticated-download guard."""

    def test_is_service_account_true(self, tmp_path):
        from video_processor.sources.google_drive import GoogleDriveSource

        creds = tmp_path / "sa.json"
        creds.write_text(json.dumps({"type": "service_account"}))
        src = GoogleDriveSource(credentials_path=str(creds))
        assert src._is_service_account() is True

    def test_is_service_account_false(self, tmp_path):
        from video_processor.sources.google_drive import GoogleDriveSource

        creds = tmp_path / "oauth.json"
        # "authorized_user" is the OAuth installed-app credential type.
        creds.write_text(json.dumps({"type": "authorized_user"}))
        src = GoogleDriveSource(credentials_path=str(creds))
        assert src._is_service_account() is False

    def test_is_service_account_no_file(self):
        from video_processor.sources.google_drive import GoogleDriveSource

        # Clear the environment so no ambient credential path is picked up.
        with patch.dict("os.environ", {}, clear=True):
            src = GoogleDriveSource(credentials_path=None)
            src.credentials_path = None
            assert src._is_service_account() is False

    def test_download_not_authed(self, tmp_path):
        from video_processor.sources.google_drive import GoogleDriveSource

        src = GoogleDriveSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            # tmp_path instead of a hard-coded /tmp path keeps the test
            # portable and inside pytest's sandbox.
            src.download(SourceFile(name="x", id="y"), tmp_path / "x")
| 1951 | |
| 1952 | |
class TestDropboxSourceAuth:
    """Env-var initialization and not-authenticated guards for DropboxSource."""

    def test_init_from_env(self):
        from video_processor.sources.dropbox_source import DropboxSource

        with patch.dict(
            "os.environ",
            {"DROPBOX_ACCESS_TOKEN": "tok", "DROPBOX_APP_KEY": "key"},
        ):
            src = DropboxSource()
        assert src.access_token == "tok"
        assert src.app_key == "key"

    def test_not_authed_list(self):
        from video_processor.sources.dropbox_source import DropboxSource

        src = DropboxSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            src.list_videos()

    def test_not_authed_download(self, tmp_path):
        from video_processor.sources.dropbox_source import DropboxSource

        src = DropboxSource()
        with pytest.raises(RuntimeError, match="Not authenticated"):
            # tmp_path instead of a hard-coded /tmp path keeps the test
            # portable and inside pytest's sandbox.
            src.download(SourceFile(name="x", id="y"), tmp_path / "x")
| 1978 | |
| 1979 | |
class TestNotionSourceAuth:
    """Authentication outcomes and property-value extraction for NotionSource."""

    def test_no_token(self):
        from video_processor.sources.notion_source import NotionSource

        # With no explicit token and a cleared environment, auth must fail.
        with patch.dict("os.environ", {}, clear=True):
            source = NotionSource(token="")
            assert source.authenticate() is False

    @patch("video_processor.sources.notion_source.requests")
    def test_auth_success(self, mock_requests):
        from video_processor.sources.notion_source import NotionSource

        ok_response = MagicMock(status_code=200)
        ok_response.raise_for_status = MagicMock()
        ok_response.json.return_value = {"name": "Bot"}
        mock_requests.get.return_value = ok_response
        mock_requests.RequestException = Exception

        assert NotionSource(token="ntn_valid").authenticate() is True

    @patch("video_processor.sources.notion_source.requests")
    def test_auth_failure(self, mock_requests):
        from video_processor.sources.notion_source import NotionSource

        # Simulate an HTTP error surfacing from raise_for_status.
        mock_requests.get.return_value.raise_for_status.side_effect = Exception("401")
        mock_requests.RequestException = Exception

        assert NotionSource(token="ntn_bad").authenticate() is False

    def test_extract_property_values(self):
        from video_processor.sources.notion_source import _extract_property_value

        cases = [
            ({"type": "number", "number": 42}, "42"),
            ({"type": "number", "number": None}, ""),
            ({"type": "select", "select": {"name": "High"}}, "High"),
            ({"type": "select", "select": None}, ""),
            ({"type": "checkbox", "checkbox": True}, "True"),
            ({"type": "url", "url": "https://ex.com"}, "https://ex.com"),
            ({"type": "unknown"}, ""),
        ]
        for prop, expected in cases:
            assert _extract_property_value(prop) == expected
| 2020 | |
| 2021 | |
class TestGitHubSourceAuth:
    """Tokenless auth fallback and issue/PR separation for GitHubSource."""

    def test_authenticate_no_token(self):
        from video_processor.sources.github_source import GitHubSource

        src = GitHubSource(repo="owner/repo")
        # No env token and no `gh` CLI on PATH: authentication still
        # reports success, since public repos need no credentials.
        with patch.dict("os.environ", {}, clear=True):
            with patch("subprocess.run", side_effect=FileNotFoundError):
                assert src.authenticate() is True

    @patch("requests.get")
    def test_list_excludes_pr_from_issues(self, mock_get):
        from video_processor.sources.github_source import GitHubSource

        def fake_get(url, **kwargs):
            response = MagicMock()
            response.ok = True
            if "/readme" in url:
                response.json.return_value = {}
            elif "/issues" in url:
                response.json.return_value = [
                    {"number": 1, "title": "Bug"},
                    {"number": 2, "title": "PR as issue", "pull_request": {}},
                ]
            elif "/pulls" in url:
                response.json.return_value = []
            return response

        mock_get.side_effect = fake_get

        src = GitHubSource(repo="o/r")
        src.authenticate()
        listed_ids = {f.id for f in src.list_videos()}
        assert "issue:1" in listed_ids
        # GitHub's issues endpoint also returns PRs; entries carrying a
        # "pull_request" key must be filtered out of the issue listing.
        assert "issue:2" not in listed_ids
| 2058 | |
| 2059 | |
class TestS3SourceErrors:
    """Unauthenticated S3Source calls must raise rather than proceed."""

    def test_not_authed_list(self):
        from video_processor.sources.s3_source import S3Source

        src = S3Source(bucket="test")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            src.list_videos()

    def test_not_authed_download(self, tmp_path):
        from video_processor.sources.s3_source import S3Source

        src = S3Source(bucket="test")
        with pytest.raises(RuntimeError, match="Not authenticated"):
            # tmp_path instead of a hard-coded /tmp path keeps the test
            # portable and inside pytest's sandbox.
            src.download(SourceFile(name="x", id="x"), tmp_path / "x")
| 2074 |