PlanOpticon

planopticon / video_processor / sources / google_keep_source.py
Blame History Raw 171 lines
1
"""Google Keep source connector using the gws CLI (googleworkspace/cli).
2
3
Fetches notes from Google Keep via the `gws` CLI tool.
4
Outputs plain text suitable for KG ingestion.
5
6
Requires: npm install -g @googleworkspace/cli
7
Auth: gws auth login (interactive) or GOOGLE_WORKSPACE_CLI_CREDENTIALS_FILE (headless)
8
"""
9
10
import json
11
import logging
12
import shutil
13
import subprocess
14
from pathlib import Path
15
from typing import Any, Dict, List, Optional
16
17
from video_processor.sources.base import BaseSource, SourceFile
18
19
logger = logging.getLogger(__name__)
20
21
22
def _run_gws(args: List[str], timeout: int = 30) -> Any:
    """Run a gws CLI command and return its parsed JSON output.

    Args:
        args: Arguments placed after the ``gws`` executable name.
        timeout: Seconds to wait for the process before giving up.

    Returns:
        The decoded JSON document written to stdout (a dict or a list,
        depending on the command). When stdout is not valid JSON the
        stripped text is returned as ``{"raw": <stdout>}`` instead.

    Raises:
        RuntimeError: If the ``gws`` executable cannot be found or the
            command exits with a non-zero status.
        subprocess.TimeoutExpired: If the command runs longer than ``timeout``.
    """
    cmd = ["gws", *args]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except FileNotFoundError as exc:
        # Surface a missing binary as the same error type callers already
        # handle for failed commands, instead of an unexpected OSError.
        raise RuntimeError(
            "gws CLI not found. Install with: npm install -g @googleworkspace/cli"
        ) from exc

    if proc.returncode != 0:
        # Fall back to stdout so the message is not empty when the CLI
        # reports its error there.
        detail = proc.stderr.strip() or proc.stdout.strip()
        raise RuntimeError(f"gws {' '.join(args)} failed: {detail}")

    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError:
        return {"raw": proc.stdout.strip()}
32
33
34
def _note_to_text(note: dict) -> str:
    """Extract text content from a Google Keep note structure.

    Handles plain text notes and checklists. Checklist items are formatted
    as ``- [x] item`` (checked) or ``- [ ] item`` (unchecked); nested child
    items are indented by two spaces per level.

    Both flat shapes (``body``/``textContent`` as strings, items under
    ``list``/``listContent``/``checklistItems``) and the nested shape used by
    the Keep REST API (``body.text.text``, ``body.list.listItems``) are
    accepted. Missing, ``null`` or unexpectedly typed fields are ignored
    rather than raising.

    Args:
        note: Decoded JSON object describing one note.

    Returns:
        Title, body and checklist lines joined by blank lines, or ``""``
        when the note has no extractable text.
    """
    if not isinstance(note, dict):
        return ""

    parts: List[str] = []

    title = _keep_text(note.get("title"))
    if title:
        parts.append(title)

    body = note.get("body")
    if body is None:
        body = note.get("textContent")
    body_text = _keep_text(body)
    if body_text:
        parts.append(body_text)

    # Checklist items may appear under "list", "listContent", or
    # "checklistItems", or nested inside a structured body as body.list.
    candidates = [note.get(key) for key in ("list", "listContent", "checklistItems")]
    if isinstance(body, dict):
        candidates.append(body.get("list"))
    # Use the first location that actually holds items so an empty
    # placeholder under one key does not hide items under another.
    items = next((found for found in map(_keep_items, candidates) if found), [])
    parts.extend(_keep_checklist_lines(items))

    return "\n\n".join(parts)


def _keep_text(value: Any) -> str:
    """Return stripped text from a plain string or a ``{"text": ...}`` wrapper."""
    # Unwrap nested {"text": {"text": "..."}} objects until a leaf is reached.
    while isinstance(value, dict):
        value = value.get("text")
    return value.strip() if isinstance(value, str) else ""


def _keep_items(value: Any) -> List[Any]:
    """Return checklist entries from a bare list or a ``{"listItems": [...]}`` wrapper."""
    if isinstance(value, dict):
        value = value.get("listItems")
    return value if isinstance(value, list) else []


def _keep_checklist_lines(items: List[Any], indent: str = "") -> List[str]:
    """Render checklist entries, including ``childListItems``, as ``- [ ] text`` lines."""
    lines: List[str] = []
    for item in items:
        if not isinstance(item, dict):
            continue
        text = _keep_text(item.get("text"))
        if text:
            checked = item.get("checked", item.get("isChecked", False))
            marker = "[x]" if checked else "[ ]"
            lines.append(f"{indent}- {marker} {text}")
        children = _keep_items(item.get("childListItems"))
        lines.extend(_keep_checklist_lines(children, indent + "  "))
    return lines
62
63
64
class GoogleKeepSource(BaseSource):
    """
    Fetch notes from Google Keep via the gws CLI.

    Usage:
        source = GoogleKeepSource()  # all notes
        source = GoogleKeepSource(label="meetings")  # filter by label
        files = source.list_videos()
        source.download_all(files, Path("./notes"))
    """

    def __init__(self, label: Optional[str] = None):
        """Create a source, optionally restricted to notes carrying ``label``."""
        self.label = label

    def authenticate(self) -> bool:
        """Check if gws CLI is installed and authenticated.

        Returns:
            ``True`` when ``gws`` is on PATH and ``gws auth status`` succeeds,
            ``False`` otherwise (the reason is logged).
        """
        if not shutil.which("gws"):
            logger.error("gws CLI not found. Install with: npm install -g @googleworkspace/cli")
            return False
        try:
            _run_gws(["auth", "status"], timeout=10)
            return True
        except (RuntimeError, subprocess.TimeoutExpired):
            logger.error("gws not authenticated. Run: gws auth login")
            return False

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """List notes in Google Keep. Returns SourceFile per note.

        ``folder_id``, ``folder_path`` and ``patterns`` are accepted but not
        used by this method; only ``self.label`` narrows the listing.

        Returns:
            One ``SourceFile`` per note, or an empty list when the CLI call
            fails or times out (the failure is logged, not raised).
        """
        args = ["keep", "notes", "list", "--output", "json"]

        if self.label:
            args.extend(["--label", self.label])

        try:
            result = _run_gws(args, timeout=60)
        except (RuntimeError, subprocess.TimeoutExpired) as e:
            # A timeout is treated like any other CLI failure so listing
            # stays best-effort, matching authenticate().
            logger.error("Failed to list Keep notes: %s", e)
            return []

        # Result may be a list directly or nested under a key
        notes: List[dict] = []
        if isinstance(result, list):
            notes = result
        elif isinstance(result, dict):
            found = result.get("notes", result.get("items", []))
            notes = found if isinstance(found, list) else []
            if not notes:
                if "raw" in result:
                    logger.warning("gws returned non-JSON output while listing Keep notes")
                elif "id" in result:
                    # If we got a single note back (not a list), wrap it
                    notes = [result]

        files: List[SourceFile] = []
        for note in notes:
            if not isinstance(note, dict):
                # Skip entries that are not note objects instead of crashing.
                continue

            # NOTE(review): the Keep REST API identifies notes by "name"
            # ("notes/..."); confirm which identifier key gws actually emits.
            note_id = note.get("id", note.get("noteId", ""))

            raw_title = note.get("title")
            title = raw_title.strip() if isinstance(raw_title, str) else ""
            title = title or "Untitled Note"

            modified = note.get("modifiedTime", note.get("updateTime"))

            # Estimate size from text content
            text = _note_to_text(note)
            size = len(text.encode("utf-8")) if text else None

            files.append(
                SourceFile(
                    name=title,
                    id=str(note_id),
                    size_bytes=size,
                    mime_type="text/plain",
                    modified_at=modified,
                )
            )

        logger.info("Found %d note(s) in Google Keep", len(files))
        return files

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download a Keep note's content as a text file.

        Args:
            file: Entry whose ``id`` selects the note to fetch.
            destination: File path to write; parent directories are created.

        Returns:
            ``destination`` as a ``Path``.

        Raises:
            RuntimeError: If the gws command fails.
            subprocess.TimeoutExpired: If the gws command takes longer than
                30 seconds.
        """
        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)

        try:
            result = _run_gws(
                [
                    "keep",
                    "notes",
                    "get",
                    "--params",
                    json.dumps({"noteId": file.id}),
                ],
                timeout=30,
            )
        except RuntimeError as e:
            raise RuntimeError(f"Failed to fetch Keep note {file.id}: {e}") from e

        if isinstance(result, dict):
            text = _note_to_text(result)
            if not text:
                # Fallback: use raw output if structured extraction yielded nothing
                text = result.get("raw", json.dumps(result, indent=2))
        else:
            # Unexpected top-level JSON (e.g. a list): keep the payload
            # rather than writing an empty placeholder object.
            text = json.dumps(result, indent=2)

        destination.write_text(text, encoding="utf-8")
        logger.info("Saved note '%s' to %s", file.name, destination)
        return destination
171

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button