|
1
|
"""Google Keep source connector using the gws CLI (googleworkspace/cli). |
|
2
|
|
|
3
|
Fetches notes from Google Keep via the `gws` CLI tool. |
|
4
|
Outputs plain text suitable for KG ingestion. |
|
5
|
|
|
6
|
Requires: npm install -g @googleworkspace/cli |
|
7
|
Auth: gws auth login (interactive) or GOOGLE_WORKSPACE_CLI_CREDENTIALS_FILE (headless) |
|
8
|
""" |
|
9
|
|
|
10
|
import json |
|
11
|
import logging |
|
12
|
import shutil |
|
13
|
import subprocess |
|
14
|
from pathlib import Path |
|
15
|
from typing import Any, Dict, List, Optional |
|
16
|
|
|
17
|
from video_processor.sources.base import BaseSource, SourceFile |
|
18
|
|
|
19
|
logger = logging.getLogger(__name__) |
|
20
|
|
|
21
|
|
|
22
|
def _run_gws(args: List[str], timeout: int = 30) -> Any:
    """Run a gws CLI command and return its parsed JSON output.

    Args:
        args: Arguments passed to the ``gws`` executable (without the
            leading ``gws`` itself).
        timeout: Seconds to wait for the process to finish.

    Returns:
        The decoded JSON value printed on stdout. This is whatever the
        command emits (a dict or a list). When stdout is not valid JSON,
        ``{"raw": <stripped stdout>}`` is returned instead.

    Raises:
        RuntimeError: If the executable cannot be started or exits with a
            non-zero status (stderr is included in the message).
        subprocess.TimeoutExpired: If the command runs longer than
            ``timeout`` seconds.
    """
    cmd = ["gws", *args]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except OSError as e:
        # A missing or non-executable binary is reported as OSError by
        # subprocess; normalise it to the RuntimeError callers already handle.
        raise RuntimeError(f"gws {' '.join(args)} failed: {e}") from e

    if proc.returncode != 0:
        raise RuntimeError(f"gws {' '.join(args)} failed: {proc.stderr.strip()}")

    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError:
        # Non-JSON output is preserved so callers can still use or log it.
        return {"raw": proc.stdout.strip()}
|
32
|
|
|
33
|
|
|
34
|
def _note_to_text(note: dict) -> str:
    """Extract text content from a Google Keep note structure.

    Handles plain text notes and checklists. Checklist items are formatted
    as ``- [x] item`` (checked) or ``- [ ] item`` (unchecked).

    Two shapes are accepted for every text field: a plain string, or a
    ``{"text": ...}`` wrapper (the Keep REST ``TextContent`` form). The body
    may likewise be a string or a dict holding ``text`` and/or
    ``list.listItems``. Missing, ``null`` or otherwise unusable values are
    skipped instead of raising.

    Args:
        note: Decoded JSON object describing one note.

    Returns:
        Title, body and checklist lines joined by blank lines, or ``""``
        when nothing textual could be extracted.
    """

    def _as_text(value: Any) -> str:
        # Unwrap {"text": {"text": "..."}}-style nesting down to the string.
        while isinstance(value, dict):
            value = value.get("text")
        return value.strip() if isinstance(value, str) else ""

    if not isinstance(note, dict):
        return ""

    parts: List[str] = []

    title = _as_text(note.get("title"))
    if title:
        parts.append(title)

    raw_body = note.get("body", note.get("textContent", ""))
    body = _as_text(raw_body)
    if body:
        parts.append(body)

    # Checklist items may appear under "list", "listContent", or "checklistItems"
    list_items = note.get("list", note.get("listContent", note.get("checklistItems")))
    if list_items is None and isinstance(raw_body, dict):
        # REST-style notes keep the checklist inside the body section.
        list_items = raw_body.get("list")
    if isinstance(list_items, dict):
        list_items = list_items.get("listItems")

    if isinstance(list_items, list):
        for item in list_items:
            if not isinstance(item, dict):
                continue
            text = _as_text(item.get("text"))
            if not text:
                continue
            checked = item.get("checked", item.get("isChecked", False))
            marker = "[x]" if checked else "[ ]"
            parts.append(f"- {marker} {text}")

    return "\n\n".join(parts)
|
62
|
|
|
63
|
|
|
64
|
class GoogleKeepSource(BaseSource):
    """
    Fetch notes from Google Keep via the gws CLI.

    Every note is exposed as a plain-text ``SourceFile``.

    Usage:
        source = GoogleKeepSource()                    # all notes
        source = GoogleKeepSource(label="meetings")    # filter by label
        files = source.list_videos()
        source.download_all(files, Path("./notes"))
    """

    def __init__(self, label: Optional[str] = None):
        """Create a source, optionally restricted to notes carrying ``label``."""
        self.label = label

    def authenticate(self) -> bool:
        """Check if gws CLI is installed and authenticated.

        Returns:
            ``True`` when ``gws`` is on PATH and ``gws auth status``
            succeeds within 10 seconds; ``False`` otherwise (the reason is
            logged).
        """
        if not shutil.which("gws"):
            logger.error("gws CLI not found. Install with: npm install -g @googleworkspace/cli")
            return False
        try:
            _run_gws(["auth", "status"], timeout=10)
        except (RuntimeError, subprocess.TimeoutExpired):
            logger.error("gws not authenticated. Run: gws auth login")
            return False
        return True

    def list_videos(
        self,
        folder_id: Optional[str] = None,
        folder_path: Optional[str] = None,
        patterns: Optional[List[str]] = None,
    ) -> List[SourceFile]:
        """List notes in Google Keep. Returns SourceFile per note.

        ``folder_id``, ``folder_path`` and ``patterns`` are accepted but not
        used by this method; only the instance's ``label`` narrows the
        listing.

        Returns:
            One ``SourceFile`` per note, or an empty list when the CLI call
            fails or times out (the error is logged).
        """
        args = ["keep", "notes", "list", "--output", "json"]
        if self.label:
            args.extend(["--label", self.label])

        try:
            result = _run_gws(args, timeout=60)
        except (RuntimeError, subprocess.TimeoutExpired) as e:
            # A timeout is a listing failure like any other; do not let it escape.
            logger.error("Failed to list Keep notes: %s", e)
            return []

        # Result may be a list directly or nested under a key
        notes: Any = []
        if isinstance(result, list):
            notes = result
        elif isinstance(result, dict):
            notes = result.get("notes", result.get("items", []))
            # If we got a single note back (not a list), wrap it
            if not notes and "id" in result and "raw" not in result:
                notes = [result]
        if not isinstance(notes, list):
            # e.g. "notes": null -- nothing to iterate over
            notes = []

        files: List[SourceFile] = []
        for note in notes:
            if not isinstance(note, dict):
                continue

            note_id = note.get("id", note.get("noteId", ""))
            raw_title = note.get("title")
            title = raw_title.strip() if isinstance(raw_title, str) else ""
            title = title or "Untitled Note"
            modified = note.get("modifiedTime", note.get("updateTime"))

            # Estimate size from text content
            text = _note_to_text(note)
            size = len(text.encode("utf-8")) if text else None

            files.append(
                SourceFile(
                    name=title,
                    id=str(note_id),
                    size_bytes=size,
                    mime_type="text/plain",
                    modified_at=modified,
                )
            )

        logger.info("Found %d note(s) in Google Keep", len(files))
        return files

    def download(self, file: SourceFile, destination: Path) -> Path:
        """Download a Keep note's content as a text file.

        Args:
            file: Note to fetch; its ``id`` is sent to ``gws keep notes get``.
            destination: Target file path. Parent directories are created.

        Returns:
            ``destination`` as a ``Path`` after the text has been written.

        Raises:
            RuntimeError: If the CLI call fails or times out.
        """
        destination = Path(destination)
        destination.parent.mkdir(parents=True, exist_ok=True)

        try:
            result = _run_gws(
                [
                    "keep",
                    "notes",
                    "get",
                    "--params",
                    json.dumps({"noteId": file.id}),
                ],
                timeout=30,
            )
        except (RuntimeError, subprocess.TimeoutExpired) as e:
            raise RuntimeError(f"Failed to fetch Keep note {file.id}: {e}") from e

        note = result if isinstance(result, dict) else {}
        text = _note_to_text(note)

        if not text:
            # Fallback: use raw output if structured extraction yielded nothing.
            # Dump the original result (not the narrowed dict) so a non-dict
            # payload is written out instead of being replaced by "{}".
            if "raw" in note:
                text = note["raw"]
            else:
                text = json.dumps(result, indent=2)

        destination.write_text(text, encoding="utf-8")
        logger.info("Saved note '%s' to %s", file.name, destination)
        return destination
|
171
|
|