PlanOpticon

1
2
<!doctype html>
3
<html lang="en" class="no-js">
4
<head>
5
6
<meta charset="utf-8">
7
<meta name="viewport" content="width=device-width,initial-scale=1">
8
9
<meta name="description" content="AI-powered video analysis and knowledge extraction">
10
11
12
<meta name="author" content="CONFLICT LLC">
13
14
15
<link rel="canonical" href="https://planopticon.dev/guide/document-ingestion/">
16
17
18
<link rel="prev" href="../batch/">
19
20
21
<link rel="next" href="../cloud-sources/">
22
23
24
25
26
27
<link rel="icon" href="../../assets/images/favicon.png">
28
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.4">
29
30
31
32
<title>Document Ingestion - PlanOpticon</title>
33
34
35
36
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
37
38
39
<link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
40
41
42
43
44
45
46
47
48
49
50
51
52
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
53
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
54
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
55
56
57
58
<link rel="stylesheet" href="../../assets/_mkdocstrings.css">
59
60
<link rel="stylesheet" href="../../assets/css/custom.css">
61
62
<script>
  // URL obtained by resolving "../.." against the current location; its
  // pathname prefixes every storage key used by the helpers below.
  __md_scope = new URL("../..", location);

  // Reduce a string to a number: for each character the accumulator becomes
  // (acc << 5) - acc + charCodeAt(0), starting from 0.
  __md_hash = (text) =>
    [...text].reduce((acc, ch) => (acc << 5) - acc + ch.charCodeAt(0), 0);

  // Read and JSON-parse the value stored under "<scope pathname>.<key>".
  // Storage defaults to localStorage and scope to __md_scope.
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // JSON-encode a value and store it under "<scope pathname>.<key>".
  // Any exception raised while writing is deliberately swallowed.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
63
64
65
66
67
68
</head>
69
70
71
72
73
74
75
76
77
78
<body dir="ltr" data-md-color-scheme="slate" data-md-color-primary="custom" data-md-color-accent="custom">
79
80
81
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
82
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
83
<label class="md-overlay" for="__drawer"></label>
84
<div data-md-component="skip">
85
86
87
<a href="#document-ingestion" class="md-skip">
88
Skip to content
89
</a>
90
91
</div>
92
<div data-md-component="announce">
93
94
</div>
95
96
97
98
99
<header class="md-header" data-md-component="header">
100
<nav class="md-header__inner md-grid" aria-label="Header">
101
<!-- The link is named by its aria-label, so the logo image is decorative and carries an empty alt. -->
<a href="../.." title="PlanOpticon" class="md-header__button md-logo" aria-label="PlanOpticon" data-md-component="logo">
102
103
<img src="../../assets/images/conflict-logo.svg" alt="">
104
105
</a>
106
<label class="md-header__button md-icon" for="__drawer">
107
108
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
109
</label>
110
<div class="md-header__title" data-md-component="header-title">
111
<div class="md-header__ellipsis">
112
<div class="md-header__topic">
113
<span class="md-ellipsis">
114
PlanOpticon
115
</span>
116
</div>
117
<div class="md-header__topic" data-md-component="header-topic">
118
<span class="md-ellipsis">
119
120
Document Ingestion
121
122
</span>
123
</div>
124
</div>
125
</div>
126
127
128
<form class="md-header__option" data-md-component="palette">
129
130
131
132
133
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="custom" data-md-color-accent="custom" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
134
135
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
136
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
137
</label>
138
139
140
141
142
143
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="custom" data-md-color-accent="custom" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
144
145
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_0" hidden>
146
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
147
</label>
148
149
150
</form>
151
152
153
154
<script>
  // Apply the palette stored under "__palette" (if any) to <body>.
  var palette = __md_get("__palette");
  if (palette && palette.color) {
    // A stored media value of "(prefers-color-scheme)" is replaced with the
    // attributes of the palette input matching the current light/dark query.
    if ("(prefers-color-scheme)" === palette.color.media) {
      var media = matchMedia("(prefers-color-scheme: light)"),
        input;
      if (media.matches) {
        input = document.querySelector("[data-md-color-media='(prefers-color-scheme: light)']");
      } else {
        input = document.querySelector("[data-md-color-media='(prefers-color-scheme: dark)']");
      }
      palette.color.media = input.getAttribute("data-md-color-media");
      palette.color.scheme = input.getAttribute("data-md-color-scheme");
      palette.color.primary = input.getAttribute("data-md-color-primary");
      palette.color.accent = input.getAttribute("data-md-color-accent");
    }
    // Copy every palette entry onto <body> as a data-md-color-<key> attribute.
    for (var [key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
155
156
157
158
159
160
<label class="md-header__button md-icon" for="__search">
161
162
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
163
</label>
164
<!-- Search overlay. An element with role="dialog" needs an accessible name; it is supplied via aria-label. -->
<div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
165
<label class="md-search__overlay" for="__search"></label>
166
<div class="md-search__inner" role="search">
167
<form class="md-search__form" name="search">
168
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
169
<label class="md-search__icon md-icon" for="__search">
170
171
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
172
173
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
174
</label>
175
<nav class="md-search__options" aria-label="Search">
176
177
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
178
179
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
180
</button>
181
</nav>
182
183
<div class="md-search__suggest" data-md-component="search-suggest"></div>
184
185
</form>
186
<div class="md-search__output">
187
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
188
<div class="md-search-result" data-md-component="search-result">
189
<div class="md-search-result__meta">
190
Initializing search
191
</div>
192
<ol class="md-search-result__list" role="presentation"></ol>
193
</div>
194
</div>
195
</div>
196
</div>
197
</div>
198
199
200
201
<div class="md-header__source">
202
<a href="https://github.com/ConflictHQ/PlanOpticon" title="Go to repository" class="md-source" data-md-component="source">
203
<div class="md-source__icon md-icon">
204
205
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
206
</div>
207
<div class="md-source__repository">
208
ConflictHQ/PlanOpticon
209
</div>
210
</a>
211
</div>
212
213
</nav>
214
215
</header>
216
217
<div class="md-container" data-md-component="container">
218
219
220
221
222
223
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
224
<div class="md-grid">
225
<ul class="md-tabs__list">
226
227
228
229
230
231
232
<li class="md-tabs__item">
233
<a href="../.." class="md-tabs__link">
234
235
236
237
238
239
Home
240
241
</a>
242
</li>
243
244
245
246
247
248
249
250
251
252
253
<li class="md-tabs__item">
254
<a href="../../getting-started/installation/" class="md-tabs__link">
255
256
257
258
Getting Started
259
260
</a>
261
</li>
262
263
264
265
266
267
268
269
270
271
272
273
274
275
<li class="md-tabs__item md-tabs__item--active">
276
<a href="../single-video/" class="md-tabs__link">
277
278
279
280
User Guide
281
282
</a>
283
</li>
284
285
286
287
288
289
290
291
292
293
<li class="md-tabs__item">
294
<a href="../../use-cases/" class="md-tabs__link">
295
296
297
298
299
300
Use Cases
301
302
</a>
303
</li>
304
305
306
307
308
309
310
311
312
<li class="md-tabs__item">
313
<a href="../../cli-reference/" class="md-tabs__link">
314
315
316
317
318
319
CLI Reference
320
321
</a>
322
</li>
323
324
325
326
327
328
329
330
331
332
333
<li class="md-tabs__item">
334
<a href="../../architecture/overview/" class="md-tabs__link">
335
336
337
338
Architecture
339
340
</a>
341
</li>
342
343
344
345
346
347
348
349
350
351
352
353
<li class="md-tabs__item">
354
<a href="../../api/models/" class="md-tabs__link">
355
356
357
358
API Reference
359
360
</a>
361
</li>
362
363
364
365
366
367
368
369
370
371
<li class="md-tabs__item">
372
<a href="../../faq/" class="md-tabs__link">
373
374
375
376
377
378
FAQ & Troubleshooting
379
380
</a>
381
</li>
382
383
384
385
386
387
388
389
390
<li class="md-tabs__item">
391
<a href="../../contributing/" class="md-tabs__link">
392
393
394
395
396
397
Contributing
398
399
</a>
400
</li>
401
402
403
404
</ul>
405
</div>
406
</nav>
407
408
409
410
<main class="md-main" data-md-component="main">
411
<div class="md-main__inner md-grid">
412
413
414
415
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
416
<div class="md-sidebar__scrollwrap">
417
<div class="md-sidebar__inner">
418
419
420
421
422
423
424
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
425
<label class="md-nav__title" for="__drawer">
426
<!-- The link is named by its aria-label, so the logo image is decorative and carries an empty alt. -->
<a href="../.." title="PlanOpticon" class="md-nav__button md-logo" aria-label="PlanOpticon" data-md-component="logo">
427
428
<img src="../../assets/images/conflict-logo.svg" alt="">
429
430
</a>
431
PlanOpticon
432
</label>
433
434
<div class="md-nav__source">
435
<a href="https://github.com/ConflictHQ/PlanOpticon" title="Go to repository" class="md-source" data-md-component="source">
436
<div class="md-source__icon md-icon">
437
438
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
439
</div>
440
<div class="md-source__repository">
441
ConflictHQ/PlanOpticon
442
</div>
443
</a>
444
</div>
445
446
<ul class="md-nav__list" data-md-scrollfix>
447
448
449
450
451
452
453
454
<li class="md-nav__item">
455
<a href="../.." class="md-nav__link">
456
457
458
459
<span class="md-ellipsis">
460
461
462
Home
463
464
465
466
</span>
467
468
469
470
</a>
471
</li>
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
<li class="md-nav__item md-nav__item--nested">
492
493
494
495
496
497
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
498
499
500
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
501
502
503
504
<span class="md-ellipsis">
505
506
507
Getting Started
508
509
510
511
</span>
512
513
514
515
<span class="md-nav__icon md-icon"></span>
516
</label>
517
518
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
519
<label class="md-nav__title" for="__nav_2">
520
<span class="md-nav__icon md-icon"></span>
521
522
523
Getting Started
524
525
526
</label>
527
<ul class="md-nav__list" data-md-scrollfix>
528
529
530
531
532
533
534
535
<li class="md-nav__item">
536
<a href="../../getting-started/installation/" class="md-nav__link">
537
538
539
540
<span class="md-ellipsis">
541
542
543
Installation
544
545
546
547
</span>
548
549
550
551
</a>
552
</li>
553
554
555
556
557
558
559
560
561
562
563
<li class="md-nav__item">
564
<a href="../../getting-started/quickstart/" class="md-nav__link">
565
566
567
568
<span class="md-ellipsis">
569
570
571
Quick Start
572
573
574
575
</span>
576
577
578
579
</a>
580
</li>
581
582
583
584
585
586
587
588
589
590
591
<li class="md-nav__item">
592
<a href="../../getting-started/configuration/" class="md-nav__link">
593
594
595
596
<span class="md-ellipsis">
597
598
599
Configuration
600
601
602
603
</span>
604
605
606
607
</a>
608
</li>
609
610
611
612
613
</ul>
614
</nav>
615
616
</li>
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
642
643
644
645
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" checked>
646
647
648
<!-- No tabindex here: the previous empty value was not a valid integer and browsers ignored it, so the label keeps its default non-focusable behaviour. -->
<label class="md-nav__link" for="__nav_3" id="__nav_3_label">
649
650
651
652
<span class="md-ellipsis">
653
654
655
User Guide
656
657
658
659
</span>
660
661
662
663
<span class="md-nav__icon md-icon"></span>
664
</label>
665
666
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="true">
667
<label class="md-nav__title" for="__nav_3">
668
<span class="md-nav__icon md-icon"></span>
669
670
671
User Guide
672
673
674
</label>
675
<ul class="md-nav__list" data-md-scrollfix>
676
677
678
679
680
681
682
683
<li class="md-nav__item">
684
<a href="../single-video/" class="md-nav__link">
685
686
687
688
<span class="md-ellipsis">
689
690
691
Single Video Analysis
692
693
694
695
</span>
696
697
698
699
</a>
700
</li>
701
702
703
704
705
706
707
708
709
710
711
<li class="md-nav__item">
712
<a href="../batch/" class="md-nav__link">
713
714
715
716
<span class="md-ellipsis">
717
718
719
Batch Processing
720
721
722
723
</span>
724
725
726
727
</a>
728
</li>
729
730
731
732
733
734
735
736
737
738
739
740
741
<li class="md-nav__item md-nav__item--active">
742
743
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
744
745
746
747
748
749
<label class="md-nav__link md-nav__link--active" for="__toc">
750
751
752
753
<span class="md-ellipsis">
754
755
756
Document Ingestion
757
758
759
760
</span>
761
762
763
764
<span class="md-nav__icon md-icon"></span>
765
</label>
766
767
<a href="./" class="md-nav__link md-nav__link--active">
768
769
770
771
<span class="md-ellipsis">
772
773
774
Document Ingestion
775
776
777
778
</span>
779
780
781
782
</a>
783
784
785
786
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
787
788
789
790
791
792
793
<label class="md-nav__title" for="__toc">
794
<span class="md-nav__icon md-icon"></span>
795
Table of contents
796
</label>
797
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
798
799
<li class="md-nav__item">
800
<a href="#supported-formats" class="md-nav__link">
801
<span class="md-ellipsis">
802
803
Supported formats
804
805
</span>
806
</a>
807
808
</li>
809
810
<li class="md-nav__item">
811
<a href="#cli-usage" class="md-nav__link">
812
<span class="md-ellipsis">
813
814
CLI usage
815
816
</span>
817
</a>
818
819
<nav class="md-nav" aria-label="CLI usage">
820
<ul class="md-nav__list">
821
822
<li class="md-nav__item">
823
<a href="#planopticon-ingest" class="md-nav__link">
824
<span class="md-ellipsis">
825
826
planopticon ingest
827
828
</span>
829
</a>
830
831
</li>
832
833
<li class="md-nav__item">
834
<a href="#single-file-ingestion" class="md-nav__link">
835
<span class="md-ellipsis">
836
837
Single file ingestion
838
839
</span>
840
</a>
841
842
</li>
843
844
<li class="md-nav__item">
845
<a href="#directory-ingestion" class="md-nav__link">
846
<span class="md-ellipsis">
847
848
Directory ingestion
849
850
</span>
851
</a>
852
853
</li>
854
855
<li class="md-nav__item">
856
<a href="#merging-into-an-existing-knowledge-graph" class="md-nav__link">
857
<span class="md-ellipsis">
858
859
Merging into an existing knowledge graph
860
861
</span>
862
</a>
863
864
</li>
865
866
<li class="md-nav__item">
867
<a href="#choosing-an-llm-provider" class="md-nav__link">
868
<span class="md-ellipsis">
869
870
Choosing an LLM provider
871
872
</span>
873
</a>
874
875
</li>
876
877
<li class="md-nav__item">
878
<a href="#output" class="md-nav__link">
879
<span class="md-ellipsis">
880
881
Output
882
883
</span>
884
</a>
885
886
</li>
887
888
</ul>
889
</nav>
890
891
</li>
892
893
<li class="md-nav__item">
894
<a href="#how-each-processor-works" class="md-nav__link">
895
<span class="md-ellipsis">
896
897
How each processor works
898
899
</span>
900
</a>
901
902
<nav class="md-nav" aria-label="How each processor works">
903
<ul class="md-nav__list">
904
905
<li class="md-nav__item">
906
<a href="#pdf-processor" class="md-nav__link">
907
<span class="md-ellipsis">
908
909
PDF processor
910
911
</span>
912
</a>
913
914
</li>
915
916
<li class="md-nav__item">
917
<a href="#markdown-processor" class="md-nav__link">
918
<span class="md-ellipsis">
919
920
Markdown processor
921
922
</span>
923
</a>
924
925
</li>
926
927
<li class="md-nav__item">
928
<a href="#plaintext-processor" class="md-nav__link">
929
<span class="md-ellipsis">
930
931
Plaintext processor
932
933
</span>
934
</a>
935
936
</li>
937
938
</ul>
939
</nav>
940
941
</li>
942
943
<li class="md-nav__item">
944
<a href="#the-ingestion-pipeline" class="md-nav__link">
945
<span class="md-ellipsis">
946
947
The ingestion pipeline
948
949
</span>
950
</a>
951
952
<nav class="md-nav" aria-label="The ingestion pipeline">
953
<ul class="md-nav__list">
954
955
<li class="md-nav__item">
956
<a href="#step-1-processor-selection" class="md-nav__link">
957
<span class="md-ellipsis">
958
959
Step 1: Processor selection
960
961
</span>
962
</a>
963
964
</li>
965
966
<li class="md-nav__item">
967
<a href="#step-2-text-extraction" class="md-nav__link">
968
<span class="md-ellipsis">
969
970
Step 2: Text extraction
971
972
</span>
973
</a>
974
975
</li>
976
977
<li class="md-nav__item">
978
<a href="#step-3-source-registration" class="md-nav__link">
979
<span class="md-ellipsis">
980
981
Step 3: Source registration
982
983
</span>
984
</a>
985
986
</li>
987
988
<li class="md-nav__item">
989
<a href="#step-4-entity-and-relationship-extraction" class="md-nav__link">
990
<span class="md-ellipsis">
991
992
Step 4: Entity and relationship extraction
993
994
</span>
995
</a>
996
997
</li>
998
999
<li class="md-nav__item">
1000
<a href="#step-5-storage" class="md-nav__link">
1001
<span class="md-ellipsis">
1002
1003
Step 5: Storage
1004
1005
</span>
1006
</a>
1007
1008
</li>
1009
1010
</ul>
1011
</nav>
1012
1013
</li>
1014
1015
<li class="md-nav__item">
1016
<a href="#combining-with-video-analysis" class="md-nav__link">
1017
<span class="md-ellipsis">
1018
1019
Combining with video analysis
1020
1021
</span>
1022
</a>
1023
1024
</li>
1025
1026
<li class="md-nav__item">
1027
<a href="#python-api" class="md-nav__link">
1028
<span class="md-ellipsis">
1029
1030
Python API
1031
1032
</span>
1033
</a>
1034
1035
<nav class="md-nav" aria-label="Python API">
1036
<ul class="md-nav__list">
1037
1038
<li class="md-nav__item">
1039
<a href="#ingesting-a-single-file" class="md-nav__link">
1040
<span class="md-ellipsis">
1041
1042
Ingesting a single file
1043
1044
</span>
1045
</a>
1046
1047
</li>
1048
1049
<li class="md-nav__item">
1050
<a href="#ingesting-a-directory" class="md-nav__link">
1051
<span class="md-ellipsis">
1052
1053
Ingesting a directory
1054
1055
</span>
1056
</a>
1057
1058
</li>
1059
1060
<li class="md-nav__item">
1061
<a href="#listing-supported-extensions" class="md-nav__link">
1062
<span class="md-ellipsis">
1063
1064
Listing supported extensions
1065
1066
</span>
1067
</a>
1068
1069
</li>
1070
1071
<li class="md-nav__item">
1072
<a href="#working-with-processors-directly" class="md-nav__link">
1073
<span class="md-ellipsis">
1074
1075
Working with processors directly
1076
1077
</span>
1078
</a>
1079
1080
</li>
1081
1082
</ul>
1083
</nav>
1084
1085
</li>
1086
1087
<li class="md-nav__item">
1088
<a href="#extending-with-custom-processors" class="md-nav__link">
1089
<span class="md-ellipsis">
1090
1091
Extending with custom processors
1092
1093
</span>
1094
</a>
1095
1096
</li>
1097
1098
<li class="md-nav__item">
1099
<a href="#companion-repl" class="md-nav__link">
1100
<span class="md-ellipsis">
1101
1102
Companion REPL
1103
1104
</span>
1105
</a>
1106
1107
</li>
1108
1109
<li class="md-nav__item">
1110
<a href="#common-workflows" class="md-nav__link">
1111
<span class="md-ellipsis">
1112
1113
Common workflows
1114
1115
</span>
1116
</a>
1117
1118
<nav class="md-nav" aria-label="Common workflows">
1119
<ul class="md-nav__list">
1120
1121
<li class="md-nav__item">
1122
<a href="#build-a-project-knowledge-base-from-scratch" class="md-nav__link">
1123
<span class="md-ellipsis">
1124
1125
Build a project knowledge base from scratch
1126
1127
</span>
1128
</a>
1129
1130
</li>
1131
1132
<li class="md-nav__item">
1133
<a href="#incrementally-build-a-knowledge-graph" class="md-nav__link">
1134
<span class="md-ellipsis">
1135
1136
Incrementally build a knowledge graph
1137
1138
</span>
1139
</a>
1140
1141
</li>
1142
1143
<li class="md-nav__item">
1144
<a href="#ingest-from-google-workspace-or-microsoft-365" class="md-nav__link">
1145
<span class="md-ellipsis">
1146
1147
Ingest from Google Workspace or Microsoft 365
1148
1149
</span>
1150
</a>
1151
1152
</li>
1153
1154
</ul>
1155
</nav>
1156
1157
</li>
1158
1159
</ul>
1160
1161
</nav>
1162
1163
</li>
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
<li class="md-nav__item">
1175
<a href="../cloud-sources/" class="md-nav__link">
1176
1177
1178
1179
<span class="md-ellipsis">
1180
1181
1182
Cloud Sources
1183
1184
1185
1186
</span>
1187
1188
1189
1190
</a>
1191
</li>
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
<li class="md-nav__item">
1203
<a href="../knowledge-graphs/" class="md-nav__link">
1204
1205
1206
1207
<span class="md-ellipsis">
1208
1209
1210
Knowledge Graphs
1211
1212
1213
1214
</span>
1215
1216
1217
1218
</a>
1219
</li>
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
<li class="md-nav__item">
1231
<a href="../companion/" class="md-nav__link">
1232
1233
1234
1235
<span class="md-ellipsis">
1236
1237
1238
Interactive Companion
1239
1240
1241
1242
</span>
1243
1244
1245
1246
</a>
1247
</li>
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
<li class="md-nav__item">
1259
<a href="../planning-agent/" class="md-nav__link">
1260
1261
1262
1263
<span class="md-ellipsis">
1264
1265
1266
Planning Agent
1267
1268
1269
1270
</span>
1271
1272
1273
1274
</a>
1275
</li>
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
<li class="md-nav__item">
1287
<a href="../authentication/" class="md-nav__link">
1288
1289
1290
1291
<span class="md-ellipsis">
1292
1293
1294
Authentication
1295
1296
1297
1298
</span>
1299
1300
1301
1302
</a>
1303
</li>
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
<li class="md-nav__item">
1315
<a href="../export/" class="md-nav__link">
1316
1317
1318
1319
<span class="md-ellipsis">
1320
1321
1322
Export & Documents
1323
1324
1325
1326
</span>
1327
1328
1329
1330
</a>
1331
</li>
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
<li class="md-nav__item">
1343
<a href="../output-formats/" class="md-nav__link">
1344
1345
1346
1347
<span class="md-ellipsis">
1348
1349
1350
Output Formats
1351
1352
1353
1354
</span>
1355
1356
1357
1358
</a>
1359
</li>
1360
1361
1362
1363
1364
</ul>
1365
</nav>
1366
1367
</li>
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
<li class="md-nav__item">
1378
<a href="../../use-cases/" class="md-nav__link">
1379
1380
1381
1382
<span class="md-ellipsis">
1383
1384
1385
Use Cases
1386
1387
1388
1389
</span>
1390
1391
1392
1393
</a>
1394
</li>
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
<li class="md-nav__item">
1405
<a href="../../cli-reference/" class="md-nav__link">
1406
1407
1408
1409
<span class="md-ellipsis">
1410
1411
1412
CLI Reference
1413
1414
1415
1416
</span>
1417
1418
1419
1420
</a>
1421
</li>
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
<li class="md-nav__item md-nav__item--nested">
1442
1443
1444
1445
1446
1447
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_6" >
1448
1449
1450
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
1451
1452
1453
1454
<span class="md-ellipsis">
1455
1456
1457
Architecture
1458
1459
1460
1461
</span>
1462
1463
1464
1465
<span class="md-nav__icon md-icon"></span>
1466
</label>
1467
1468
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
1469
<label class="md-nav__title" for="__nav_6">
1470
<span class="md-nav__icon md-icon"></span>
1471
1472
1473
Architecture
1474
1475
1476
</label>
1477
<ul class="md-nav__list" data-md-scrollfix>
1478
1479
1480
1481
1482
1483
1484
1485
<li class="md-nav__item">
1486
<a href="../../architecture/overview/" class="md-nav__link">
1487
1488
1489
1490
<span class="md-ellipsis">
1491
1492
1493
Overview
1494
1495
1496
1497
</span>
1498
1499
1500
1501
</a>
1502
</li>
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
<li class="md-nav__item">
1514
<a href="../../architecture/providers/" class="md-nav__link">
1515
1516
1517
1518
<span class="md-ellipsis">
1519
1520
1521
Provider System
1522
1523
1524
1525
</span>
1526
1527
1528
1529
</a>
1530
</li>
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
<li class="md-nav__item">
1542
<a href="../../architecture/pipeline/" class="md-nav__link">
1543
1544
1545
1546
<span class="md-ellipsis">
1547
1548
1549
Processing Pipeline
1550
1551
1552
1553
</span>
1554
1555
1556
1557
</a>
1558
</li>
1559
1560
1561
1562
1563
</ul>
1564
</nav>
1565
1566
</li>
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
<li class="md-nav__item md-nav__item--nested">
1587
1588
1589
1590
1591
1592
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
1593
1594
1595
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
1596
1597
1598
1599
<span class="md-ellipsis">
1600
1601
1602
API Reference
1603
1604
1605
1606
</span>
1607
1608
1609
1610
<span class="md-nav__icon md-icon"></span>
1611
</label>
1612
1613
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
1614
<label class="md-nav__title" for="__nav_7">
1615
<span class="md-nav__icon md-icon"></span>
1616
1617
1618
API Reference
1619
1620
1621
</label>
1622
<ul class="md-nav__list" data-md-scrollfix>
1623
1624
1625
1626
1627
1628
1629
1630
<li class="md-nav__item">
1631
<a href="../../api/models/" class="md-nav__link">
1632
1633
1634
1635
<span class="md-ellipsis">
1636
1637
1638
Models
1639
1640
1641
1642
</span>
1643
1644
1645
1646
</a>
1647
</li>
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
<li class="md-nav__item">
1659
<a href="../../api/providers/" class="md-nav__link">
1660
1661
1662
1663
<span class="md-ellipsis">
1664
1665
1666
Providers
1667
1668
1669
1670
</span>
1671
1672
1673
1674
</a>
1675
</li>
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
<li class="md-nav__item">
1687
<a href="../../api/analyzers/" class="md-nav__link">
1688
1689
1690
1691
<span class="md-ellipsis">
1692
1693
1694
Analyzers
1695
1696
1697
1698
</span>
1699
1700
1701
1702
</a>
1703
</li>
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
<li class="md-nav__item">
1715
<a href="../../api/agent/" class="md-nav__link">
1716
1717
1718
1719
<span class="md-ellipsis">
1720
1721
1722
Agent & Skills
1723
1724
1725
1726
</span>
1727
1728
1729
1730
</a>
1731
</li>
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
<li class="md-nav__item">
1743
<a href="../../api/sources/" class="md-nav__link">
1744
1745
1746
1747
<span class="md-ellipsis">
1748
1749
1750
Sources
1751
1752
1753
1754
</span>
1755
1756
1757
1758
</a>
1759
</li>
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
<li class="md-nav__item">
1771
<a href="../../api/auth/" class="md-nav__link">
1772
1773
1774
1775
<span class="md-ellipsis">
1776
1777
1778
Authentication
1779
1780
1781
1782
</span>
1783
1784
1785
1786
</a>
1787
</li>
1788
1789
1790
1791
1792
</ul>
1793
</nav>
1794
1795
</li>
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
<li class="md-nav__item">
1806
<a href="../../faq/" class="md-nav__link">
1807
1808
1809
1810
<span class="md-ellipsis">
1811
1812
1813
FAQ & Troubleshooting
1814
1815
1816
1817
</span>
1818
1819
1820
1821
</a>
1822
</li>
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
<li class="md-nav__item">
1833
<a href="../../contributing/" class="md-nav__link">
1834
1835
1836
1837
<span class="md-ellipsis">
1838
1839
1840
Contributing
1841
1842
1843
1844
</span>
1845
1846
1847
1848
</a>
1849
</li>
1850
1851
1852
1853
</ul>
1854
</nav>
1855
</div>
1856
</div>
1857
</div>
1858
1859
1860
1861
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
1862
<div class="md-sidebar__scrollwrap">
1863
<div class="md-sidebar__inner">
1864
1865
1866
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
1867
1868
1869
1870
1871
1872
1873
<label class="md-nav__title" for="__toc">
1874
<span class="md-nav__icon md-icon"></span>
1875
Table of contents
1876
</label>
1877
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
1878
1879
<li class="md-nav__item">
1880
<a href="#supported-formats" class="md-nav__link">
1881
<span class="md-ellipsis">
1882
1883
Supported formats
1884
1885
</span>
1886
</a>
1887
1888
</li>
1889
1890
<li class="md-nav__item">
1891
<a href="#cli-usage" class="md-nav__link">
1892
<span class="md-ellipsis">
1893
1894
CLI usage
1895
1896
</span>
1897
</a>
1898
1899
<nav class="md-nav" aria-label="CLI usage">
1900
<ul class="md-nav__list">
1901
1902
<li class="md-nav__item">
1903
<a href="#planopticon-ingest" class="md-nav__link">
1904
<span class="md-ellipsis">
1905
1906
planopticon ingest
1907
1908
</span>
1909
</a>
1910
1911
</li>
1912
1913
<li class="md-nav__item">
1914
<a href="#single-file-ingestion" class="md-nav__link">
1915
<span class="md-ellipsis">
1916
1917
Single file ingestion
1918
1919
</span>
1920
</a>
1921
1922
</li>
1923
1924
<li class="md-nav__item">
1925
<a href="#directory-ingestion" class="md-nav__link">
1926
<span class="md-ellipsis">
1927
1928
Directory ingestion
1929
1930
</span>
1931
</a>
1932
1933
</li>
1934
1935
<li class="md-nav__item">
1936
<a href="#merging-into-an-existing-knowledge-graph" class="md-nav__link">
1937
<span class="md-ellipsis">
1938
1939
Merging into an existing knowledge graph
1940
1941
</span>
1942
</a>
1943
1944
</li>
1945
1946
<li class="md-nav__item">
1947
<a href="#choosing-an-llm-provider" class="md-nav__link">
1948
<span class="md-ellipsis">
1949
1950
Choosing an LLM provider
1951
1952
</span>
1953
</a>
1954
1955
</li>
1956
1957
<li class="md-nav__item">
1958
<a href="#output" class="md-nav__link">
1959
<span class="md-ellipsis">
1960
1961
Output
1962
1963
</span>
1964
</a>
1965
1966
</li>
1967
1968
</ul>
1969
</nav>
1970
1971
</li>
1972
1973
<li class="md-nav__item">
1974
<a href="#how-each-processor-works" class="md-nav__link">
1975
<span class="md-ellipsis">
1976
1977
How each processor works
1978
1979
</span>
1980
</a>
1981
1982
<nav class="md-nav" aria-label="How each processor works">
1983
<ul class="md-nav__list">
1984
1985
<li class="md-nav__item">
1986
<a href="#pdf-processor" class="md-nav__link">
1987
<span class="md-ellipsis">
1988
1989
PDF processor
1990
1991
</span>
1992
</a>
1993
1994
</li>
1995
1996
<li class="md-nav__item">
1997
<a href="#markdown-processor" class="md-nav__link">
1998
<span class="md-ellipsis">
1999
2000
Markdown processor
2001
2002
</span>
2003
</a>
2004
2005
</li>
2006
2007
<li class="md-nav__item">
2008
<a href="#plaintext-processor" class="md-nav__link">
2009
<span class="md-ellipsis">
2010
2011
Plaintext processor
2012
2013
</span>
2014
</a>
2015
2016
</li>
2017
2018
</ul>
2019
</nav>
2020
2021
</li>
2022
2023
<li class="md-nav__item">
2024
<a href="#the-ingestion-pipeline" class="md-nav__link">
2025
<span class="md-ellipsis">
2026
2027
The ingestion pipeline
2028
2029
</span>
2030
</a>
2031
2032
<nav class="md-nav" aria-label="The ingestion pipeline">
2033
<ul class="md-nav__list">
2034
2035
<li class="md-nav__item">
2036
<a href="#step-1-processor-selection" class="md-nav__link">
2037
<span class="md-ellipsis">
2038
2039
Step 1: Processor selection
2040
2041
</span>
2042
</a>
2043
2044
</li>
2045
2046
<li class="md-nav__item">
2047
<a href="#step-2-text-extraction" class="md-nav__link">
2048
<span class="md-ellipsis">
2049
2050
Step 2: Text extraction
2051
2052
</span>
2053
</a>
2054
2055
</li>
2056
2057
<li class="md-nav__item">
2058
<a href="#step-3-source-registration" class="md-nav__link">
2059
<span class="md-ellipsis">
2060
2061
Step 3: Source registration
2062
2063
</span>
2064
</a>
2065
2066
</li>
2067
2068
<li class="md-nav__item">
2069
<a href="#step-4-entity-and-relationship-extraction" class="md-nav__link">
2070
<span class="md-ellipsis">
2071
2072
Step 4: Entity and relationship extraction
2073
2074
</span>
2075
</a>
2076
2077
</li>
2078
2079
<li class="md-nav__item">
2080
<a href="#step-5-storage" class="md-nav__link">
2081
<span class="md-ellipsis">
2082
2083
Step 5: Storage
2084
2085
</span>
2086
</a>
2087
2088
</li>
2089
2090
</ul>
2091
</nav>
2092
2093
</li>
2094
2095
<li class="md-nav__item">
2096
<a href="#combining-with-video-analysis" class="md-nav__link">
2097
<span class="md-ellipsis">
2098
2099
Combining with video analysis
2100
2101
</span>
2102
</a>
2103
2104
</li>
2105
2106
<li class="md-nav__item">
2107
<a href="#python-api" class="md-nav__link">
2108
<span class="md-ellipsis">
2109
2110
Python API
2111
2112
</span>
2113
</a>
2114
2115
<nav class="md-nav" aria-label="Python API">
2116
<ul class="md-nav__list">
2117
2118
<li class="md-nav__item">
2119
<a href="#ingesting-a-single-file" class="md-nav__link">
2120
<span class="md-ellipsis">
2121
2122
Ingesting a single file
2123
2124
</span>
2125
</a>
2126
2127
</li>
2128
2129
<li class="md-nav__item">
2130
<a href="#ingesting-a-directory" class="md-nav__link">
2131
<span class="md-ellipsis">
2132
2133
Ingesting a directory
2134
2135
</span>
2136
</a>
2137
2138
</li>
2139
2140
<li class="md-nav__item">
2141
<a href="#listing-supported-extensions" class="md-nav__link">
2142
<span class="md-ellipsis">
2143
2144
Listing supported extensions
2145
2146
</span>
2147
</a>
2148
2149
</li>
2150
2151
<li class="md-nav__item">
2152
<a href="#working-with-processors-directly" class="md-nav__link">
2153
<span class="md-ellipsis">
2154
2155
Working with processors directly
2156
2157
</span>
2158
</a>
2159
2160
</li>
2161
2162
</ul>
2163
</nav>
2164
2165
</li>
2166
2167
<li class="md-nav__item">
2168
<a href="#extending-with-custom-processors" class="md-nav__link">
2169
<span class="md-ellipsis">
2170
2171
Extending with custom processors
2172
2173
</span>
2174
</a>
2175
2176
</li>
2177
2178
<li class="md-nav__item">
2179
<a href="#companion-repl" class="md-nav__link">
2180
<span class="md-ellipsis">
2181
2182
Companion REPL
2183
2184
</span>
2185
</a>
2186
2187
</li>
2188
2189
<li class="md-nav__item">
2190
<a href="#common-workflows" class="md-nav__link">
2191
<span class="md-ellipsis">
2192
2193
Common workflows
2194
2195
</span>
2196
</a>
2197
2198
<nav class="md-nav" aria-label="Common workflows">
2199
<ul class="md-nav__list">
2200
2201
<li class="md-nav__item">
2202
<a href="#build-a-project-knowledge-base-from-scratch" class="md-nav__link">
2203
<span class="md-ellipsis">
2204
2205
Build a project knowledge base from scratch
2206
2207
</span>
2208
</a>
2209
2210
</li>
2211
2212
<li class="md-nav__item">
2213
<a href="#incrementally-build-a-knowledge-graph" class="md-nav__link">
2214
<span class="md-ellipsis">
2215
2216
Incrementally build a knowledge graph
2217
2218
</span>
2219
</a>
2220
2221
</li>
2222
2223
<li class="md-nav__item">
2224
<a href="#ingest-from-google-workspace-or-microsoft-365" class="md-nav__link">
2225
<span class="md-ellipsis">
2226
2227
Ingest from Google Workspace or Microsoft 365
2228
2229
</span>
2230
</a>
2231
2232
</li>
2233
2234
</ul>
2235
</nav>
2236
2237
</li>
2238
2239
</ul>
2240
2241
</nav>
2242
</div>
2243
</div>
2244
</div>
2245
2246
2247
2248
<div class="md-content" data-md-component="content">
2249
2250
<article class="md-content__inner md-typeset">
2251
2252
2253
2254
2255
2256
2257
2258
2259
<h1 id="document-ingestion">Document Ingestion<a class="headerlink" href="#document-ingestion" title="Permanent link">&para;</a></h1>
2260
<p>Document ingestion lets you process files -- PDFs, Markdown, and plaintext -- into a knowledge graph. PlanOpticon extracts text from documents, chunks it into manageable pieces, runs LLM-powered entity and relationship extraction, and stores the results in a FalkorDB knowledge graph. This is the same knowledge graph format produced by video analysis, so you can combine video and document insights in a single graph.</p>
2261
<h2 id="supported-formats">Supported formats<a class="headerlink" href="#supported-formats" title="Permanent link">&para;</a></h2>
2262
<table>
2263
<thead>
2264
<tr>
2265
<th>Extension</th>
2266
<th>Processor</th>
2267
<th>Description</th>
2268
</tr>
2269
</thead>
2270
<tbody>
2271
<tr>
2272
<td><code>.pdf</code></td>
2273
<td><code>PdfProcessor</code></td>
2274
<td>Extracts text page by page using pymupdf or pdfplumber</td>
2275
</tr>
2276
<tr>
2277
<td><code>.md</code>, <code>.markdown</code></td>
2278
<td><code>MarkdownProcessor</code></td>
2279
<td>Splits on headings into sections</td>
2280
</tr>
2281
<tr>
2282
<td><code>.txt</code>, <code>.text</code>, <code>.log</code>, <code>.csv</code></td>
2283
<td><code>PlaintextProcessor</code></td>
2284
<td>Splits on paragraph boundaries</td>
2285
</tr>
2286
</tbody>
2287
</table>
2288
<p>Additional formats can be added by implementing the <code>DocumentProcessor</code> base class and registering it (see <a href="#extending-with-custom-processors">Extending with custom processors</a> below).</p>
2289
<h2 id="cli-usage">CLI usage<a class="headerlink" href="#cli-usage" title="Permanent link">&para;</a></h2>
2290
<h3 id="planopticon-ingest"><code>planopticon ingest</code><a class="headerlink" href="#planopticon-ingest" title="Permanent link">&para;</a></h3>
2291
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>planopticon ingest INPUT_PATH [OPTIONS]
2292
</code></pre></div>
2293
<p><strong>Arguments:</strong></p>
2294
<table>
2295
<thead>
2296
<tr>
2297
<th>Argument</th>
2298
<th>Description</th>
2299
</tr>
2300
</thead>
2301
<tbody>
2302
<tr>
2303
<td><code>INPUT_PATH</code></td>
2304
<td>Path to a file or directory to ingest (must exist)</td>
2305
</tr>
2306
</tbody>
2307
</table>
2308
<p><strong>Options:</strong></p>
2309
<table>
2310
<thead>
2311
<tr>
2312
<th>Option</th>
2313
<th>Short</th>
2314
<th>Default</th>
2315
<th>Description</th>
2316
</tr>
2317
</thead>
2318
<tbody>
2319
<tr>
2320
<td><code>--output</code></td>
2321
<td><code>-o</code></td>
2322
<td>Current directory</td>
2323
<td>Output directory for the knowledge graph</td>
2324
</tr>
2325
<tr>
2326
<td><code>--db-path</code></td>
2327
<td></td>
2328
<td>None</td>
2329
<td>Path to an existing <code>knowledge_graph.db</code> to merge into</td>
2330
</tr>
2331
<tr>
2332
<td><code>--recursive / --no-recursive</code></td>
2333
<td><code>-r</code></td>
2334
<td><code>--recursive</code></td>
2335
<td>Recurse into subdirectories (directory ingestion only)</td>
2336
</tr>
2337
<tr>
2338
<td><code>--provider</code></td>
2339
<td><code>-p</code></td>
2340
<td><code>auto</code></td>
2341
<td>LLM provider for entity extraction (<code>openai</code>, <code>anthropic</code>, <code>gemini</code>, <code>ollama</code>, <code>azure</code>, <code>together</code>, <code>fireworks</code>, <code>cerebras</code>, <code>xai</code>)</td>
2342
</tr>
2343
<tr>
2344
<td><code>--chat-model</code></td>
2345
<td></td>
2346
<td>None</td>
2347
<td>Override the model used for LLM entity extraction</td>
2348
</tr>
2349
</tbody>
2350
</table>
2351
<h3 id="single-file-ingestion">Single file ingestion<a class="headerlink" href="#single-file-ingestion" title="Permanent link">&para;</a></h3>
2352
<p>Process a single document and create a new knowledge graph:</p>
2353
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>spec.md
2354
</code></pre></div>
2355
<p>This creates <code>knowledge_graph.db</code> and <code>knowledge_graph.json</code> in the current directory.</p>
2356
<p>Specify an output directory:</p>
2357
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>report.pdf<span class="w"> </span>-o<span class="w"> </span>./results
2358
</code></pre></div>
2359
<p>This creates <code>./results/knowledge_graph.db</code> and <code>./results/knowledge_graph.json</code>.</p>
2360
<h3 id="directory-ingestion">Directory ingestion<a class="headerlink" href="#directory-ingestion" title="Permanent link">&para;</a></h3>
2361
<p>Process all supported files in a directory:</p>
2362
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./docs/
2363
</code></pre></div>
2364
<p>By default, this recurses into subdirectories. To process only the top-level directory:</p>
2365
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./docs/<span class="w"> </span>--no-recursive
2366
</code></pre></div>
2367
<p>PlanOpticon automatically filters for supported file extensions. Unsupported files are silently skipped.</p>
2368
<h3 id="merging-into-an-existing-knowledge-graph">Merging into an existing knowledge graph<a class="headerlink" href="#merging-into-an-existing-knowledge-graph" title="Permanent link">&para;</a></h3>
2369
<p>To add document content to an existing knowledge graph (e.g., one created from video analysis), use <code>--db-path</code>:</p>
2370
<div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="c1"># First, analyze a video</span>
2371
<a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a>planopticon<span class="w"> </span>analyze<span class="w"> </span>meeting.mp4<span class="w"> </span>-o<span class="w"> </span>./results
2372
<a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a>
2373
<a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="c1"># Then, ingest supplementary documents into the same graph</span>
2374
<a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./meeting-notes/<span class="w"> </span>--db-path<span class="w"> </span>./results/knowledge_graph.db
2375
</code></pre></div>
2376
<p>The ingested entities and relationships are merged with the existing graph. Duplicate entities are consolidated automatically by the knowledge graph engine.</p>
2377
<h3 id="choosing-an-llm-provider">Choosing an LLM provider<a class="headerlink" href="#choosing-an-llm-provider" title="Permanent link">&para;</a></h3>
2378
<p>Entity and relationship extraction requires an LLM. By default, PlanOpticon auto-detects available providers based on your environment variables. You can override this:</p>
2379
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="c1"># Use Anthropic for extraction</span>
2380
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>docs/<span class="w"> </span>-p<span class="w"> </span>anthropic
2381
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a>
2382
<a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a><span class="c1"># Use a specific model</span>
2383
<a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>docs/<span class="w"> </span>-p<span class="w"> </span>openai<span class="w"> </span>--chat-model<span class="w"> </span>gpt-4o
2384
<a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a>
2385
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="c1"># Use a local Ollama model</span>
2386
<a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>docs/<span class="w"> </span>-p<span class="w"> </span>ollama<span class="w"> </span>--chat-model<span class="w"> </span>llama3
2387
</code></pre></div>
2388
<h3 id="output">Output<a class="headerlink" href="#output" title="Permanent link">&para;</a></h3>
2389
<p>After ingestion, PlanOpticon prints a summary:</p>
2390
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>Knowledge graph: ./knowledge_graph.db
2391
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a> spec.md: 12 chunks
2392
<a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a> architecture.md: 8 chunks
2393
<a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a> requirements.txt: 3 chunks
2394
<a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a>
2395
<a id="__codelineno-7-6" name="__codelineno-7-6" href="#__codelineno-7-6"></a>Ingestion complete:
2396
<a id="__codelineno-7-7" name="__codelineno-7-7" href="#__codelineno-7-7"></a> Files processed: 3
2397
<a id="__codelineno-7-8" name="__codelineno-7-8" href="#__codelineno-7-8"></a> Total chunks: 23
2398
<a id="__codelineno-7-9" name="__codelineno-7-9" href="#__codelineno-7-9"></a> Entities extracted: 47
2399
<a id="__codelineno-7-10" name="__codelineno-7-10" href="#__codelineno-7-10"></a> Relationships: 31
2400
<a id="__codelineno-7-11" name="__codelineno-7-11" href="#__codelineno-7-11"></a> Knowledge graph: ./knowledge_graph.db
2401
</code></pre></div>
2402
<p>Both <code>.db</code> (SQLite/FalkorDB) and <code>.json</code> formats are saved automatically.</p>
2403
<h2 id="how-each-processor-works">How each processor works<a class="headerlink" href="#how-each-processor-works" title="Permanent link">&para;</a></h2>
2404
<h3 id="pdf-processor">PDF processor<a class="headerlink" href="#pdf-processor" title="Permanent link">&para;</a></h3>
2405
<p>The <code>PdfProcessor</code> extracts text from PDF files on a per-page basis. It tries two extraction libraries in order:</p>
2406
<ol>
2407
<li><strong>pymupdf</strong> (preferred) -- Fast, reliable text extraction. Install with <code>pip install pymupdf</code>.</li>
2408
<li><strong>pdfplumber</strong> (fallback) -- Alternative extractor. Install with <code>pip install pdfplumber</code>.</li>
2409
</ol>
2410
<p>If neither library is installed, the processor raises an <code>ImportError</code> with installation instructions.</p>
2411
<p>Each page becomes a separate <code>DocumentChunk</code> with:</p>
2412
<ul>
2413
<li><code>text</code>: The extracted text content of the page</li>
2414
<li><code>page</code>: The 1-based page number</li>
2415
<li><code>metadata.extraction_method</code>: Which library was used (<code>pymupdf</code> or <code>pdfplumber</code>)</li>
2416
</ul>
2417
<p>To install PDF support:</p>
2418
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>pip<span class="w"> </span>install<span class="w"> </span><span class="s1">&#39;planopticon[pdf]&#39;</span>
2419
<a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a><span class="c1"># or</span>
2420
<a id="__codelineno-8-3" name="__codelineno-8-3" href="#__codelineno-8-3"></a>pip<span class="w"> </span>install<span class="w"> </span>pymupdf
2421
<a id="__codelineno-8-4" name="__codelineno-8-4" href="#__codelineno-8-4"></a><span class="c1"># or</span>
2422
<a id="__codelineno-8-5" name="__codelineno-8-5" href="#__codelineno-8-5"></a>pip<span class="w"> </span>install<span class="w"> </span>pdfplumber
2423
</code></pre></div>
2424
<h3 id="markdown-processor">Markdown processor<a class="headerlink" href="#markdown-processor" title="Permanent link">&para;</a></h3>
2425
<p>The <code>MarkdownProcessor</code> splits Markdown files on heading boundaries (lines starting with <code>#</code> through <code>######</code>). Each heading and its content until the next heading becomes a separate chunk.</p>
2426
<p><strong>Splitting behavior:</strong></p>
2427
<ul>
2428
<li>If the file contains headings, each heading section becomes a chunk. The <code>section</code> field records the heading text.</li>
2429
<li>Content before the first heading is captured as a <code>(preamble)</code> chunk.</li>
2430
<li>If the file contains no headings, it falls back to paragraph-based chunking (same as plaintext).</li>
2431
</ul>
2432
<p>For example, a file with this structure:</p>
2433
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a>Some intro text.
2434
<a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a>
2435
<a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a><span class="gh"># Architecture</span>
2436
<a id="__codelineno-9-4" name="__codelineno-9-4" href="#__codelineno-9-4"></a>
2437
<a id="__codelineno-9-5" name="__codelineno-9-5" href="#__codelineno-9-5"></a>The system uses a microservices architecture...
2438
<a id="__codelineno-9-6" name="__codelineno-9-6" href="#__codelineno-9-6"></a>
2439
<a id="__codelineno-9-7" name="__codelineno-9-7" href="#__codelineno-9-7"></a><span class="gu">## Components</span>
2440
<a id="__codelineno-9-8" name="__codelineno-9-8" href="#__codelineno-9-8"></a>
2441
<a id="__codelineno-9-9" name="__codelineno-9-9" href="#__codelineno-9-9"></a>There are three main components...
2442
<a id="__codelineno-9-10" name="__codelineno-9-10" href="#__codelineno-9-10"></a>
2443
<a id="__codelineno-9-11" name="__codelineno-9-11" href="#__codelineno-9-11"></a><span class="gh"># Deployment</span>
2444
<a id="__codelineno-9-12" name="__codelineno-9-12" href="#__codelineno-9-12"></a>
2445
<a id="__codelineno-9-13" name="__codelineno-9-13" href="#__codelineno-9-13"></a>Deployment is handled via...
2446
</code></pre></div>
2447
<p>This produces four chunks: <code>(preamble)</code>, <code>Architecture</code>, <code>Components</code>, and <code>Deployment</code>.</p>
2448
<h3 id="plaintext-processor">Plaintext processor<a class="headerlink" href="#plaintext-processor" title="Permanent link">&para;</a></h3>
2449
<p>The <code>PlaintextProcessor</code> handles <code>.txt</code>, <code>.text</code>, <code>.log</code>, and <code>.csv</code> files. It splits text on paragraph boundaries (double newlines) and groups paragraphs into chunks with a configurable maximum size.</p>
2450
<p><strong>Chunking parameters:</strong></p>
2451
<table>
2452
<thead>
2453
<tr>
2454
<th>Parameter</th>
2455
<th>Default</th>
2456
<th>Description</th>
2457
</tr>
2458
</thead>
2459
<tbody>
2460
<tr>
2461
<td><code>max_chunk_size</code></td>
2462
<td>2000 characters</td>
2463
<td>Maximum size of each chunk</td>
2464
</tr>
2465
<tr>
2466
<td><code>overlap</code></td>
2467
<td>200 characters</td>
2468
<td>Number of characters from the end of one chunk to repeat at the start of the next</td>
2469
</tr>
2470
</tbody>
2471
</table>
2472
<p>The overlap ensures that entities and context spanning a chunk boundary are not lost. Chunks are created by accumulating paragraphs until the next paragraph would exceed <code>max_chunk_size</code>, at which point the current chunk is flushed and a new one begins.</p>
2473
<h2 id="the-ingestion-pipeline">The ingestion pipeline<a class="headerlink" href="#the-ingestion-pipeline" title="Permanent link">&para;</a></h2>
2474
<p>Document ingestion follows this pipeline:</p>
2475
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a>File on disk
2476
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a> |
2477
<a id="__codelineno-10-3" name="__codelineno-10-3" href="#__codelineno-10-3"></a> v
2478
<a id="__codelineno-10-4" name="__codelineno-10-4" href="#__codelineno-10-4"></a>Processor selection (by file extension)
2479
<a id="__codelineno-10-5" name="__codelineno-10-5" href="#__codelineno-10-5"></a> |
2480
<a id="__codelineno-10-6" name="__codelineno-10-6" href="#__codelineno-10-6"></a> v
2481
<a id="__codelineno-10-7" name="__codelineno-10-7" href="#__codelineno-10-7"></a>Text extraction (PDF pages / Markdown sections / plaintext paragraphs)
2482
<a id="__codelineno-10-8" name="__codelineno-10-8" href="#__codelineno-10-8"></a> |
2483
<a id="__codelineno-10-9" name="__codelineno-10-9" href="#__codelineno-10-9"></a> v
2484
<a id="__codelineno-10-10" name="__codelineno-10-10" href="#__codelineno-10-10"></a>DocumentChunk objects (text + metadata)
2485
<a id="__codelineno-10-11" name="__codelineno-10-11" href="#__codelineno-10-11"></a> |
2486
<a id="__codelineno-10-12" name="__codelineno-10-12" href="#__codelineno-10-12"></a> v
2487
<a id="__codelineno-10-13" name="__codelineno-10-13" href="#__codelineno-10-13"></a>Source registration (provenance tracking in the KG)
2488
<a id="__codelineno-10-14" name="__codelineno-10-14" href="#__codelineno-10-14"></a> |
2489
<a id="__codelineno-10-15" name="__codelineno-10-15" href="#__codelineno-10-15"></a> v
2490
<a id="__codelineno-10-16" name="__codelineno-10-16" href="#__codelineno-10-16"></a>KG content addition (LLM entity/relationship extraction per chunk)
2491
<a id="__codelineno-10-17" name="__codelineno-10-17" href="#__codelineno-10-17"></a> |
2492
<a id="__codelineno-10-18" name="__codelineno-10-18" href="#__codelineno-10-18"></a> v
2493
<a id="__codelineno-10-19" name="__codelineno-10-19" href="#__codelineno-10-19"></a>Knowledge graph storage (.db + .json)
2494
</code></pre></div>
2495
<h3 id="step-1-processor-selection">Step 1: Processor selection<a class="headerlink" href="#step-1-processor-selection" title="Permanent link">&para;</a></h3>
2496
<p>PlanOpticon maintains a registry of processors keyed by file extension. When you call <code>ingest_file()</code>, it looks up the appropriate processor using <code>get_processor(path)</code>. If no processor is registered for the file extension, a <code>ValueError</code> is raised.</p>
2497
<h3 id="step-2-text-extraction">Step 2: Text extraction<a class="headerlink" href="#step-2-text-extraction" title="Permanent link">&para;</a></h3>
2498
<p>The selected processor reads the file and produces a list of <code>DocumentChunk</code> objects. Each chunk contains:</p>
2499
<table>
2500
<thead>
2501
<tr>
2502
<th>Field</th>
2503
<th>Type</th>
2504
<th>Description</th>
2505
</tr>
2506
</thead>
2507
<tbody>
2508
<tr>
2509
<td><code>text</code></td>
2510
<td><code>str</code></td>
2511
<td>The extracted text content</td>
2512
</tr>
2513
<tr>
2514
<td><code>source_file</code></td>
2515
<td><code>str</code></td>
2516
<td>Path to the source file</td>
2517
</tr>
2518
<tr>
2519
<td><code>chunk_index</code></td>
2520
<td><code>int</code></td>
2521
<td>Sequential index of this chunk within the file</td>
2522
</tr>
2523
<tr>
2524
<td><code>page</code></td>
2525
<td><code>Optional[int]</code></td>
2526
<td>Page number (PDF only, 1-based)</td>
2527
</tr>
2528
<tr>
2529
<td><code>section</code></td>
2530
<td><code>Optional[str]</code></td>
2531
<td>Section heading (Markdown only)</td>
2532
</tr>
2533
<tr>
2534
<td><code>metadata</code></td>
2535
<td><code>Dict[str, Any]</code></td>
2536
<td>Additional metadata (e.g., extraction method)</td>
2537
</tr>
2538
</tbody>
2539
</table>
2540
<h3 id="step-3-source-registration">Step 3: Source registration<a class="headerlink" href="#step-3-source-registration" title="Permanent link">&para;</a></h3>
2541
<p>Each ingested file is registered as a source in the knowledge graph with provenance metadata:</p>
2542
<ul>
2543
<li><code>source_id</code>: The first 12 characters of the SHA-256 hash of the absolute file path, unless you provide a custom ID</li>
2544
<li><code>source_type</code>: Always <code>"document"</code></li>
2545
<li><code>title</code>: The file stem (filename without extension)</li>
2546
<li><code>path</code>: The file path</li>
2547
<li><code>mime_type</code>: Detected MIME type</li>
2548
<li><code>ingested_at</code>: ISO-8601 timestamp</li>
2549
<li><code>metadata</code>: Chunk count and file extension</li>
2550
</ul>
2551
<h3 id="step-4-entity-and-relationship-extraction">Step 4: Entity and relationship extraction<a class="headerlink" href="#step-4-entity-and-relationship-extraction" title="Permanent link">&para;</a></h3>
2552
<p>Each chunk's text is passed to <code>knowledge_graph.add_content()</code>, which uses the configured LLM provider to extract entities and relationships. The content source is tagged with the document name and either the page number or section name:</p>
2553
<ul>
2554
<li><code>document:report.pdf:page:3</code></li>
2555
<li><code>document:spec.md:section:Architecture</code></li>
2556
<li><code>document:notes.txt</code> (no page or section)</li>
2557
</ul>
2558
<h3 id="step-5-storage">Step 5: Storage<a class="headerlink" href="#step-5-storage" title="Permanent link">&para;</a></h3>
2559
<p>The knowledge graph is saved in both <code>.db</code> (SQLite-backed FalkorDB) and <code>.json</code> formats.</p>
2560
<h2 id="combining-with-video-analysis">Combining with video analysis<a class="headerlink" href="#combining-with-video-analysis" title="Permanent link">&para;</a></h2>
2561
<p>A common workflow is to analyze a video recording and then ingest related documents into the same knowledge graph:</p>
2562
<div class="highlight"><pre><span></span><code><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a><span class="c1"># Step 1: Analyze the meeting recording</span>
2563
<a id="__codelineno-11-2" name="__codelineno-11-2" href="#__codelineno-11-2"></a>planopticon<span class="w"> </span>analyze<span class="w"> </span>meeting-recording.mp4<span class="w"> </span>-o<span class="w"> </span>./project-kg
2564
<a id="__codelineno-11-3" name="__codelineno-11-3" href="#__codelineno-11-3"></a>
2565
<a id="__codelineno-11-4" name="__codelineno-11-4" href="#__codelineno-11-4"></a><span class="c1"># Step 2: Ingest the meeting agenda</span>
2566
<a id="__codelineno-11-5" name="__codelineno-11-5" href="#__codelineno-11-5"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>agenda.md<span class="w"> </span>--db-path<span class="w"> </span>./project-kg/knowledge_graph.db
2567
<a id="__codelineno-11-6" name="__codelineno-11-6" href="#__codelineno-11-6"></a>
2568
<a id="__codelineno-11-7" name="__codelineno-11-7" href="#__codelineno-11-7"></a><span class="c1"># Step 3: Ingest the project spec</span>
2569
<a id="__codelineno-11-8" name="__codelineno-11-8" href="#__codelineno-11-8"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>project-spec.pdf<span class="w"> </span>--db-path<span class="w"> </span>./project-kg/knowledge_graph.db
2570
<a id="__codelineno-11-9" name="__codelineno-11-9" href="#__codelineno-11-9"></a>
2571
<a id="__codelineno-11-10" name="__codelineno-11-10" href="#__codelineno-11-10"></a><span class="c1"># Step 4: Ingest a whole docs folder</span>
2572
<a id="__codelineno-11-11" name="__codelineno-11-11" href="#__codelineno-11-11"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./reference-docs/<span class="w"> </span>--db-path<span class="w"> </span>./project-kg/knowledge_graph.db
2573
<a id="__codelineno-11-12" name="__codelineno-11-12" href="#__codelineno-11-12"></a>
2574
<a id="__codelineno-11-13" name="__codelineno-11-13" href="#__codelineno-11-13"></a><span class="c1"># Step 5: Query the combined graph</span>
2575
<a id="__codelineno-11-14" name="__codelineno-11-14" href="#__codelineno-11-14"></a>planopticon<span class="w"> </span>query<span class="w"> </span>--db-path<span class="w"> </span>./project-kg/knowledge_graph.db
2576
</code></pre></div>
2577
<p>The resulting knowledge graph contains entities and relationships from all sources — video transcripts, meeting agendas, specs, and reference documents — with full provenance tracking so you can trace any entity back to its source.</p>
2578
<h2 id="python-api">Python API<a class="headerlink" href="#python-api" title="Permanent link">&para;</a></h2>
2579
<h3 id="ingesting-a-single-file">Ingesting a single file<a class="headerlink" href="#ingesting-a-single-file" title="Permanent link">&para;</a></h3>
2580
<div class="highlight"><pre><span></span><code><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
2581
<a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.integrators.knowledge_graph</span><span class="w"> </span><span class="kn">import</span> <span class="n">KnowledgeGraph</span>
2582
<a id="__codelineno-12-3" name="__codelineno-12-3" href="#__codelineno-12-3"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.processors.ingest</span><span class="w"> </span><span class="kn">import</span> <span class="n">ingest_file</span>
2583
<a id="__codelineno-12-4" name="__codelineno-12-4" href="#__codelineno-12-4"></a>
2584
<a id="__codelineno-12-5" name="__codelineno-12-5" href="#__codelineno-12-5"></a><span class="n">kg</span> <span class="o">=</span> <span class="n">KnowledgeGraph</span><span class="p">(</span><span class="n">db_path</span><span class="o">=</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;knowledge_graph.db&quot;</span><span class="p">))</span>
2585
<a id="__codelineno-12-6" name="__codelineno-12-6" href="#__codelineno-12-6"></a><span class="n">chunk_count</span> <span class="o">=</span> <span class="n">ingest_file</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;document.pdf&quot;</span><span class="p">),</span> <span class="n">kg</span><span class="p">)</span>
2586
<a id="__codelineno-12-7" name="__codelineno-12-7" href="#__codelineno-12-7"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Processed </span><span class="si">{</span><span class="n">chunk_count</span><span class="si">}</span><span class="s2"> chunks&quot;</span><span class="p">)</span>
2587
<a id="__codelineno-12-8" name="__codelineno-12-8" href="#__codelineno-12-8"></a>
2588
<a id="__codelineno-12-9" name="__codelineno-12-9" href="#__codelineno-12-9"></a><span class="n">kg</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;knowledge_graph.db&quot;</span><span class="p">))</span>
2589
</code></pre></div>
2590
<h3 id="ingesting-a-directory">Ingesting a directory<a class="headerlink" href="#ingesting-a-directory" title="Permanent link">&para;</a></h3>
2591
<div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
2592
<a id="__codelineno-13-2" name="__codelineno-13-2" href="#__codelineno-13-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.integrators.knowledge_graph</span><span class="w"> </span><span class="kn">import</span> <span class="n">KnowledgeGraph</span>
2593
<a id="__codelineno-13-3" name="__codelineno-13-3" href="#__codelineno-13-3"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.processors.ingest</span><span class="w"> </span><span class="kn">import</span> <span class="n">ingest_directory</span>
2594
<a id="__codelineno-13-4" name="__codelineno-13-4" href="#__codelineno-13-4"></a>
2595
<a id="__codelineno-13-5" name="__codelineno-13-5" href="#__codelineno-13-5"></a><span class="n">kg</span> <span class="o">=</span> <span class="n">KnowledgeGraph</span><span class="p">(</span><span class="n">db_path</span><span class="o">=</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;knowledge_graph.db&quot;</span><span class="p">))</span>
2596
<a id="__codelineno-13-6" name="__codelineno-13-6" href="#__codelineno-13-6"></a><span class="n">results</span> <span class="o">=</span> <span class="n">ingest_directory</span><span class="p">(</span>
2597
<a id="__codelineno-13-7" name="__codelineno-13-7" href="#__codelineno-13-7"></a> <span class="n">Path</span><span class="p">(</span><span class="s2">&quot;./docs&quot;</span><span class="p">),</span>
2598
<a id="__codelineno-13-8" name="__codelineno-13-8" href="#__codelineno-13-8"></a> <span class="n">kg</span><span class="p">,</span>
2599
<a id="__codelineno-13-9" name="__codelineno-13-9" href="#__codelineno-13-9"></a> <span class="n">recursive</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
2600
<a id="__codelineno-13-10" name="__codelineno-13-10" href="#__codelineno-13-10"></a> <span class="n">extensions</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;.md&quot;</span><span class="p">,</span> <span class="s2">&quot;.pdf&quot;</span><span class="p">],</span> <span class="c1"># Optional: filter by extension</span>
2601
<a id="__codelineno-13-11" name="__codelineno-13-11" href="#__codelineno-13-11"></a><span class="p">)</span>
2602
<a id="__codelineno-13-12" name="__codelineno-13-12" href="#__codelineno-13-12"></a>
2603
<a id="__codelineno-13-13" name="__codelineno-13-13" href="#__codelineno-13-13"></a><span class="k">for</span> <span class="n">filepath</span><span class="p">,</span> <span class="n">chunks</span> <span class="ow">in</span> <span class="n">results</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
2604
<a id="__codelineno-13-14" name="__codelineno-13-14" href="#__codelineno-13-14"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot; </span><span class="si">{</span><span class="n">filepath</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">chunks</span><span class="si">}</span><span class="s2"> chunks&quot;</span><span class="p">)</span>
2605
<a id="__codelineno-13-15" name="__codelineno-13-15" href="#__codelineno-13-15"></a>
2606
<a id="__codelineno-13-16" name="__codelineno-13-16" href="#__codelineno-13-16"></a><span class="n">kg</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;knowledge_graph.db&quot;</span><span class="p">))</span>
2607
</code></pre></div>
2608
<h3 id="listing-supported-extensions">Listing supported extensions<a class="headerlink" href="#listing-supported-extensions" title="Permanent link">&para;</a></h3>
2609
<div class="highlight"><pre><span></span><code><a id="__codelineno-14-1" name="__codelineno-14-1" href="#__codelineno-14-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.processors.base</span><span class="w"> </span><span class="kn">import</span> <span class="n">list_supported_extensions</span>
2610
<a id="__codelineno-14-2" name="__codelineno-14-2" href="#__codelineno-14-2"></a>
2611
<a id="__codelineno-14-3" name="__codelineno-14-3" href="#__codelineno-14-3"></a><span class="n">extensions</span> <span class="o">=</span> <span class="n">list_supported_extensions</span><span class="p">()</span>
2612
<a id="__codelineno-14-4" name="__codelineno-14-4" href="#__codelineno-14-4"></a><span class="nb">print</span><span class="p">(</span><span class="n">extensions</span><span class="p">)</span>
2613
<a id="__codelineno-14-5" name="__codelineno-14-5" href="#__codelineno-14-5"></a><span class="c1"># [&#39;.csv&#39;, &#39;.log&#39;, &#39;.markdown&#39;, &#39;.md&#39;, &#39;.pdf&#39;, &#39;.text&#39;, &#39;.txt&#39;]</span>
2614
</code></pre></div>
2615
<h3 id="working-with-processors-directly">Working with processors directly<a class="headerlink" href="#working-with-processors-directly" title="Permanent link">&para;</a></h3>
2616
<div class="highlight"><pre><span></span><code><a id="__codelineno-15-1" name="__codelineno-15-1" href="#__codelineno-15-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
2617
<a id="__codelineno-15-2" name="__codelineno-15-2" href="#__codelineno-15-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.processors.base</span><span class="w"> </span><span class="kn">import</span> <span class="n">get_processor</span>
2618
<a id="__codelineno-15-3" name="__codelineno-15-3" href="#__codelineno-15-3"></a>
2619
<a id="__codelineno-15-4" name="__codelineno-15-4" href="#__codelineno-15-4"></a><span class="n">processor</span> <span class="o">=</span> <span class="n">get_processor</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;report.pdf&quot;</span><span class="p">))</span>
2620
<a id="__codelineno-15-5" name="__codelineno-15-5" href="#__codelineno-15-5"></a><span class="k">if</span> <span class="n">processor</span><span class="p">:</span>
2621
<a id="__codelineno-15-6" name="__codelineno-15-6" href="#__codelineno-15-6"></a> <span class="n">chunks</span> <span class="o">=</span> <span class="n">processor</span><span class="o">.</span><span class="n">process</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="s2">&quot;report.pdf&quot;</span><span class="p">))</span>
2622
<a id="__codelineno-15-7" name="__codelineno-15-7" href="#__codelineno-15-7"></a> <span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">chunks</span><span class="p">:</span>
2623
<a id="__codelineno-15-8" name="__codelineno-15-8" href="#__codelineno-15-8"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Page </span><span class="si">{</span><span class="n">chunk</span><span class="o">.</span><span class="n">page</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">chunk</span><span class="o">.</span><span class="n">text</span><span class="p">[:</span><span class="mi">100</span><span class="p">]</span><span class="si">}</span><span class="s2">...&quot;</span><span class="p">)</span>
2624
</code></pre></div>
2625
<h2 id="extending-with-custom-processors">Extending with custom processors<a class="headerlink" href="#extending-with-custom-processors" title="Permanent link">&para;</a></h2>
2626
<p>To add support for a new file format, implement the <code>DocumentProcessor</code> abstract class and register it:</p>
2627
<div class="highlight"><pre><span></span><code><a id="__codelineno-16-1" name="__codelineno-16-1" href="#__codelineno-16-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pathlib</span><span class="w"> </span><span class="kn">import</span> <span class="n">Path</span>
2628
<a id="__codelineno-16-2" name="__codelineno-16-2" href="#__codelineno-16-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">List</span>
2629
<a id="__codelineno-16-3" name="__codelineno-16-3" href="#__codelineno-16-3"></a><span class="kn">from</span><span class="w"> </span><span class="nn">video_processor.processors.base</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
2630
<a id="__codelineno-16-4" name="__codelineno-16-4" href="#__codelineno-16-4"></a> <span class="n">DocumentChunk</span><span class="p">,</span>
2631
<a id="__codelineno-16-5" name="__codelineno-16-5" href="#__codelineno-16-5"></a> <span class="n">DocumentProcessor</span><span class="p">,</span>
2632
<a id="__codelineno-16-6" name="__codelineno-16-6" href="#__codelineno-16-6"></a> <span class="n">register_processor</span><span class="p">,</span>
2633
<a id="__codelineno-16-7" name="__codelineno-16-7" href="#__codelineno-16-7"></a><span class="p">)</span>
2634
<a id="__codelineno-16-8" name="__codelineno-16-8" href="#__codelineno-16-8"></a>
2635
<a id="__codelineno-16-9" name="__codelineno-16-9" href="#__codelineno-16-9"></a>
2636
<a id="__codelineno-16-10" name="__codelineno-16-10" href="#__codelineno-16-10"></a><span class="k">class</span><span class="w"> </span><span class="nc">HtmlProcessor</span><span class="p">(</span><span class="n">DocumentProcessor</span><span class="p">):</span>
2637
<a id="__codelineno-16-11" name="__codelineno-16-11" href="#__codelineno-16-11"></a> <span class="n">supported_extensions</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;.html&quot;</span><span class="p">,</span> <span class="s2">&quot;.htm&quot;</span><span class="p">]</span>
2638
<a id="__codelineno-16-12" name="__codelineno-16-12" href="#__codelineno-16-12"></a>
2639
<a id="__codelineno-16-13" name="__codelineno-16-13" href="#__codelineno-16-13"></a> <span class="k">def</span><span class="w"> </span><span class="nf">can_process</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
2640
<a id="__codelineno-16-14" name="__codelineno-16-14" href="#__codelineno-16-14"></a> <span class="k">return</span> <span class="n">path</span><span class="o">.</span><span class="n">suffix</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">supported_extensions</span>
2641
<a id="__codelineno-16-15" name="__codelineno-16-15" href="#__codelineno-16-15"></a>
2642
<a id="__codelineno-16-16" name="__codelineno-16-16" href="#__codelineno-16-16"></a> <span class="k">def</span><span class="w"> </span><span class="nf">process</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="n">Path</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">DocumentChunk</span><span class="p">]:</span>
2643
<a id="__codelineno-16-17" name="__codelineno-16-17" href="#__codelineno-16-17"></a> <span class="kn">from</span><span class="w"> </span><span class="nn">bs4</span><span class="w"> </span><span class="kn">import</span> <span class="n">BeautifulSoup</span>
2644
<a id="__codelineno-16-18" name="__codelineno-16-18" href="#__codelineno-16-18"></a>
2645
<a id="__codelineno-16-19" name="__codelineno-16-19" href="#__codelineno-16-19"></a> <span class="n">soup</span> <span class="o">=</span> <span class="n">BeautifulSoup</span><span class="p">(</span><span class="n">path</span><span class="o">.</span><span class="n">read_text</span><span class="p">(),</span> <span class="s2">&quot;html.parser&quot;</span><span class="p">)</span>
2646
<a id="__codelineno-16-20" name="__codelineno-16-20" href="#__codelineno-16-20"></a> <span class="n">text</span> <span class="o">=</span> <span class="n">soup</span><span class="o">.</span><span class="n">get_text</span><span class="p">(</span><span class="n">separator</span><span class="o">=</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">)</span>
2647
<a id="__codelineno-16-21" name="__codelineno-16-21" href="#__codelineno-16-21"></a> <span class="k">return</span> <span class="p">[</span>
2648
<a id="__codelineno-16-22" name="__codelineno-16-22" href="#__codelineno-16-22"></a> <span class="n">DocumentChunk</span><span class="p">(</span>
2649
<a id="__codelineno-16-23" name="__codelineno-16-23" href="#__codelineno-16-23"></a> <span class="n">text</span><span class="o">=</span><span class="n">text</span><span class="p">,</span>
2650
<a id="__codelineno-16-24" name="__codelineno-16-24" href="#__codelineno-16-24"></a> <span class="n">source_file</span><span class="o">=</span><span class="nb">str</span><span class="p">(</span><span class="n">path</span><span class="p">),</span>
2651
<a id="__codelineno-16-25" name="__codelineno-16-25" href="#__codelineno-16-25"></a> <span class="n">chunk_index</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
2652
<a id="__codelineno-16-26" name="__codelineno-16-26" href="#__codelineno-16-26"></a> <span class="p">)</span>
2653
<a id="__codelineno-16-27" name="__codelineno-16-27" href="#__codelineno-16-27"></a> <span class="p">]</span>
2654
<a id="__codelineno-16-28" name="__codelineno-16-28" href="#__codelineno-16-28"></a>
2655
<a id="__codelineno-16-29" name="__codelineno-16-29" href="#__codelineno-16-29"></a>
2656
<a id="__codelineno-16-30" name="__codelineno-16-30" href="#__codelineno-16-30"></a><span class="n">register_processor</span><span class="p">(</span><span class="n">HtmlProcessor</span><span class="o">.</span><span class="n">supported_extensions</span><span class="p">,</span> <span class="n">HtmlProcessor</span><span class="p">)</span>
2657
</code></pre></div>
2658
<p>After registration, <code>planopticon ingest</code> will automatically handle <code>.html</code> and <code>.htm</code> files.</p>
2659
<h2 id="companion-repl">Companion REPL<a class="headerlink" href="#companion-repl" title="Permanent link">&para;</a></h2>
2660
<p>Inside the interactive companion REPL, you can ingest files using the <code>/ingest</code> command:</p>
2661
<div class="highlight"><pre><span></span><code><a id="__codelineno-17-1" name="__codelineno-17-1" href="#__codelineno-17-1"></a>&gt; /ingest ./meeting-notes.md
2662
<a id="__codelineno-17-2" name="__codelineno-17-2" href="#__codelineno-17-2"></a>Ingested meeting-notes.md: 5 chunks
2663
</code></pre></div>
2664
<p>This adds the ingested file's content to the currently loaded knowledge graph.</p>
2665
<h2 id="common-workflows">Common workflows<a class="headerlink" href="#common-workflows" title="Permanent link">&para;</a></h2>
2666
<h3 id="build-a-project-knowledge-base-from-scratch">Build a project knowledge base from scratch<a class="headerlink" href="#build-a-project-knowledge-base-from-scratch" title="Permanent link">&para;</a></h3>
2667
<div class="highlight"><pre><span></span><code><a id="__codelineno-18-1" name="__codelineno-18-1" href="#__codelineno-18-1"></a><span class="c1"># Ingest all project docs</span>
2668
<a id="__codelineno-18-2" name="__codelineno-18-2" href="#__codelineno-18-2"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./project-docs/<span class="w"> </span>-o<span class="w"> </span>./knowledge-base
2669
<a id="__codelineno-18-3" name="__codelineno-18-3" href="#__codelineno-18-3"></a>
2670
<a id="__codelineno-18-4" name="__codelineno-18-4" href="#__codelineno-18-4"></a><span class="c1"># Query what was captured</span>
2671
<a id="__codelineno-18-5" name="__codelineno-18-5" href="#__codelineno-18-5"></a>planopticon<span class="w"> </span>query<span class="w"> </span>--db-path<span class="w"> </span>./knowledge-base/knowledge_graph.db
2672
<a id="__codelineno-18-6" name="__codelineno-18-6" href="#__codelineno-18-6"></a>
2673
<a id="__codelineno-18-7" name="__codelineno-18-7" href="#__codelineno-18-7"></a><span class="c1"># Export as an Obsidian vault</span>
2674
<a id="__codelineno-18-8" name="__codelineno-18-8" href="#__codelineno-18-8"></a>planopticon<span class="w"> </span><span class="nb">export</span><span class="w"> </span>obsidian<span class="w"> </span>./knowledge-base/knowledge_graph.db<span class="w"> </span>-o<span class="w"> </span>./vault
2675
</code></pre></div>
2676
<h3 id="incrementally-build-a-knowledge-graph">Incrementally build a knowledge graph<a class="headerlink" href="#incrementally-build-a-knowledge-graph" title="Permanent link">&para;</a></h3>
2677
<div class="highlight"><pre><span></span><code><a id="__codelineno-19-1" name="__codelineno-19-1" href="#__codelineno-19-1"></a><span class="c1"># Start with initial docs</span>
2678
<a id="__codelineno-19-2" name="__codelineno-19-2" href="#__codelineno-19-2"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./sprint-1-docs/<span class="w"> </span>-o<span class="w"> </span>./kg
2679
<a id="__codelineno-19-3" name="__codelineno-19-3" href="#__codelineno-19-3"></a>
2680
<a id="__codelineno-19-4" name="__codelineno-19-4" href="#__codelineno-19-4"></a><span class="c1"># Add more docs over time</span>
2681
<a id="__codelineno-19-5" name="__codelineno-19-5" href="#__codelineno-19-5"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./sprint-2-docs/<span class="w"> </span>--db-path<span class="w"> </span>./kg/knowledge_graph.db
2682
<a id="__codelineno-19-6" name="__codelineno-19-6" href="#__codelineno-19-6"></a>planopticon<span class="w"> </span>ingest<span class="w"> </span>./sprint-3-docs/<span class="w"> </span>--db-path<span class="w"> </span>./kg/knowledge_graph.db
2683
<a id="__codelineno-19-7" name="__codelineno-19-7" href="#__codelineno-19-7"></a>
2684
<a id="__codelineno-19-8" name="__codelineno-19-8" href="#__codelineno-19-8"></a><span class="c1"># The graph grows with each ingestion</span>
2685
<a id="__codelineno-19-9" name="__codelineno-19-9" href="#__codelineno-19-9"></a>planopticon<span class="w"> </span>query<span class="w"> </span>--db-path<span class="w"> </span>./kg/knowledge_graph.db<span class="w"> </span>stats
2686
</code></pre></div>
2687
<h3 id="ingest-from-google-workspace-or-microsoft-365">Ingest from Google Workspace or Microsoft 365<a class="headerlink" href="#ingest-from-google-workspace-or-microsoft-365" title="Permanent link">&para;</a></h3>
2688
<p>PlanOpticon provides integrated commands that fetch cloud documents and ingest them in one step:</p>
2689
<div class="highlight"><pre><span></span><code><a id="__codelineno-20-1" name="__codelineno-20-1" href="#__codelineno-20-1"></a><span class="c1"># Google Workspace</span>
2690
<a id="__codelineno-20-2" name="__codelineno-20-2" href="#__codelineno-20-2"></a>planopticon<span class="w"> </span>gws<span class="w"> </span>ingest<span class="w"> </span>--folder-id<span class="w"> </span>FOLDER_ID<span class="w"> </span>-o<span class="w"> </span>./results
2691
<a id="__codelineno-20-3" name="__codelineno-20-3" href="#__codelineno-20-3"></a>
2692
<a id="__codelineno-20-4" name="__codelineno-20-4" href="#__codelineno-20-4"></a><span class="c1"># Microsoft 365 / SharePoint</span>
2693
<a id="__codelineno-20-5" name="__codelineno-20-5" href="#__codelineno-20-5"></a>planopticon<span class="w"> </span>m365<span class="w"> </span>ingest<span class="w"> </span>--web-url<span class="w"> </span>https://contoso.sharepoint.com/sites/proj<span class="w"> </span><span class="se">\</span>
2694
<a id="__codelineno-20-6" name="__codelineno-20-6" href="#__codelineno-20-6"></a><span class="w"> </span>--folder-url<span class="w"> </span>/sites/proj/Shared<span class="se">\ </span>Documents
2695
</code></pre></div>
2696
<p>These commands handle authentication, document download, text extraction, and knowledge graph creation automatically.</p>
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
</article>
2711
</div>
2712
2713
2714
<script>
  // Restore the reader's previously selected content tabs.
  // "__tabs" is read through __md_get (defined in <head>); it is only used
  // when it is an array, and each entry is compared against the trimmed text
  // of the tab labels.
  var tabs = __md_get("__tabs");
  if (Array.isArray(tabs)) {
    eachSet: for (var set of document.querySelectorAll(".tabbed-set")) {
      var labels = set.querySelector(".tabbed-labels");
      // A tab set without a label container has nothing to match against.
      // Skip it rather than throwing, which would abort every remaining set.
      if (!labels) continue;
      for (var tab of tabs) {
        for (var label of labels.getElementsByTagName("label")) {
          if (label.innerText.trim() === tab) {
            var input = document.getElementById(label.htmlFor);
            // Only tick the control when the label's "for" target exists.
            if (input) input.checked = true;
            // The first stored tab that matches decides this set.
            continue eachSet;
          }
        }
      }
    }
  }
</script>
2715
2716
<script>
  // When the URL fragment identifies an element that has a non-empty `name`,
  // set its `checked` state to whether that name starts with "__tabbed_".
  var target = document.getElementById(location.hash.slice(1));
  if (target && target.name) {
    target.checked = target.name.startsWith("__tabbed_");
  }
</script>
2717
</div>
2718
2719
<!-- "Back to top" control. It is rendered with the `hidden` attribute;
     presumably the theme bundle reveals it via data-md-component="top"
     (TODO confirm). The visible text is the accessible name, so the arrow
     icon is marked decorative. -->
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
  Back to top
</button>
2724
2725
</main>
2726
2727
<footer class="md-footer">
2728
2729
<div class="md-footer-meta md-typeset">
2730
<div class="md-footer-meta__inner md-grid">
2731
<div class="md-copyright">
2732
2733
<div class="md-copyright__highlight">
2734
Copyright &copy; 2026 CONFLICT LLC
2735
</div>
2736
2737
2738
Made with
2739
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
2740
Material for MkDocs
2741
</a>
2742
2743
</div>
2744
2745
2746
<div class="md-social">
2747
2748
2749
2750
2751
2752
2753
2754
2755
<!-- Icon-only link to the project's GitHub repository. The accessible name
     comes from aria-label (the title attribute alone is not a reliable
     label); the SVG icon is decorative and hidden from assistive tech. -->
<a href="https://github.com/ConflictHQ/PlanOpticon" target="_blank" rel="noopener" title="github.com" class="md-social__link" aria-label="PlanOpticon on GitHub (opens in new tab)">
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" aria-hidden="true" focusable="false"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
</a>
2758
2759
</div>
2760
2761
</div>
2762
</div>
2763
</footer>
2764
2765
</div>
2766
<div class="md-dialog" data-md-component="dialog">
2767
<div class="md-dialog__inner md-typeset"></div>
2768
</div>
2769
2770
2771
2772
2773
2774
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["navigation.instant", "navigation.tabs", "navigation.sections", "navigation.expand", "navigation.top", "search.suggest", "search.highlight", "content.code.copy", "content.tabs.link", "header.autohide"], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
2775
2776
2777
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
2778
2779
2780
</body>
2781
</html>

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button