Fossil SCM

fossil-scm / src / content.c
Blame History Raw 1308 lines
1
/*
2
** Copyright (c) 2006 D. Richard Hipp
3
**
4
** This program is free software; you can redistribute it and/or
5
** modify it under the terms of the Simplified BSD License (also
6
** known as the "2-Clause License" or "FreeBSD License".)
7
**
8
** This program is distributed in the hope that it will be useful,
9
** but without any warranty; without even the implied warranty of
10
** merchantability or fitness for a particular purpose.
11
**
12
** Author contact information:
13
**   drh@hwaci.com
14
** http://www.hwaci.com/drh/
15
**
16
*******************************************************************************
17
**
18
** Procedures to store and retrieve records from the repository
19
*/
20
#include "config.h"
21
#include "content.h"
22
#include <assert.h>
23
24
/*
25
** The artifact retrieval cache
26
*/
27
static struct {
28
i64 szTotal; /* Total size of all entries in the cache */
29
int n; /* Current number of cache entries */
30
int nAlloc; /* Number of slots allocated in a[] */
31
int nextAge; /* Age counter for implementing LRU */
32
struct cacheLine { /* One instance of this for each cache entry */
33
int rid; /* Artifact id */
34
int age; /* Age. Newer is larger */
35
Blob content; /* Content of the artifact */
36
} *a; /* The positive cache */
37
Bag inCache; /* Set of artifacts currently in cache */
38
39
/*
40
** The missing artifact cache.
41
**
42
** Artifacts whose record ID are in missingCache cannot be retrieved
43
** either because they are phantoms or because they are a delta that
44
** depends on a phantom. Artifacts whose content we are certain is
45
** available are in availableCache. If an artifact is in neither cache
46
** then its current availability is unknown.
47
*/
48
Bag missing; /* Cache of artifacts that are incomplete */
49
Bag available; /* Cache of artifacts that are complete */
50
} contentCache;
51
52
/*
53
** Remove the oldest element from the content cache
54
*/
55
static void content_cache_expire_oldest(void){
56
int i;
57
int mnAge = contentCache.nextAge;
58
int mn = -1;
59
for(i=0; i<contentCache.n; i++){
60
if( contentCache.a[i].age<mnAge ){
61
mnAge = contentCache.a[i].age;
62
mn = i;
63
}
64
}
65
if( mn>=0 ){
66
bag_remove(&contentCache.inCache, contentCache.a[mn].rid);
67
contentCache.szTotal -= blob_size(&contentCache.a[mn].content);
68
blob_reset(&contentCache.a[mn].content);
69
contentCache.n--;
70
contentCache.a[mn] = contentCache.a[contentCache.n];
71
}
72
}
73
74
/*
75
** Add an entry to the content cache.
76
**
77
** This routines hands responsibility for the artifact over to the cache.
78
** The cache will deallocate memory when it has finished with it.
79
*/
80
void content_cache_insert(int rid, Blob *pBlob){
81
struct cacheLine *p;
82
if( contentCache.n>500 || contentCache.szTotal>50000000 ){
83
i64 szBefore;
84
do{
85
szBefore = contentCache.szTotal;
86
content_cache_expire_oldest();
87
}while( contentCache.szTotal>50000000 && contentCache.szTotal<szBefore );
88
}
89
if( contentCache.n>=contentCache.nAlloc ){
90
contentCache.nAlloc = contentCache.nAlloc*2 + 10;
91
contentCache.a = fossil_realloc(contentCache.a,
92
contentCache.nAlloc*sizeof(contentCache.a[0]));
93
}
94
p = &contentCache.a[contentCache.n++];
95
p->rid = rid;
96
p->age = contentCache.nextAge++;
97
contentCache.szTotal += blob_size(pBlob);
98
p->content = *pBlob;
99
blob_zero(pBlob);
100
bag_insert(&contentCache.inCache, rid);
101
}
102
103
/*
104
** Clear the content cache. If it is passed true, it
105
** also frees all associated memory, otherwise it may
106
** retain parts for future uses of the cache.
107
*/
108
void content_clear_cache(int bFreeIt){
109
int i;
110
for(i=0; i<contentCache.n; i++){
111
blob_reset(&contentCache.a[i].content);
112
}
113
bag_clear(&contentCache.missing);
114
bag_clear(&contentCache.available);
115
bag_clear(&contentCache.inCache);
116
contentCache.n = 0;
117
contentCache.szTotal = 0;
118
if(bFreeIt){
119
fossil_free(contentCache.a);
120
contentCache.a = 0;
121
contentCache.nAlloc = 0;
122
}
123
}
124
125
/*
126
** Return the srcid associated with rid. Or return 0 if rid is
127
** original content and not a delta.
128
*/
129
int delta_source_rid(int rid){
130
static Stmt q;
131
int srcid;
132
db_static_prepare(&q, "SELECT srcid FROM delta WHERE rid=:rid");
133
db_bind_int(&q, ":rid", rid);
134
if( db_step(&q)==SQLITE_ROW ){
135
srcid = db_column_int(&q, 0);
136
}else{
137
srcid = 0;
138
}
139
db_reset(&q);
140
return srcid;
141
}
142
143
/*
144
** Return the blob.size field given blob.rid
145
*/
146
int content_size(int rid, int dflt){
147
static Stmt q;
148
int sz = dflt;
149
db_static_prepare(&q, "SELECT size FROM blob WHERE rid=:r");
150
db_bind_int(&q, ":r", rid);
151
if( db_step(&q)==SQLITE_ROW ){
152
sz = db_column_int(&q, 0);
153
}
154
db_reset(&q);
155
return sz;
156
}
157
158
/*
159
** Check to see if content is available for artifact "rid". Return
160
** true if it is. Return false if rid is a phantom or depends on
161
** a phantom.
162
*/
163
int content_is_available(int rid){
164
int srcid;
165
int depth = 0; /* Limit to recursion depth */
166
while( depth++ < 10000000 ){
167
if( bag_find(&contentCache.missing, rid) ){
168
return 0;
169
}
170
if( bag_find(&contentCache.available, rid) ){
171
return 1;
172
}
173
if( content_size(rid, -1)<0 ){
174
bag_insert(&contentCache.missing, rid);
175
return 0;
176
}
177
srcid = delta_source_rid(rid);
178
if( srcid==0 ){
179
bag_insert(&contentCache.available, rid);
180
return 1;
181
}
182
rid = srcid;
183
}
184
fossil_panic("delta-loop in repository");
185
return 0;
186
}
187
188
/*
189
** Mark artifact rid as being available now. Update the cache to
190
** show that everything that was formerly unavailable because rid
191
** was missing is now available.
192
*/
193
static void content_mark_available(int rid){
194
Bag pending;
195
static Stmt q;
196
if( bag_find(&contentCache.available, rid) ) return;
197
bag_init(&pending);
198
bag_insert(&pending, rid);
199
while( (rid = bag_first(&pending))!=0 ){
200
bag_remove(&pending, rid);
201
bag_remove(&contentCache.missing, rid);
202
bag_insert(&contentCache.available, rid);
203
db_static_prepare(&q, "SELECT rid FROM delta WHERE srcid=:rid");
204
db_bind_int(&q, ":rid", rid);
205
while( db_step(&q)==SQLITE_ROW ){
206
int nx = db_column_int(&q, 0);
207
bag_insert(&pending, nx);
208
}
209
db_reset(&q);
210
}
211
bag_clear(&pending);
212
}
213
214
/*
215
** Get the blob.content value for blob.rid=rid. Return 1 on success or
216
** 0 on failure.
217
*/
218
static int content_of_blob(int rid, Blob *pBlob){
219
static Stmt q;
220
int rc = 0;
221
db_static_prepare(&q, "SELECT content FROM blob WHERE rid=:rid AND size>=0");
222
db_bind_int(&q, ":rid", rid);
223
if( db_step(&q)==SQLITE_ROW ){
224
db_ephemeral_blob(&q, 0, pBlob);
225
blob_uncompress(pBlob, pBlob);
226
rc = 1;
227
}
228
db_reset(&q);
229
return rc;
230
}
231
232
/*
233
** Extract the content for ID rid and put it into the
234
** uninitialized blob. Return 1 on success. If the record
235
** is a phantom, zero pBlob and return 0.
236
*/
237
int content_get(int rid, Blob *pBlob){
238
int rc;
239
int i;
240
int nextRid;
241
242
assert( g.repositoryOpen );
243
blob_zero(pBlob);
244
if( rid==0 ) return 0;
245
246
/* Early out if we know the content is not available */
247
if( bag_find(&contentCache.missing, rid) ){
248
return 0;
249
}
250
251
/* Look for the artifact in the cache first */
252
if( bag_find(&contentCache.inCache, rid) ){
253
for(i=0; i<contentCache.n; i++){
254
if( contentCache.a[i].rid==rid ){
255
blob_copy(pBlob, &contentCache.a[i].content);
256
contentCache.a[i].age = contentCache.nextAge++;
257
return 1;
258
}
259
}
260
}
261
262
nextRid = delta_source_rid(rid);
263
if( nextRid==0 ){
264
rc = content_of_blob(rid, pBlob);
265
}else{
266
int n = 1;
267
int nAlloc = 10;
268
int *a = 0;
269
int mx;
270
Blob delta, next;
271
272
a = fossil_malloc( sizeof(a[0])*nAlloc );
273
a[0] = rid;
274
a[1] = nextRid;
275
n = 1;
276
while( !bag_find(&contentCache.inCache, nextRid)
277
&& (nextRid = delta_source_rid(nextRid))>0 ){
278
n++;
279
if( n>=nAlloc ){
280
if( n>db_int(0, "SELECT max(rid) FROM blob") ){
281
fossil_panic("infinite loop in DELTA table");
282
}
283
nAlloc = nAlloc*2 + 10;
284
a = fossil_realloc(a, nAlloc*sizeof(a[0]));
285
}
286
a[n] = nextRid;
287
}
288
mx = n;
289
rc = content_get(a[n], pBlob);
290
n--;
291
while( rc && n>=0 ){
292
rc = content_of_blob(a[n], &delta);
293
if( rc ){
294
if( blob_delta_apply(pBlob, &delta, &next)<0 ){
295
rc = 1;
296
}else{
297
blob_reset(&delta);
298
if( (mx-n)%8==0 ){
299
content_cache_insert(a[n+1], pBlob);
300
}else{
301
blob_reset(pBlob);
302
}
303
*pBlob = next;
304
}
305
}
306
n--;
307
}
308
free(a);
309
if( !rc ) blob_reset(pBlob);
310
}
311
if( rc==0 ){
312
bag_insert(&contentCache.missing, rid);
313
}else{
314
bag_insert(&contentCache.available, rid);
315
}
316
return rc;
317
}
318
319
/*
320
** COMMAND: artifact*
321
**
322
** Usage: %fossil artifact ARTIFACT-ID ?OUTPUT-FILENAME? ?OPTIONS?
323
**
324
** Extract an artifact by its artifact hash and write the results on
325
** standard output, or if the optional second argument is given, in
326
** the named output file.
327
**
328
** Options:
329
** -R|--repository REPO Extract artifacts from repository REPO
330
**
331
** See also: [[finfo]]
332
*/
333
void artifact_cmd(void){
334
int rid;
335
Blob content;
336
const char *zFile;
337
db_find_and_open_repository(OPEN_ANY_SCHEMA, 0);
338
if( g.argc!=4 && g.argc!=3 ) usage("ARTIFACT-ID ?FILENAME? ?OPTIONS?");
339
zFile = g.argc==4 ? g.argv[3] : "-";
340
rid = name_to_rid(g.argv[2]);
341
if( rid==0 ){
342
fossil_fatal("%s",g.zErrMsg);
343
}
344
content_get(rid, &content);
345
blob_write_to_file(&content, zFile);
346
}
347
348
/*
349
** COMMAND: test-content-rawget
350
**
351
** Extract a blob from the database and write it into a file. This
352
** version does not expand the delta.
353
*/
354
void test_content_rawget_cmd(void){
355
int rid;
356
Blob content;
357
const char *zFile;
358
if( g.argc!=4 && g.argc!=3 ) usage("RECORDID ?FILENAME?");
359
zFile = g.argc==4 ? g.argv[3] : "-";
360
db_must_be_within_tree();
361
rid = name_to_rid(g.argv[2]);
362
blob_zero(&content);
363
db_blob(&content, "SELECT content FROM blob WHERE rid=%d", rid);
364
blob_uncompress(&content, &content);
365
blob_write_to_file(&content, zFile);
366
}
367
368
/*
** When this flag is set, after_dephantomize() returns immediately and
** so the automatic calls to manifest_crosslink() for dephantomized
** records are suppressed.  The flag can be set (for example) when
** doing a clone when we know that rebuild will be run over all records
** at the conclusion of the operation.
*/
static int ignoreDephantomizations = 0;
376
377
/*
378
** When a record is converted from a phantom to a real record,
379
** if that record has other records that are derived by delta,
380
** then call manifest_crosslink() on those other records.
381
**
382
** If the formerly phantom record or any of the other records
383
** derived by delta from the former phantom are a baseline manifest,
384
** then also invoke manifest_crosslink() on the delta-manifests
385
** associated with that baseline.
386
**
387
** Tail recursion is used to minimize stack depth.
388
*/
389
void after_dephantomize(int rid, int linkFlag){
390
Stmt q;
391
int nChildAlloc = 0;
392
int *aChild = 0;
393
Blob content;
394
395
if( ignoreDephantomizations ) return;
396
while( rid ){
397
int nChildUsed = 0;
398
int i;
399
400
/* Parse the object rid itself */
401
if( linkFlag ){
402
content_get(rid, &content);
403
manifest_crosslink(rid, &content, MC_NONE);
404
assert( blob_is_reset(&content) );
405
}
406
407
/* Parse all delta-manifests that depend on baseline-manifest rid */
408
db_prepare(&q, "SELECT rid FROM orphan WHERE baseline=%d", rid);
409
while( db_step(&q)==SQLITE_ROW ){
410
int child = db_column_int(&q, 0);
411
if( nChildUsed>=nChildAlloc ){
412
nChildAlloc = nChildAlloc*2 + 10;
413
aChild = fossil_realloc(aChild, nChildAlloc*sizeof(aChild));
414
}
415
aChild[nChildUsed++] = child;
416
}
417
db_finalize(&q);
418
for(i=0; i<nChildUsed; i++){
419
content_get(aChild[i], &content);
420
manifest_crosslink(aChild[i], &content, MC_NONE);
421
assert( blob_is_reset(&content) );
422
}
423
if( nChildUsed ){
424
db_multi_exec("DELETE FROM orphan WHERE baseline=%d", rid);
425
}
426
427
/* Recursively dephantomize all artifacts that are derived by
428
** delta from artifact rid and which have not already been
429
** cross-linked. */
430
nChildUsed = 0;
431
db_prepare(&q,
432
"SELECT rid FROM delta"
433
" WHERE srcid=%d"
434
" AND NOT EXISTS(SELECT 1 FROM mlink WHERE mid=delta.rid)",
435
rid
436
);
437
while( db_step(&q)==SQLITE_ROW ){
438
int child = db_column_int(&q, 0);
439
if( nChildUsed>=nChildAlloc ){
440
nChildAlloc = nChildAlloc*2 + 10;
441
aChild = fossil_realloc(aChild, nChildAlloc*sizeof(aChild));
442
}
443
aChild[nChildUsed++] = child;
444
}
445
db_finalize(&q);
446
for(i=1; i<nChildUsed; i++){
447
after_dephantomize(aChild[i], 1);
448
}
449
450
/* Tail recursion for the common case where only a single artifact
451
** is derived by delta from rid... */
452
rid = nChildUsed>0 ? aChild[0] : 0;
453
linkFlag = 1;
454
}
455
free(aChild);
456
}
457
458
/*
459
** Turn dephantomization processing on or off.
460
*/
461
void content_enable_dephantomize(int onoff){
462
ignoreDephantomizations = !onoff;
463
}
464
465
/*
466
** Make sure the g.rcvid global variable has been initialized.
467
**
468
** If the g.zIpAddr variable has not been set when this routine is
469
** called, use zSrc as the source of content for the rcvfrom
470
** table entry.
471
*/
472
void content_rcvid_init(const char *zSrc){
473
if( g.rcvid==0 ){
474
user_select();
475
if( g.zIpAddr ) zSrc = g.zIpAddr;
476
db_multi_exec(
477
"INSERT INTO rcvfrom(uid, mtime, nonce, ipaddr)"
478
"VALUES(%d, julianday('now'), %Q, %Q)",
479
g.userUid, g.zNonce, zSrc
480
);
481
g.rcvid = db_last_insert_rowid();
482
}
483
}
484
485
/*
486
** Write content into the database. Return the record ID. If the
487
** content is already in the database, just return the record ID.
488
**
489
** If srcId is specified, then pBlob is delta content from
490
** the srcId record. srcId might be a phantom.
491
**
492
** pBlob is normally uncompressed text. But if nBlob>0 then the
493
** pBlob value has already been compressed and nBlob is its uncompressed
494
** size. If nBlob>0 then zUuid must be valid.
495
**
496
** zUuid is the UUID of the artifact, if it is specified. When srcId is
497
** specified then zUuid must always be specified. If srcId is zero,
498
** and zUuid is zero then the correct zUuid is computed from pBlob.
499
**
500
** If the record already exists but is a phantom, the pBlob content
501
** is inserted and the phantom becomes a real record.
502
**
503
** The original content of pBlob is not disturbed. The caller continues
504
** to be responsible for pBlob. This routine does *not* take over
505
** responsibility for freeing pBlob.
506
*/
507
int content_put_ex(
508
Blob *pBlob, /* Content to add to the repository */
509
const char *zUuid, /* artifact hash of reconstructed pBlob */
510
int srcId, /* pBlob is a delta from this entry */
511
int nBlob, /* pBlob is compressed. Original size is this */
512
int isPrivate /* The content should be marked private */
513
){
514
int size;
515
int rid;
516
Stmt s1;
517
Blob cmpr;
518
Blob hash;
519
int markAsUnclustered = 0;
520
int isDephantomize = 0;
521
522
assert( g.repositoryOpen );
523
assert( pBlob!=0 );
524
assert( srcId==0 || zUuid!=0 );
525
db_begin_transaction();
526
if( zUuid==0 ){
527
assert( nBlob==0 );
528
/* First check the auxiliary hash to see if there is already an artifact
529
** that uses the auxiliary hash name */
530
hname_hash(pBlob, 1, &hash);
531
rid = fast_uuid_to_rid(blob_str(&hash));
532
if( rid==0 ){
533
/* No existing artifact with the auxiliary hash name. Therefore, use
534
** the primary hash name. */
535
blob_reset(&hash);
536
hname_hash(pBlob, 0, &hash);
537
}
538
}else{
539
blob_init(&hash, zUuid, -1);
540
}
541
if( g.eHashPolicy==HPOLICY_AUTO && blob_size(&hash)>HNAME_LEN_SHA1 ){
542
g.eHashPolicy = HPOLICY_SHA3;
543
db_set_int("hash-policy", HPOLICY_SHA3, 0);
544
}
545
if( nBlob ){
546
size = nBlob;
547
}else{
548
size = blob_size(pBlob);
549
if( srcId ){
550
size = delta_output_size(blob_buffer(pBlob), size);
551
}
552
}
553
554
/* Check to see if the entry already exists and if it does whether
555
** or not the entry is a phantom
556
*/
557
db_prepare(&s1, "SELECT rid, size FROM blob WHERE uuid=%B", &hash);
558
if( db_step(&s1)==SQLITE_ROW ){
559
rid = db_column_int(&s1, 0);
560
if( db_column_int(&s1, 1)>=0 ){
561
/* The entry is not a phantom. There is nothing for us to do
562
** other than return the RID. */
563
db_finalize(&s1);
564
db_end_transaction(0);
565
return rid;
566
}
567
}else{
568
rid = 0; /* No entry with the same hash currently exists */
569
markAsUnclustered = 1;
570
}
571
db_finalize(&s1);
572
573
/* Construct a received-from ID if we do not already have one */
574
content_rcvid_init(0);
575
576
if( nBlob ){
577
cmpr = pBlob[0];
578
}else{
579
blob_compress(pBlob, &cmpr);
580
}
581
if( rid>0 ){
582
/* We are just adding data to a phantom */
583
db_prepare(&s1,
584
"UPDATE blob SET rcvid=%d, size=%d, content=:data WHERE rid=%d",
585
g.rcvid, size, rid
586
);
587
db_bind_blob(&s1, ":data", &cmpr);
588
db_exec(&s1);
589
db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid);
590
if( srcId==0 || content_is_available(srcId) ){
591
isDephantomize = 1;
592
content_mark_available(rid);
593
}
594
}else{
595
/* We are creating a new entry */
596
db_prepare(&s1,
597
"INSERT INTO blob(rcvid,size,uuid,content)"
598
"VALUES(%d,%d,'%q',:data)",
599
g.rcvid, size, blob_str(&hash)
600
);
601
db_bind_blob(&s1, ":data", &cmpr);
602
db_exec(&s1);
603
rid = db_last_insert_rowid();
604
if( !pBlob ){
605
assert(!"cannot happen: pBlob is always non-NULL");
606
db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid);
607
}
608
}
609
if( g.markPrivate || isPrivate ){
610
db_multi_exec("INSERT OR IGNORE INTO private VALUES(%d)", rid);
611
markAsUnclustered = 0;
612
}
613
if( nBlob==0 ) blob_reset(&cmpr);
614
615
/* If the srcId is specified, then the data we just added is
616
** really a delta. Record this fact in the delta table.
617
*/
618
if( srcId ){
619
db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId);
620
}
621
if( !isDephantomize && bag_find(&contentCache.missing, rid) &&
622
(srcId==0 || content_is_available(srcId)) ){
623
content_mark_available(rid);
624
}
625
if( isDephantomize ){
626
after_dephantomize(rid, 0);
627
}
628
629
/* Add the element to the unclustered table if has never been
630
** previously seen.
631
*/
632
if( markAsUnclustered ){
633
db_multi_exec("INSERT OR IGNORE INTO unclustered VALUES(%d)", rid);
634
}
635
636
/* Finish the transaction and cleanup */
637
db_finalize(&s1);
638
db_end_transaction(0);
639
blob_reset(&hash);
640
641
/* Make arrangements to verify that the data can be recovered
642
** before we commit */
643
verify_before_commit(rid);
644
return rid;
645
}
646
647
/*
648
** This is the simple common case for inserting content into the
649
** repository. pBlob is the content to be inserted.
650
**
651
** pBlob is uncompressed and is not deltaed. It is exactly the content
652
** to be inserted.
653
**
654
** The original content of pBlob is not disturbed. The caller continues
655
** to be responsible for pBlob. This routine does *not* take over
656
** responsibility for freeing pBlob.
657
*/
658
int content_put(Blob *pBlob){
659
return content_put_ex(pBlob, 0, 0, 0, 0);
660
}
661
662
663
/*
664
** Create a new phantom with the given hash and return its artifact ID.
665
*/
666
int content_new(const char *zUuid, int isPrivate){
667
int rid;
668
static Stmt s1, s2, s3;
669
670
assert( g.repositoryOpen );
671
db_begin_transaction();
672
if( uuid_is_shunned(zUuid) ){
673
db_end_transaction(0);
674
return 0;
675
}
676
db_static_prepare(&s1,
677
"INSERT INTO blob(rcvid,size,uuid,content)"
678
"VALUES(0,-1,:uuid,NULL)"
679
);
680
db_bind_text(&s1, ":uuid", zUuid);
681
db_exec(&s1);
682
rid = db_last_insert_rowid();
683
db_static_prepare(&s2,
684
"INSERT INTO phantom VALUES(:rid)"
685
);
686
db_bind_int(&s2, ":rid", rid);
687
db_exec(&s2);
688
if( g.markPrivate || isPrivate ){
689
db_multi_exec("INSERT INTO private VALUES(%d)", rid);
690
}else{
691
db_static_prepare(&s3,
692
"INSERT INTO unclustered VALUES(:rid)"
693
);
694
db_bind_int(&s3, ":rid", rid);
695
db_exec(&s3);
696
}
697
bag_insert(&contentCache.missing, rid);
698
db_end_transaction(0);
699
return rid;
700
}
701
702
703
/*
704
** COMMAND: test-content-put
705
**
706
** Usage: %fossil test-content-put FILE
707
**
708
** Read the content of FILE and add it to the Blob table as a new
709
** artifact using a direct call to content_put().
710
*/
711
void test_content_put_cmd(void){
712
int rid;
713
Blob content;
714
if( g.argc!=3 ) usage("FILENAME");
715
db_must_be_within_tree();
716
user_select();
717
blob_read_from_file(&content, g.argv[2], ExtFILE);
718
rid = content_put(&content);
719
fossil_print("inserted as record %d\n", rid);
720
}
721
722
/*
723
** Make sure the content at rid is the original content and is not a
724
** delta.
725
*/
726
void content_undelta(int rid){
727
if( delta_source_rid(rid)>0 ){
728
Blob x;
729
if( content_get(rid, &x) ){
730
Stmt s;
731
db_prepare(&s, "UPDATE blob SET content=:c, size=%d WHERE rid=%d",
732
blob_size(&x), rid);
733
blob_compress(&x, &x);
734
db_bind_blob(&s, ":c", &x);
735
db_exec(&s);
736
db_finalize(&s);
737
blob_reset(&x);
738
db_multi_exec("DELETE FROM delta WHERE rid=%d", rid);
739
}
740
}
741
}
742
743
/*
744
** COMMAND: test-content-undelta
745
**
746
** Make sure the content at RECORDID is not a delta
747
*/
748
void test_content_undelta_cmd(void){
749
int rid;
750
if( g.argc!=3 ) usage("RECORDID");
751
db_must_be_within_tree();
752
rid = atoi(g.argv[2]);
753
content_undelta(rid);
754
}
755
756
/*
757
** Return true if the given RID is marked as PRIVATE.
758
*/
759
int content_is_private(int rid){
760
static Stmt s1;
761
int rc;
762
db_static_prepare(&s1,
763
"SELECT 1 FROM private WHERE rid=:rid"
764
);
765
db_bind_int(&s1, ":rid", rid);
766
rc = db_step(&s1);
767
db_reset(&s1);
768
return rc==SQLITE_ROW;
769
}
770
771
/*
772
** Make sure an artifact is public.
773
*/
774
void content_make_public(int rid){
775
static Stmt s1;
776
db_static_prepare(&s1,
777
"DELETE FROM private WHERE rid=:rid"
778
);
779
db_bind_int(&s1, ":rid", rid);
780
db_exec(&s1);
781
}
782
783
/*
784
** Make sure an artifact is private
785
*/
786
void content_make_private(int rid){
787
static Stmt s1;
788
db_static_prepare(&s1,
789
"INSERT OR IGNORE INTO private(rid) VALUES(:rid)"
790
);
791
db_bind_int(&s1, ":rid", rid);
792
db_exec(&s1);
793
}
794
795
/*
796
** Try to change the storage of rid so that it is a delta from one
797
** of the artifacts given in aSrc[0]..aSrc[nSrc-1]. The aSrc[*] that
798
** gives the smallest delta is chosen.
799
**
800
** If rid is already a delta from some other place then no
801
** conversion occurs and this is a no-op unless force==1. If force==1,
802
** then nSrc must also be 1.
803
**
804
** If rid refers to a phantom, no delta is created.
805
**
806
** Never generate a delta that carries a private artifact into a public
807
** artifact. Otherwise, when we go to send the public artifact on a
808
** sync operation, the other end of the sync will never be able to receive
809
** the source of the delta. It is OK to delta private->private and
810
** public->private and public->public. Just no private->public delta.
811
**
812
** If aSrc[bestSrc] is already a delta that depends on rid, then it is
813
** converted to undeltaed text before the aSrc[bestSrc]->rid delta is
814
** created, in order to prevent a delta loop.
815
**
816
** If either rid or aSrc[i] contain less than 50 bytes, or if the
817
** resulting delta does not achieve a compression of at least 25%
818
** the rid is left untouched.
819
**
820
** Return the number of bytes by which the storage associated with rid
821
** is reduced. A return of 0 means no new deltification occurs.
822
*/
823
int content_deltify(int rid, int *aSrc, int nSrc, int force){
824
int s;
825
Blob data; /* Content of rid */
826
Blob src; /* Content of aSrc[i] */
827
Blob delta; /* Delta from aSrc[i] to rid */
828
Blob bestDelta; /* Best delta seen so far */
829
int bestSrc = 0; /* Which aSrc is the source of the best delta */
830
int rc = 0; /* Value to return */
831
int i; /* Loop variable for aSrc[] */
832
833
/*
834
** Historically this routine gracefully ignored the rid 0, but the
835
** addition of a call to content_is_available() in [188ffef2] caused
836
** rid 0 to trigger an assert via bag_find(). Rather than track down
837
** all such calls (e.g. the one via /technoteedit), we'll continue
838
** to gracefully ignore rid 0 here.
839
*/
840
if( 0==rid ) return 0;
841
842
/* If rid is already a child (a delta) of some other artifact, return
843
** immediately if the force flags is false
844
*/
845
if( !force && delta_source_rid(rid)>0 ) return 0;
846
847
/* If rid refers to a phantom, skip deltification. */
848
if( 0==content_is_available(rid) ) return 0;
849
850
/* Get the complete content of the object to be delta-ed. If the size
851
** is less than 50 bytes, then there really is no point in trying to do
852
** a delta, so return immediately
853
*/
854
content_get(rid, &data);
855
if( blob_size(&data)<50 ){
856
/* Do not try to create a delta for objects smaller than 50 bytes */
857
blob_reset(&data);
858
return 0;
859
}
860
blob_init(&bestDelta, 0, 0);
861
862
/* Loop over all candidate delta sources */
863
for(i=0; i<nSrc; i++){
864
int srcid = aSrc[i];
865
if( srcid==rid ) continue;
866
if( content_is_private(srcid) && !content_is_private(rid) ) continue;
867
868
/* Compute all ancestors of srcid and make sure rid is not one of them.
869
** If rid is an ancestor of srcid, then making rid a descendant of srcid
870
** would create a delta loop. */
871
s = srcid;
872
while( (s = delta_source_rid(s))>0 ){
873
if( s==rid ){
874
content_undelta(srcid);
875
break;
876
}
877
}
878
if( s!=0 ) continue;
879
880
content_get(srcid, &src);
881
if( blob_size(&src)<50 ){
882
/* The source is smaller then 50 bytes, so don't bother trying to use it*/
883
blob_reset(&src);
884
continue;
885
}
886
blob_delta_create(&src, &data, &delta);
887
if( blob_size(&delta) < blob_size(&data)*0.75
888
&& (bestSrc<=0 || blob_size(&delta)<blob_size(&bestDelta))
889
){
890
/* This is the best delta seen so far. Remember it */
891
blob_reset(&bestDelta);
892
bestDelta = delta;
893
bestSrc = srcid;
894
}else{
895
/* This delta is not a candidate for becoming the new parent of rid */
896
blob_reset(&delta);
897
}
898
blob_reset(&src);
899
}
900
901
/* If there is a winning candidate for the new parent of rid, then
902
** make that candidate the new parent now */
903
if( bestSrc>0 ){
904
Stmt s1, s2; /* Statements used to create the delta */
905
blob_compress(&bestDelta, &bestDelta);
906
db_prepare(&s1, "UPDATE blob SET content=:data WHERE rid=%d", rid);
907
db_prepare(&s2, "REPLACE INTO delta(rid,srcid)VALUES(%d,%d)", rid, bestSrc);
908
db_bind_blob(&s1, ":data", &bestDelta);
909
db_begin_transaction();
910
rc = db_int(0, "SELECT octet_length(content) FROM blob WHERE rid=%d", rid);
911
db_exec(&s1);
912
db_exec(&s2);
913
db_end_transaction(0);
914
db_finalize(&s1);
915
db_finalize(&s2);
916
verify_before_commit(rid);
917
rc -= blob_size(&bestDelta);
918
}
919
blob_reset(&data);
920
blob_reset(&bestDelta);
921
return rc;
922
}
923
924
/*
925
** COMMAND: test-content-deltify
926
**
927
** Usage: %fossil test-content-deltify RID SRCID SRCID ... [--force]
928
**
929
** Convert the content at RID into a delta from one of the SRCIDs.
930
*/
931
void test_content_deltify_cmd(void){
932
int nSrc;
933
int *aSrc;
934
int i;
935
int bForce = find_option("force",0,0)!=0;
936
if( g.argc<3 ) usage("[--force] RID SRCID SRCID...");
937
aSrc = fossil_malloc( (g.argc-2)*sizeof(aSrc[0]) );
938
nSrc = 0;
939
for(i=2; i<g.argc; i++) aSrc[nSrc++] = atoi(g.argv[i]);
940
db_must_be_within_tree();
941
content_deltify(atoi(g.argv[2]), aSrc, nSrc, bForce);
942
}
943
944
/*
945
** Return true if Blob p looks like it might be a parsable control artifact.
946
*/
947
static int looks_like_control_artifact(Blob *p){
948
const char *z = blob_buffer(p);
949
int n = blob_size(p);
950
if( n<10 ) return 0;
951
if( strncmp(z, "-----BEGIN PGP SIGNED MESSAGE-----", 34)==0 ) return 1;
952
if( strncmp(z, "-----BEGIN SSH SIGNED MESSAGE-----", 34)==0 ) return 1;
953
if( z[0]<'A' || z[0]>'Z' || z[1]!=' ' || z[0]=='I' ) return 0;
954
if( z[n-1]!='\n' ) return 0;
955
return 1;
956
}
957
958
/*
959
** COMMAND: test-integrity
960
**
961
** Verify that all content can be extracted from the BLOB table correctly.
962
** If the BLOB table is correct, then the repository can always be
963
** successfully reconstructed using "fossil rebuild".
964
**
965
** Options:
966
** -d|--db-only Run "PRAGMA integrity_check" on the database only.
967
** No other validation is performed.
968
** --parse Parse all manifests, wikis, tickets, events, and
969
** so forth, reporting any errors found.
970
** --quick Run "PRAGMA quick_check" on the database only.
971
** No other validation is performed.
972
*/
973
void test_integrity(void){
974
Stmt q;
975
Blob content;
976
int n1 = 0;
977
int n2 = 0;
978
int nErr = 0;
979
int total;
980
int nCA = 0;
981
int anCA[10];
982
int bParse = find_option("parse",0,0)!=0;
983
int bDbOnly = find_option("db-only","d",0)!=0;
984
int bQuick = find_option("quick",0,0)!=0;
985
db_find_and_open_repository(OPEN_ANY_SCHEMA, 2);
986
if( bDbOnly || bQuick ){
987
const char *zType = bQuick ? "quick" : "integrity";
988
char *zRes;
989
zRes = db_text(0,"PRAGMA repository.%s_check", zType/*safe-for-%s*/);
990
if( fossil_strcmp(zRes,"ok")!=0 ){
991
fossil_print("%s_check failed!\n", zType);
992
exit(1);
993
}else{
994
fossil_print("ok\n");
995
}
996
return;
997
}
998
memset(anCA, 0, sizeof(anCA));
999
1000
/* Make sure no public artifact is a delta from a private artifact */
1001
db_prepare(&q,
1002
"SELECT "
1003
" rid, (SELECT uuid FROM blob WHERE rid=delta.rid),"
1004
" srcid, (SELECT uuid FROM blob WHERE rid=delta.srcid)"
1005
" FROM delta"
1006
" WHERE srcid in private AND rid NOT IN private"
1007
);
1008
while( db_step(&q)==SQLITE_ROW ){
1009
int rid = db_column_int(&q, 0);
1010
const char *zId = db_column_text(&q, 1);
1011
int srcid = db_column_int(&q, 2);
1012
const char *zSrc = db_column_text(&q, 3);
1013
fossil_print(
1014
"public artifact %S (%d) is a delta from private artifact %S (%d)\n",
1015
zId, rid, zSrc, srcid
1016
);
1017
nErr++;
1018
}
1019
db_finalize(&q);
1020
1021
db_prepare(&q, "SELECT rid, uuid, size FROM blob ORDER BY rid");
1022
total = db_int(0, "SELECT max(rid) FROM blob");
1023
while( db_step(&q)==SQLITE_ROW ){
1024
int rid = db_column_int(&q, 0);
1025
const char *zUuid = db_column_text(&q, 1);
1026
int nUuid = db_column_bytes(&q, 1);
1027
int size = db_column_int(&q, 2);
1028
n1++;
1029
fossil_print(" %d/%d\r", n1, total);
1030
fflush(stdout);
1031
if( size<0 ){
1032
fossil_print("skip phantom %d %s\n", rid, zUuid);
1033
continue; /* Ignore phantoms */
1034
}
1035
content_get(rid, &content);
1036
if( (int)blob_size(&content)!=size ){
1037
fossil_print("size mismatch on artifact %d: wanted %d but got %d\n",
1038
rid, size, blob_size(&content));
1039
nErr++;
1040
}
1041
if( !hname_verify_hash(&content, zUuid, nUuid) ){
1042
fossil_print("wrong hash on artifact %d\n",rid);
1043
nErr++;
1044
}
1045
if( bParse && looks_like_control_artifact(&content) ){
1046
Blob err;
1047
int i, n;
1048
char *z;
1049
Manifest *p;
1050
char zFirstLine[400];
1051
blob_zero(&err);
1052
1053
z = blob_buffer(&content);
1054
n = blob_size(&content);
1055
for(i=0; i<n && z[i] && z[i]!='\n' && i<(int)sizeof(zFirstLine)-1; i++){}
1056
memcpy(zFirstLine, z, i);
1057
zFirstLine[i] = 0;
1058
p = manifest_parse(&content, 0, &err);
1059
if( p==0 ){
1060
fossil_print("manifest_parse failed for %s:\n%s\n",
1061
zUuid, blob_str(&err));
1062
if( strncmp(blob_str(&err), "line 1:", 7)==0 ){
1063
fossil_print("\"%s\"\n", zFirstLine);
1064
}
1065
}else{
1066
anCA[p->type]++;
1067
manifest_destroy(p);
1068
nCA++;
1069
}
1070
blob_reset(&err);
1071
}else{
1072
blob_reset(&content);
1073
}
1074
n2++;
1075
}
1076
db_finalize(&q);
1077
fossil_print("%d non-phantom blobs (out of %d total) checked: %d errors\n",
1078
n2, n1, nErr);
1079
if( bParse ){
1080
static const char *const azType[] = { 0, "manifest", "cluster",
1081
"control", "wiki", "ticket", "attachment", "event" };
1082
int i;
1083
fossil_print("%d total control artifacts\n", nCA);
1084
for(i=1; i<count(azType); i++){
1085
if( anCA[i] ) fossil_print(" %d %ss\n", anCA[i], azType[i]);
1086
}
1087
}
1088
fossil_print("low-level database integrity-check: ");
1089
fossil_print("%s\n", db_text(0, "PRAGMA integrity_check(10)"));
1090
}
1091
1092
/*
1093
** COMMAND: test-orphans
1094
**
1095
** Search the repository for orphaned artifacts.
1096
*/
1097
void test_orphans(void){
1098
Stmt q;
1099
int cnt = 0;
1100
1101
db_find_and_open_repository(0, 0);
1102
db_multi_exec(
1103
"CREATE TEMP TABLE used(id INTEGER PRIMARY KEY ON CONFLICT IGNORE);"
1104
"INSERT INTO used SELECT mid FROM mlink;" /* Manifests */
1105
"INSERT INTO used SELECT fid FROM mlink;" /* Files */
1106
"INSERT INTO used SELECT srcid FROM tagxref WHERE srcid>0;" /* Tags */
1107
"INSERT INTO used SELECT rid FROM tagxref;" /* Wiki & tickets */
1108
"INSERT INTO used SELECT rid FROM attachment JOIN blob ON src=uuid;"
1109
"INSERT INTO used SELECT attachid FROM attachment;"
1110
"INSERT INTO used SELECT objid FROM event;"
1111
);
1112
db_prepare(&q, "SELECT rid, uuid, size FROM blob WHERE rid NOT IN used");
1113
while( db_step(&q)==SQLITE_ROW ){
1114
fossil_print("%7d %s size: %d\n",
1115
db_column_int(&q, 0),
1116
db_column_text(&q, 1),
1117
db_column_int(&q,2));
1118
cnt++;
1119
}
1120
db_finalize(&q);
1121
fossil_print("%d orphans\n", cnt);
1122
}
1123
1124
/* Allowed flags for check_exists */
1125
/* Bit for the "flags" argument of check_exists().  When this bit is set,
** check_exists() returns 0 without printing anything for a hash that is
** listed in the shun table. */
#define MISSING_SHUNNED 0x0001 /* Do not report shunned artifacts */
1126
1127
/* This is a helper routine for test-artifacts.
1128
**
1129
** Check to see that the artifact hash referenced by zUuid exists in the
1130
** repository. If it does, return 0. If it does not, generate an error
1131
** message and return 1.
1132
*/
1133
static int check_exists(
1134
const char *zUuid, /* Hash of the artifact we are checking for */
1135
unsigned flags, /* Flags */
1136
Manifest *p, /* The control artifact that references zUuid */
1137
const char *zRole, /* Role of zUuid in p */
1138
const char *zDetail /* Additional information, such as a filename */
1139
){
1140
static Stmt q;
1141
int rc = 0;
1142
1143
db_static_prepare(&q, "SELECT size FROM blob WHERE uuid=:uuid");
1144
if( zUuid==0 || zUuid[0]==0 ) return 0;
1145
db_bind_text(&q, ":uuid", zUuid);
1146
if( db_step(&q)==SQLITE_ROW ){
1147
int size = db_column_int(&q, 0);
1148
if( size<0 ) rc = 2;
1149
}else{
1150
rc = 1;
1151
}
1152
db_reset(&q);
1153
if( rc ){
1154
const char *zCFType = "control artifact";
1155
char *zSrc;
1156
char *zDate;
1157
const char *zErrType = "MISSING";
1158
if( db_exists("SELECT 1 FROM shun WHERE uuid=%Q", zUuid) ){
1159
if( flags & MISSING_SHUNNED ) return 0;
1160
zErrType = "SHUNNED";
1161
}
1162
switch( p->type ){
1163
case CFTYPE_MANIFEST: zCFType = "check-in"; break;
1164
case CFTYPE_CLUSTER: zCFType = "cluster"; break;
1165
case CFTYPE_CONTROL: zCFType = "tag"; break;
1166
case CFTYPE_WIKI: zCFType = "wiki"; break;
1167
case CFTYPE_TICKET: zCFType = "ticket"; break;
1168
case CFTYPE_ATTACHMENT: zCFType = "attachment"; break;
1169
case CFTYPE_EVENT: zCFType = "event"; break;
1170
}
1171
zSrc = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", p->rid);
1172
if( p->rDate>0.0 ){
1173
zDate = db_text(0, "SELECT datetime(%.17g)", p->rDate);
1174
}else{
1175
zDate = db_text(0,
1176
"SELECT datetime(rcvfrom.mtime)"
1177
" FROM blob, rcvfrom"
1178
" WHERE blob.rcvid=rcvfrom.rcvid"
1179
" AND blob.rid=%d", p->rid);
1180
}
1181
fossil_print("%s: %s\n %s %s %S (%d) %s\n",
1182
zErrType, zUuid, zRole, zCFType, zSrc, p->rid, zDate);
1183
if( zDetail && zDetail[0] ){
1184
fossil_print(" %s\n", zDetail);
1185
}
1186
fossil_free(zSrc);
1187
fossil_free(zDate);
1188
rc = 1;
1189
}
1190
return rc;
1191
}
1192
1193
/*
1194
** COMMAND: test-missing
1195
**
1196
** Usage: %fossil test-missing
1197
**
1198
** Look at every artifact in the repository and verify that
1199
** all references are satisfied. Report any referenced artifacts
1200
** that are missing or shunned.
1201
**
1202
** Options:
1203
** --notshunned Do not report shunned artifacts
1204
** -q|--quiet Only show output if there are errors
1205
*/
1206
void test_missing(void){
1207
Stmt q;
1208
Blob content;
1209
int nErr = 0;
1210
int nArtifact = 0;
1211
int i;
1212
Manifest *p;
1213
unsigned flags = 0;
1214
int quietFlag;
1215
1216
if( find_option("notshunned", 0, 0)!=0 ) flags |= MISSING_SHUNNED;
1217
quietFlag = g.fQuiet;
1218
db_find_and_open_repository(OPEN_ANY_SCHEMA, 0);
1219
db_prepare(&q,
1220
"SELECT mid FROM mlink UNION "
1221
"SELECT srcid FROM tagxref WHERE srcid>0 UNION "
1222
"SELECT rid FROM tagxref UNION "
1223
"SELECT rid FROM attachment JOIN blob ON src=uuid UNION "
1224
"SELECT objid FROM event");
1225
while( db_step(&q)==SQLITE_ROW ){
1226
int rid = db_column_int(&q, 0);
1227
content_get(rid, &content);
1228
p = manifest_parse(&content, rid, 0);
1229
if( p ){
1230
nArtifact++;
1231
nErr += check_exists(p->zBaseline, flags, p, "baseline of", 0);
1232
nErr += check_exists(p->zAttachSrc, flags, p, "file of", 0);
1233
for(i=0; i<p->nFile; i++){
1234
nErr += check_exists(p->aFile[i].zUuid, flags, p, "file of",
1235
p->aFile[i].zName);
1236
}
1237
for(i=0; i<p->nParent; i++){
1238
nErr += check_exists(p->azParent[i], flags, p, "parent of", 0);
1239
}
1240
for(i=0; i<p->nCherrypick; i++){
1241
nErr += check_exists(p->aCherrypick[i].zCPTarget+1, flags, p,
1242
"cherry-pick target of", 0);
1243
nErr += check_exists(p->aCherrypick[i].zCPBase, flags, p,
1244
"cherry-pick baseline of", 0);
1245
}
1246
for(i=0; i<p->nCChild; i++){
1247
nErr += check_exists(p->azCChild[i], flags, p, "in", 0);
1248
}
1249
for(i=0; i<p->nTag; i++){
1250
nErr += check_exists(p->aTag[i].zUuid, flags, p, "target of", 0);
1251
}
1252
manifest_destroy(p);
1253
}
1254
}
1255
db_finalize(&q);
1256
if( nErr>0 || quietFlag==0 ){
1257
fossil_print("%d missing or shunned references in %d control artifacts\n",
1258
nErr, nArtifact);
1259
}
1260
}
1261
1262
/*
1263
** COMMAND: test-content-erase
1264
**
1265
** Usage: %fossil test-content-erase RID ....
1266
**
1267
** Remove all traces of one or more artifacts from the local repository.
1268
**
1269
** WARNING: This command destroys data and can cause you to lose work.
1270
** Make sure you have a backup copy before using this command!
1271
**
1272
** WARNING: You must run "fossil rebuild" after this command to rebuild
1273
** the metadata.
1274
**
1275
** Note that the arguments are the integer raw RID values from the BLOB table,
1276
** not artifact hashes or labels.
1277
*/
1278
void test_content_erase(void){
1279
int i;
1280
Blob x;
1281
char c;
1282
Stmt q;
1283
prompt_user("This command erases information from the repository and\n"
1284
"might irrecoverably damage the repository. Make sure you\n"
1285
"have a backup copy!\n"
1286
"Continue? (y/N)? ", &x);
1287
c = blob_str(&x)[0];
1288
blob_reset(&x);
1289
if( c!='y' && c!='Y' ) return;
1290
db_find_and_open_repository(OPEN_ANY_SCHEMA, 0);
1291
db_begin_transaction();
1292
db_prepare(&q, "SELECT rid FROM delta WHERE srcid=:rid");
1293
for(i=2; i<g.argc; i++){
1294
int rid = atoi(g.argv[i]);
1295
fossil_print("Erasing artifact %d (%s)\n",
1296
rid, db_text("", "SELECT uuid FROM blob WHERE rid=%d", rid));
1297
db_bind_int(&q, ":rid", rid);
1298
while( db_step(&q)==SQLITE_ROW ){
1299
content_undelta(db_column_int(&q,0));
1300
}
1301
db_reset(&q);
1302
db_multi_exec("DELETE FROM blob WHERE rid=%d", rid);
1303
db_multi_exec("DELETE FROM delta WHERE rid=%d", rid);
1304
}
1305
db_finalize(&q);
1306
db_end_transaction(0);
1307
}
1308

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button