Fossil SCM

Work toward making Fossil work better on large repositories. This version implements a cache in the content manager. It is not yet clear whether this is a good idea — this check-in might end up on an abandoned branch at some point.

drh 2008-03-06 22:58 trunk
Commit 61ddd63b72f1436c9ad18b6bba9a754276ac50fc
-1
--- src/blob.c
+++ src/blob.c
@@ -206,11 +206,10 @@
206206
/*
207207
** Copy a blob
208208
*/
209209
void blob_copy(Blob *pTo, Blob *pFrom){
210210
blob_is_init(pFrom);
211
- blob_is_init(pTo);
212211
blob_zero(pTo);
213212
blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
214213
}
215214
216215
/*
217216
--- src/blob.c
+++ src/blob.c
@@ -206,11 +206,10 @@
206 /*
207 ** Copy a blob
208 */
209 void blob_copy(Blob *pTo, Blob *pFrom){
210 blob_is_init(pFrom);
211 blob_is_init(pTo);
212 blob_zero(pTo);
213 blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
214 }
215
216 /*
217
--- src/blob.c
+++ src/blob.c
@@ -206,11 +206,10 @@
206 /*
207 ** Copy a blob
208 */
209 void blob_copy(Blob *pTo, Blob *pFrom){
210 blob_is_init(pFrom);
 
211 blob_zero(pTo);
212 blob_append(pTo, blob_buffer(pFrom), blob_size(pFrom));
213 }
214
215 /*
216
+191 -6
--- src/content.c
+++ src/content.c
@@ -24,19 +24,126 @@
2424
** Procedures store and retrieve records from the repository
2525
*/
2626
#include "config.h"
2727
#include "content.h"
2828
#include <assert.h>
29
+
30
+/*
31
+** Macros for debugging
32
+*/
33
+#if 0
34
+# define CONTENT_TRACE(X) printf X;
35
+#else
36
+# define CONTENT_TRACE(X)
37
+#endif
38
+
39
+/*
40
+** The artifact retrival cache
41
+*/
42
+#define MX_CACHE_CNT 50 /* Maximum number of positive cache entries */
43
+#define EXPELL_INTERVAL 5 /* How often to expell from a full cache */
44
+static struct {
45
+ int n; /* Current number of positive cache entries */
46
+ int nextAge; /* Age counter for implementing LRU */
47
+ int skipCnt; /* Used to limit entries expelled from cache */
48
+ struct { /* One instance of this for each cache entry */
49
+ int rid; /* Artifact id */
50
+ int age; /* Age. Newer is larger */
51
+ Blob content; /* Content of the artifact */
52
+ } a[MX_CACHE_CNT]; /* The positive cache */
53
+
54
+ /*
55
+ ** The missing artifact cache.
56
+ **
57
+ ** Artifacts whose record ID are in missingCache cannot be retrieved
58
+ ** either because they are phantoms or because they are a delta that
59
+ ** depends on a phantom. Artifacts whose content we are certain is
60
+ ** available are in availableCache. If an artifact is in neither cache
61
+ ** then its current availablity is unknown.
62
+ */
63
+ Bag missing; /* Cache of artifacts that are incomplete */
64
+ Bag available; /* Cache of artifacts that are complete */
65
+} contentCache;
66
+
67
+
68
+/*
69
+** Clear the content cache.
70
+*/
71
+void content_clear_cache(void){
72
+ int i;
73
+ for(i=0; i<contentCache.n; i++){
74
+ blob_reset(&contentCache.a[i].content);
75
+ }
76
+ bag_clear(&contentCache.missing);
77
+ bag_clear(&contentCache.available);
78
+ contentCache.n = 0;
79
+}
2980
3081
/*
3182
** Return the srcid associated with rid. Or return 0 if rid is
3283
** original content and not a delta.
3384
*/
3485
static int findSrcid(int rid){
3586
int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid);
3687
return srcid;
3788
}
89
+
90
+/*
91
+** Check to see if content is available for artifact "rid". Return
92
+** true if it is. Return false if rid is a phantom or depends on
93
+** a phantom.
94
+*/
95
+int content_is_available(int rid){
96
+ int srcid;
97
+ if( bag_find(&contentCache.missing, rid) ){
98
+ return 0;
99
+ }
100
+ if( bag_find(&contentCache.available, rid) ){
101
+ return 1;
102
+ }
103
+ if( db_int(-1, "SELECT size FROM blob WHERE rid=%d", rid)<0 ){
104
+ bag_insert(&contentCache.missing, rid);
105
+ return 0;
106
+ }
107
+ srcid = findSrcid(rid);
108
+ if( srcid==0 ){
109
+ bag_insert(&contentCache.available, rid);
110
+ return 1;
111
+ }
112
+ if( content_is_available(srcid) ){
113
+ bag_insert(&contentCache.available, rid);
114
+ return 1;
115
+ }else{
116
+ bag_insert(&contentCache.missing, rid);
117
+ return 0;
118
+ }
119
+}
120
+
121
+/*
122
+** Mark artifact rid as being available now. Update the cache to
123
+** show that everything that was formerly unavailable because rid
124
+** was missing is now available.
125
+*/
126
+static void content_mark_available(int rid){
127
+ Bag pending;
128
+ Stmt q;
129
+ if( bag_find(&contentCache.available, rid) ) return;
130
+ bag_init(&pending);
131
+ bag_insert(&pending, rid);
132
+ while( (rid = bag_first(&pending))!=0 ){
133
+ bag_remove(&pending, rid);
134
+ bag_remove(&contentCache.missing, rid);
135
+ bag_insert(&contentCache.available, rid);
136
+ db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid);
137
+ while( db_step(&q)==SQLITE_ROW ){
138
+ int nx = db_column_int(&q, 0);
139
+ bag_insert(&pending, nx);
140
+ }
141
+ db_finalize(&q);
142
+ }
143
+ bag_clear(&pending);
144
+}
38145
39146
/*
40147
** Extract the content for ID rid and put it into the
41148
** uninitialized blob. Return 1 on success. If the record
42149
** is a phantom, zero pBlob and return 0.
@@ -44,16 +151,46 @@
44151
int content_get(int rid, Blob *pBlob){
45152
Stmt q;
46153
Blob src;
47154
int srcid;
48155
int rc = 0;
156
+ int i;
49157
static Bag inProcess;
50158
51159
assert( g.repositoryOpen );
52
- srcid = findSrcid(rid);
53160
blob_zero(pBlob);
161
+
162
+ /* Early out if we know the content is not available */
163
+ if( bag_find(&contentCache.missing, rid) ){
164
+ CONTENT_TRACE(("%*smiss from cache: %d\n",
165
+ bag_count(&inProcess), "", rid))
166
+ return 0;
167
+ }
168
+
169
+ /* Look for the artifact in the cache first */
170
+ for(i=0; i<contentCache.n; i++){
171
+ if( contentCache.a[i].rid==rid ){
172
+ *pBlob = contentCache.a[i].content;
173
+ blob_zero(&contentCache.a[i].content);
174
+ contentCache.n--;
175
+ if( i<contentCache.n ){
176
+ contentCache.a[i] = contentCache.a[contentCache.n];
177
+ }
178
+ CONTENT_TRACE(("%*shit cache: %d\n",
179
+ bag_count(&inProcess), "", rid))
180
+ return 1;
181
+ }
182
+ }
183
+
184
+ /* See if we need to apply a delta to find this artifact */
185
+ srcid = findSrcid(rid);
186
+ CONTENT_TRACE(("%*ssearching for %d. Need %d.\n",
187
+ bag_count(&inProcess), "", rid, srcid))
188
+
189
+
54190
if( srcid ){
191
+ /* Yes, a delta is required */
55192
if( bag_find(&inProcess, srcid) ){
56193
db_multi_exec(
57194
"UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;"
58195
"DELETE FROM delta WHERE rid=%d;"
59196
"INSERT OR IGNORE INTO phantom VALUES(%d);",
@@ -61,10 +198,11 @@
61198
);
62199
blob_zero(pBlob);
63200
return 0;
64201
}
65202
bag_insert(&inProcess, srcid);
203
+
66204
if( content_get(srcid, &src) ){
67205
db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
68206
if( db_step(&q)==SQLITE_ROW ){
69207
Blob delta;
70208
db_ephemeral_blob(&q, 0, &delta);
@@ -73,29 +211,67 @@
73211
blob_delta_apply(&src, &delta, pBlob);
74212
blob_reset(&delta);
75213
rc = 1;
76214
}
77215
db_finalize(&q);
78
- blob_reset(&src);
216
+
217
+ /* Save the srcid artifact in the cache */
218
+ if( contentCache.n<MX_CACHE_CNT ){
219
+ i = contentCache.n++;
220
+ }else if( ((contentCache.skipCnt++)%EXPELL_INTERVAL)!=0 ){
221
+ i = -1;
222
+ }else{
223
+ int j, best;
224
+ best = contentCache.nextAge+1;
225
+ i = -1;
226
+ for(j=0; j<contentCache.n; j++){
227
+ if( contentCache.a[j].age<best ){
228
+ i = j;
229
+ best = contentCache.a[j].age;
230
+ }
231
+ }
232
+ CONTENT_TRACE(("%*sexpell %d from cache\n",
233
+ bag_count(&inProcess), "", contentCache.a[i].rid))
234
+ blob_reset(&contentCache.a[i].content);
235
+ }
236
+ if( i>=0 ){
237
+ contentCache.a[i].content = src;
238
+ contentCache.a[i].age = contentCache.nextAge++;
239
+ contentCache.a[i].rid = srcid;
240
+ CONTENT_TRACE(("%*sadd %d to cache\n",
241
+ bag_count(&inProcess), "", srcid))
242
+ }else{
243
+ blob_reset(&src);
244
+ }
79245
}
80246
bag_remove(&inProcess, srcid);
81247
}else{
248
+ /* No delta required. Read content directly from the database */
82249
db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
83250
if( db_step(&q)==SQLITE_ROW ){
84251
db_ephemeral_blob(&q, 0, pBlob);
85252
blob_uncompress(pBlob, pBlob);
86253
rc = 1;
87254
}
88255
db_finalize(&q);
89256
}
257
+ if( rc==0 ){
258
+ bag_insert(&contentCache.missing, rid);
259
+ }else{
260
+ bag_insert(&contentCache.available, rid);
261
+ }
90262
return rc;
91263
}
92264
93265
/*
94
-** Get the contents of a file within a given revision.
266
+** Get the contents of a file within a given baseline.
95267
*/
96
-int content_get_historical_file(const char *revision, const char *file, Blob *content){
268
+int content_get_historical_file(
269
+ const char *revision, /* Name of the baseline containing the file */
270
+ const char *file, /* Name of the file */
271
+ Blob *content /* Write file content here */
272
+){
97273
Blob mfile;
98274
Manifest m;
99275
int i, rid=0;
100276
101277
rid = name_to_rid(revision);
@@ -194,10 +370,11 @@
194370
int rid;
195371
Stmt s1;
196372
Blob cmpr;
197373
Blob hash;
198374
int markAsUnclustered = 0;
375
+ int isDephantomize = 0;
199376
200377
assert( g.repositoryOpen );
201378
if( pBlob && srcId==0 ){
202379
sha1sum_blob(pBlob, &hash);
203380
}else{
@@ -249,12 +426,13 @@
249426
);
250427
blob_compress(pBlob, &cmpr);
251428
db_bind_blob(&s1, ":data", &cmpr);
252429
db_exec(&s1);
253430
db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid);
254
- if( srcId==0 || db_int(0, "SELECT size FROM blob WHERE rid=%d", srcId)>0 ){
255
- after_dephantomize(rid, 0);
431
+ if( srcId==0 || content_is_available(srcId) ){
432
+ isDephantomize = 1;
433
+ content_mark_available(rid);
256434
}
257435
}else{
258436
/* We are creating a new entry */
259437
db_prepare(&s1,
260438
"INSERT INTO blob(rcvid,size,uuid,content)"
@@ -275,10 +453,17 @@
275453
/* If the srcId is specified, then the data we just added is
276454
** really a delta. Record this fact in the delta table.
277455
*/
278456
if( srcId ){
279457
db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId);
458
+ }
459
+ if( !isDephantomize && bag_find(&contentCache.missing, rid) &&
460
+ (srcId==0 || content_is_available(srcId)) ){
461
+ content_mark_available(rid);
462
+ }
463
+ if( isDephantomize ){
464
+ after_dephantomize(rid, 0);
280465
}
281466
282467
/* Add the element to the unclustered table if has never been
283468
** previously seen.
284469
*/
285470
--- src/content.c
+++ src/content.c
@@ -24,19 +24,126 @@
24 ** Procedures store and retrieve records from the repository
25 */
26 #include "config.h"
27 #include "content.h"
28 #include <assert.h>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
30 /*
31 ** Return the srcid associated with rid. Or return 0 if rid is
32 ** original content and not a delta.
33 */
34 static int findSrcid(int rid){
35 int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid);
36 return srcid;
37 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
39 /*
40 ** Extract the content for ID rid and put it into the
41 ** uninitialized blob. Return 1 on success. If the record
42 ** is a phantom, zero pBlob and return 0.
@@ -44,16 +151,46 @@
44 int content_get(int rid, Blob *pBlob){
45 Stmt q;
46 Blob src;
47 int srcid;
48 int rc = 0;
 
49 static Bag inProcess;
50
51 assert( g.repositoryOpen );
52 srcid = findSrcid(rid);
53 blob_zero(pBlob);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54 if( srcid ){
 
55 if( bag_find(&inProcess, srcid) ){
56 db_multi_exec(
57 "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;"
58 "DELETE FROM delta WHERE rid=%d;"
59 "INSERT OR IGNORE INTO phantom VALUES(%d);",
@@ -61,10 +198,11 @@
61 );
62 blob_zero(pBlob);
63 return 0;
64 }
65 bag_insert(&inProcess, srcid);
 
66 if( content_get(srcid, &src) ){
67 db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
68 if( db_step(&q)==SQLITE_ROW ){
69 Blob delta;
70 db_ephemeral_blob(&q, 0, &delta);
@@ -73,29 +211,67 @@
73 blob_delta_apply(&src, &delta, pBlob);
74 blob_reset(&delta);
75 rc = 1;
76 }
77 db_finalize(&q);
78 blob_reset(&src);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79 }
80 bag_remove(&inProcess, srcid);
81 }else{
 
82 db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
83 if( db_step(&q)==SQLITE_ROW ){
84 db_ephemeral_blob(&q, 0, pBlob);
85 blob_uncompress(pBlob, pBlob);
86 rc = 1;
87 }
88 db_finalize(&q);
89 }
 
 
 
 
 
90 return rc;
91 }
92
93 /*
94 ** Get the contents of a file within a given revision.
95 */
96 int content_get_historical_file(const char *revision, const char *file, Blob *content){
 
 
 
 
97 Blob mfile;
98 Manifest m;
99 int i, rid=0;
100
101 rid = name_to_rid(revision);
@@ -194,10 +370,11 @@
194 int rid;
195 Stmt s1;
196 Blob cmpr;
197 Blob hash;
198 int markAsUnclustered = 0;
 
199
200 assert( g.repositoryOpen );
201 if( pBlob && srcId==0 ){
202 sha1sum_blob(pBlob, &hash);
203 }else{
@@ -249,12 +426,13 @@
249 );
250 blob_compress(pBlob, &cmpr);
251 db_bind_blob(&s1, ":data", &cmpr);
252 db_exec(&s1);
253 db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid);
254 if( srcId==0 || db_int(0, "SELECT size FROM blob WHERE rid=%d", srcId)>0 ){
255 after_dephantomize(rid, 0);
 
256 }
257 }else{
258 /* We are creating a new entry */
259 db_prepare(&s1,
260 "INSERT INTO blob(rcvid,size,uuid,content)"
@@ -275,10 +453,17 @@
275 /* If the srcId is specified, then the data we just added is
276 ** really a delta. Record this fact in the delta table.
277 */
278 if( srcId ){
279 db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId);
 
 
 
 
 
 
 
280 }
281
282 /* Add the element to the unclustered table if has never been
283 ** previously seen.
284 */
285
--- src/content.c
+++ src/content.c
@@ -24,19 +24,126 @@
24 ** Procedures store and retrieve records from the repository
25 */
26 #include "config.h"
27 #include "content.h"
28 #include <assert.h>
29
30 /*
31 ** Macros for debugging
32 */
33 #if 0
34 # define CONTENT_TRACE(X) printf X;
35 #else
36 # define CONTENT_TRACE(X)
37 #endif
38
39 /*
40 ** The artifact retrival cache
41 */
42 #define MX_CACHE_CNT 50 /* Maximum number of positive cache entries */
43 #define EXPELL_INTERVAL 5 /* How often to expell from a full cache */
44 static struct {
45 int n; /* Current number of positive cache entries */
46 int nextAge; /* Age counter for implementing LRU */
47 int skipCnt; /* Used to limit entries expelled from cache */
48 struct { /* One instance of this for each cache entry */
49 int rid; /* Artifact id */
50 int age; /* Age. Newer is larger */
51 Blob content; /* Content of the artifact */
52 } a[MX_CACHE_CNT]; /* The positive cache */
53
54 /*
55 ** The missing artifact cache.
56 **
57 ** Artifacts whose record ID are in missingCache cannot be retrieved
58 ** either because they are phantoms or because they are a delta that
59 ** depends on a phantom. Artifacts whose content we are certain is
60 ** available are in availableCache. If an artifact is in neither cache
61 ** then its current availablity is unknown.
62 */
63 Bag missing; /* Cache of artifacts that are incomplete */
64 Bag available; /* Cache of artifacts that are complete */
65 } contentCache;
66
67
68 /*
69 ** Clear the content cache.
70 */
71 void content_clear_cache(void){
72 int i;
73 for(i=0; i<contentCache.n; i++){
74 blob_reset(&contentCache.a[i].content);
75 }
76 bag_clear(&contentCache.missing);
77 bag_clear(&contentCache.available);
78 contentCache.n = 0;
79 }
80
81 /*
82 ** Return the srcid associated with rid. Or return 0 if rid is
83 ** original content and not a delta.
84 */
85 static int findSrcid(int rid){
86 int srcid = db_int(0, "SELECT srcid FROM delta WHERE rid=%d", rid);
87 return srcid;
88 }
89
90 /*
91 ** Check to see if content is available for artifact "rid". Return
92 ** true if it is. Return false if rid is a phantom or depends on
93 ** a phantom.
94 */
95 int content_is_available(int rid){
96 int srcid;
97 if( bag_find(&contentCache.missing, rid) ){
98 return 0;
99 }
100 if( bag_find(&contentCache.available, rid) ){
101 return 1;
102 }
103 if( db_int(-1, "SELECT size FROM blob WHERE rid=%d", rid)<0 ){
104 bag_insert(&contentCache.missing, rid);
105 return 0;
106 }
107 srcid = findSrcid(rid);
108 if( srcid==0 ){
109 bag_insert(&contentCache.available, rid);
110 return 1;
111 }
112 if( content_is_available(srcid) ){
113 bag_insert(&contentCache.available, rid);
114 return 1;
115 }else{
116 bag_insert(&contentCache.missing, rid);
117 return 0;
118 }
119 }
120
121 /*
122 ** Mark artifact rid as being available now. Update the cache to
123 ** show that everything that was formerly unavailable because rid
124 ** was missing is now available.
125 */
126 static void content_mark_available(int rid){
127 Bag pending;
128 Stmt q;
129 if( bag_find(&contentCache.available, rid) ) return;
130 bag_init(&pending);
131 bag_insert(&pending, rid);
132 while( (rid = bag_first(&pending))!=0 ){
133 bag_remove(&pending, rid);
134 bag_remove(&contentCache.missing, rid);
135 bag_insert(&contentCache.available, rid);
136 db_prepare(&q, "SELECT rid FROM delta WHERE srcid=%d", rid);
137 while( db_step(&q)==SQLITE_ROW ){
138 int nx = db_column_int(&q, 0);
139 bag_insert(&pending, nx);
140 }
141 db_finalize(&q);
142 }
143 bag_clear(&pending);
144 }
145
146 /*
147 ** Extract the content for ID rid and put it into the
148 ** uninitialized blob. Return 1 on success. If the record
149 ** is a phantom, zero pBlob and return 0.
@@ -44,16 +151,46 @@
151 int content_get(int rid, Blob *pBlob){
152 Stmt q;
153 Blob src;
154 int srcid;
155 int rc = 0;
156 int i;
157 static Bag inProcess;
158
159 assert( g.repositoryOpen );
 
160 blob_zero(pBlob);
161
162 /* Early out if we know the content is not available */
163 if( bag_find(&contentCache.missing, rid) ){
164 CONTENT_TRACE(("%*smiss from cache: %d\n",
165 bag_count(&inProcess), "", rid))
166 return 0;
167 }
168
169 /* Look for the artifact in the cache first */
170 for(i=0; i<contentCache.n; i++){
171 if( contentCache.a[i].rid==rid ){
172 *pBlob = contentCache.a[i].content;
173 blob_zero(&contentCache.a[i].content);
174 contentCache.n--;
175 if( i<contentCache.n ){
176 contentCache.a[i] = contentCache.a[contentCache.n];
177 }
178 CONTENT_TRACE(("%*shit cache: %d\n",
179 bag_count(&inProcess), "", rid))
180 return 1;
181 }
182 }
183
184 /* See if we need to apply a delta to find this artifact */
185 srcid = findSrcid(rid);
186 CONTENT_TRACE(("%*ssearching for %d. Need %d.\n",
187 bag_count(&inProcess), "", rid, srcid))
188
189
190 if( srcid ){
191 /* Yes, a delta is required */
192 if( bag_find(&inProcess, srcid) ){
193 db_multi_exec(
194 "UPDATE blob SET content=NULL, size=-1 WHERE rid=%d;"
195 "DELETE FROM delta WHERE rid=%d;"
196 "INSERT OR IGNORE INTO phantom VALUES(%d);",
@@ -61,10 +198,11 @@
198 );
199 blob_zero(pBlob);
200 return 0;
201 }
202 bag_insert(&inProcess, srcid);
203
204 if( content_get(srcid, &src) ){
205 db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
206 if( db_step(&q)==SQLITE_ROW ){
207 Blob delta;
208 db_ephemeral_blob(&q, 0, &delta);
@@ -73,29 +211,67 @@
211 blob_delta_apply(&src, &delta, pBlob);
212 blob_reset(&delta);
213 rc = 1;
214 }
215 db_finalize(&q);
216
217 /* Save the srcid artifact in the cache */
218 if( contentCache.n<MX_CACHE_CNT ){
219 i = contentCache.n++;
220 }else if( ((contentCache.skipCnt++)%EXPELL_INTERVAL)!=0 ){
221 i = -1;
222 }else{
223 int j, best;
224 best = contentCache.nextAge+1;
225 i = -1;
226 for(j=0; j<contentCache.n; j++){
227 if( contentCache.a[j].age<best ){
228 i = j;
229 best = contentCache.a[j].age;
230 }
231 }
232 CONTENT_TRACE(("%*sexpell %d from cache\n",
233 bag_count(&inProcess), "", contentCache.a[i].rid))
234 blob_reset(&contentCache.a[i].content);
235 }
236 if( i>=0 ){
237 contentCache.a[i].content = src;
238 contentCache.a[i].age = contentCache.nextAge++;
239 contentCache.a[i].rid = srcid;
240 CONTENT_TRACE(("%*sadd %d to cache\n",
241 bag_count(&inProcess), "", srcid))
242 }else{
243 blob_reset(&src);
244 }
245 }
246 bag_remove(&inProcess, srcid);
247 }else{
248 /* No delta required. Read content directly from the database */
249 db_prepare(&q, "SELECT content FROM blob WHERE rid=%d AND size>=0", rid);
250 if( db_step(&q)==SQLITE_ROW ){
251 db_ephemeral_blob(&q, 0, pBlob);
252 blob_uncompress(pBlob, pBlob);
253 rc = 1;
254 }
255 db_finalize(&q);
256 }
257 if( rc==0 ){
258 bag_insert(&contentCache.missing, rid);
259 }else{
260 bag_insert(&contentCache.available, rid);
261 }
262 return rc;
263 }
264
265 /*
266 ** Get the contents of a file within a given baseline.
267 */
268 int content_get_historical_file(
269 const char *revision, /* Name of the baseline containing the file */
270 const char *file, /* Name of the file */
271 Blob *content /* Write file content here */
272 ){
273 Blob mfile;
274 Manifest m;
275 int i, rid=0;
276
277 rid = name_to_rid(revision);
@@ -194,10 +370,11 @@
370 int rid;
371 Stmt s1;
372 Blob cmpr;
373 Blob hash;
374 int markAsUnclustered = 0;
375 int isDephantomize = 0;
376
377 assert( g.repositoryOpen );
378 if( pBlob && srcId==0 ){
379 sha1sum_blob(pBlob, &hash);
380 }else{
@@ -249,12 +426,13 @@
426 );
427 blob_compress(pBlob, &cmpr);
428 db_bind_blob(&s1, ":data", &cmpr);
429 db_exec(&s1);
430 db_multi_exec("DELETE FROM phantom WHERE rid=%d", rid);
431 if( srcId==0 || content_is_available(srcId) ){
432 isDephantomize = 1;
433 content_mark_available(rid);
434 }
435 }else{
436 /* We are creating a new entry */
437 db_prepare(&s1,
438 "INSERT INTO blob(rcvid,size,uuid,content)"
@@ -275,10 +453,17 @@
453 /* If the srcId is specified, then the data we just added is
454 ** really a delta. Record this fact in the delta table.
455 */
456 if( srcId ){
457 db_multi_exec("REPLACE INTO delta(rid,srcid) VALUES(%d,%d)", rid, srcId);
458 }
459 if( !isDephantomize && bag_find(&contentCache.missing, rid) &&
460 (srcId==0 || content_is_available(srcId)) ){
461 content_mark_available(rid);
462 }
463 if( isDephantomize ){
464 after_dephantomize(rid, 0);
465 }
466
467 /* Add the element to the unclustered table if has never been
468 ** previously seen.
469 */
470
+17 -10
--- src/delta.c
+++ src/delta.c
@@ -196,24 +196,31 @@
196196
}
197197
198198
/*
199199
** Compute a 32-bit checksum on the N-byte buffer. Return the result.
200200
*/
201
-static unsigned int checksum(const char *zIn, int N){
202
- const unsigned char *z = (const unsigned char*)zIn;
203
- unsigned int sum = 0;
204
- while( N>=4 ){
201
+static unsigned int checksum(const char *zIn, size_t N){
202
+ const unsigned char *z = (const unsigned char *)zIn;
203
+ unsigned sum = 0;
204
+ while(N >= 16){
205
+ sum += ((unsigned)z[0] + z[4] + z[8] + z[12]) << 24;
206
+ sum += ((unsigned)z[1] + z[5] + z[9] + z[13]) << 16;
207
+ sum += ((unsigned)z[2] + z[6] + z[10]+ z[14]) << 8;
208
+ sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
209
+ z += 16;
210
+ N -= 16;
211
+ }
212
+ while(N >= 4){
205213
sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
206214
z += 4;
207215
N -= 4;
208216
}
209
- if( N>0 ){
210
- unsigned char zBuf[4];
211
- memset(zBuf, 0, sizeof(zBuf));
212
- memcpy(zBuf, z, N);
213
- z = zBuf;
214
- sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
217
+ switch(N){
218
+ case 3: sum += (z[2] << 8);
219
+ case 2: sum += (z[1] << 16);
220
+ case 1: sum += (z[0] << 24);
221
+ default: ;
215222
}
216223
return sum;
217224
}
218225
219226
/*
220227
--- src/delta.c
+++ src/delta.c
@@ -196,24 +196,31 @@
196 }
197
198 /*
199 ** Compute a 32-bit checksum on the N-byte buffer. Return the result.
200 */
201 static unsigned int checksum(const char *zIn, int N){
202 const unsigned char *z = (const unsigned char*)zIn;
203 unsigned int sum = 0;
204 while( N>=4 ){
 
 
 
 
 
 
 
 
205 sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
206 z += 4;
207 N -= 4;
208 }
209 if( N>0 ){
210 unsigned char zBuf[4];
211 memset(zBuf, 0, sizeof(zBuf));
212 memcpy(zBuf, z, N);
213 z = zBuf;
214 sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
215 }
216 return sum;
217 }
218
219 /*
220
--- src/delta.c
+++ src/delta.c
@@ -196,24 +196,31 @@
196 }
197
198 /*
199 ** Compute a 32-bit checksum on the N-byte buffer. Return the result.
200 */
201 static unsigned int checksum(const char *zIn, size_t N){
202 const unsigned char *z = (const unsigned char *)zIn;
203 unsigned sum = 0;
204 while(N >= 16){
205 sum += ((unsigned)z[0] + z[4] + z[8] + z[12]) << 24;
206 sum += ((unsigned)z[1] + z[5] + z[9] + z[13]) << 16;
207 sum += ((unsigned)z[2] + z[6] + z[10]+ z[14]) << 8;
208 sum += ((unsigned)z[3] + z[7] + z[11]+ z[15]);
209 z += 16;
210 N -= 16;
211 }
212 while(N >= 4){
213 sum += (z[0]<<24) | (z[1]<<16) | (z[2]<<8) | z[3];
214 z += 4;
215 N -= 4;
216 }
217 switch(N){
218 case 3: sum += (z[2] << 8);
219 case 2: sum += (z[1] << 16);
220 case 1: sum += (z[0] << 24);
221 default: ;
 
222 }
223 return sum;
224 }
225
226 /*
227
+87 -12
--- src/rebuild.c
+++ src/rebuild.c
@@ -51,10 +51,87 @@
5151
@ title text, -- Title of this report
5252
@ cols text, -- A color-key specification
5353
@ sqlcode text -- An SQL SELECT statement for this report
5454
@ );
5555
;
56
+
57
+/*
58
+** Variables used for progress information
59
+*/
60
+static int totalSize; /* Total number of artifacts to process */
61
+static int processCnt; /* Number processed so far */
62
+static int ttyOutput; /* Do progress output */
63
+
64
+/*
65
+** Called after each artifact is processed
66
+*/
67
+static void rebuild_step_done(void){
68
+ if( ttyOutput ){
69
+ processCnt++;
70
+ printf("%d (%d%%)...\r", processCnt, (processCnt*100/totalSize));
71
+ fflush(stdout);
72
+ }
73
+}
74
+
75
+/*
76
+** Rebuild cross-referencing information for the artifact
77
+** rid with content pBase and all of its descendents. This
78
+** routine clears the content buffer before returning.
79
+*/
80
+static void rebuild_step(int rid, Blob *pBase){
81
+ Stmt q1;
82
+ Bag children;
83
+ Blob copy;
84
+ Blob *pUse;
85
+ int nChild, i, cid;
86
+
87
+ /* Find all children of artifact rid */
88
+ db_prepare(&q1, "SELECT rid FROM delta WHERE srcid=%d", rid);
89
+ bag_init(&children);
90
+ while( db_step(&q1)==SQLITE_ROW ){
91
+ bag_insert(&children, db_column_int(&q1, 0));
92
+ }
93
+ nChild = bag_count(&children);
94
+ db_finalize(&q1);
95
+
96
+ /* Crosslink the artifact */
97
+ if( nChild==0 ){
98
+ pUse = pBase;
99
+ }else{
100
+ blob_copy(&copy, pBase);
101
+ pUse = &copy;
102
+ }
103
+ manifest_crosslink(rid, pUse);
104
+ blob_reset(pUse);
105
+
106
+ /* Call all children recursively */
107
+ for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){
108
+ Stmt q2;
109
+ int sz;
110
+ if( nChild==i ){
111
+ pUse = pBase;
112
+ }else{
113
+ blob_copy(&copy, pBase);
114
+ pUse = &copy;
115
+ }
116
+ db_prepare(&q2, "SELECT content, size FROM blob WHERE rid=%d", cid);
117
+ if( db_step(&q2)==SQLITE_ROW && (sz = db_column_int(&q2,1))>=0 ){
118
+ Blob delta;
119
+ db_ephemeral_blob(&q2, 0, &delta);
120
+ blob_uncompress(&delta, &delta);
121
+ blob_delta_apply(pUse, &delta, pUse);
122
+ blob_reset(&delta);
123
+ db_finalize(&q2);
124
+ rebuild_step(cid, pUse);
125
+ }else{
126
+ db_finalize(&q2);
127
+ blob_reset(pUse);
128
+ }
129
+ }
130
+ bag_clear(&children);
131
+ rebuild_step_done();
132
+}
56133
57134
/*
58135
** Core function to rebuild the infomration in the derived tables of a
59136
** fossil repository from the blobs. This function is shared between
60137
** 'rebuild_database' ('rebuild') and 'reconstruct_cmd'
@@ -64,16 +141,17 @@
64141
** If the randomize parameter is true, then the BLOBs are deliberately
65142
** extracted in a random order. This feature is used to test the
66143
** ability of fossil to accept records in any order and still
67144
** construct a sane repository.
68145
*/
69
-int rebuild_db(int randomize, int ttyOutput){
146
+int rebuild_db(int randomize, int doOut){
70147
Stmt s;
71148
int errCnt = 0;
72149
char *zTable;
73
- int cnt = 0;
74150
151
+ ttyOutput = doOut;
152
+ processCnt = 0;
75153
db_multi_exec(zSchemaUpdates);
76154
for(;;){
77155
zTable = db_text(0,
78156
"SELECT name FROM sqlite_master"
79157
" WHERE type='table'"
@@ -91,30 +169,26 @@
91169
" WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))"
92170
);
93171
db_multi_exec(
94172
"DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')"
95173
);
174
+ totalSize = db_int(0, "SELECT count(*) FROM blob");
96175
db_prepare(&s,
97
- "SELECT rid, size FROM blob %s"
98
- " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)",
99
- randomize ? "ORDER BY random()" : ""
176
+ "SELECT rid, size FROM blob"
177
+ " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)"
178
+ " AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)"
100179
);
101180
while( db_step(&s)==SQLITE_ROW ){
102181
int rid = db_column_int(&s, 0);
103182
int size = db_column_int(&s, 1);
104183
if( size>=0 ){
105184
Blob content;
106
- if( ttyOutput ){
107
- cnt++;
108
- printf("%d...\r", cnt);
109
- fflush(stdout);
110
- }
111185
content_get(rid, &content);
112
- manifest_crosslink(rid, &content);
113
- blob_reset(&content);
186
+ rebuild_step(rid, &content);
114187
}else{
115188
db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid);
189
+ rebuild_step_done();
116190
}
117191
}
118192
db_finalize(&s);
119193
if( ttyOutput ){
120194
printf("\n");
@@ -141,14 +215,15 @@
141215
if( g.argc!=3 ){
142216
usage("REPOSITORY-FILENAME");
143217
}
144218
db_open_repository(g.argv[2]);
145219
db_begin_transaction();
220
+ ttyOutput = 1;
146221
errCnt = rebuild_db(randomizeFlag, 1);
147222
if( errCnt && !forceFlag ){
148223
printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
149224
errCnt);
150225
db_end_transaction(1);
151226
}else{
152227
db_end_transaction(0);
153228
}
154229
}
155230
--- src/rebuild.c
+++ src/rebuild.c
@@ -51,10 +51,87 @@
51 @ title text, -- Title of this report
52 @ cols text, -- A color-key specification
53 @ sqlcode text -- An SQL SELECT statement for this report
54 @ );
55 ;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
57 /*
58 ** Core function to rebuild the infomration in the derived tables of a
59 ** fossil repository from the blobs. This function is shared between
60 ** 'rebuild_database' ('rebuild') and 'reconstruct_cmd'
@@ -64,16 +141,17 @@
64 ** If the randomize parameter is true, then the BLOBs are deliberately
65 ** extracted in a random order. This feature is used to test the
66 ** ability of fossil to accept records in any order and still
67 ** construct a sane repository.
68 */
69 int rebuild_db(int randomize, int ttyOutput){
70 Stmt s;
71 int errCnt = 0;
72 char *zTable;
73 int cnt = 0;
74
 
 
75 db_multi_exec(zSchemaUpdates);
76 for(;;){
77 zTable = db_text(0,
78 "SELECT name FROM sqlite_master"
79 " WHERE type='table'"
@@ -91,30 +169,26 @@
91 " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))"
92 );
93 db_multi_exec(
94 "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')"
95 );
 
96 db_prepare(&s,
97 "SELECT rid, size FROM blob %s"
98 " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)",
99 randomize ? "ORDER BY random()" : ""
100 );
101 while( db_step(&s)==SQLITE_ROW ){
102 int rid = db_column_int(&s, 0);
103 int size = db_column_int(&s, 1);
104 if( size>=0 ){
105 Blob content;
106 if( ttyOutput ){
107 cnt++;
108 printf("%d...\r", cnt);
109 fflush(stdout);
110 }
111 content_get(rid, &content);
112 manifest_crosslink(rid, &content);
113 blob_reset(&content);
114 }else{
115 db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid);
 
116 }
117 }
118 db_finalize(&s);
119 if( ttyOutput ){
120 printf("\n");
@@ -141,14 +215,15 @@
141 if( g.argc!=3 ){
142 usage("REPOSITORY-FILENAME");
143 }
144 db_open_repository(g.argv[2]);
145 db_begin_transaction();
 
146 errCnt = rebuild_db(randomizeFlag, 1);
147 if( errCnt && !forceFlag ){
148 printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
149 errCnt);
150 db_end_transaction(1);
151 }else{
152 db_end_transaction(0);
153 }
154 }
155
--- src/rebuild.c
+++ src/rebuild.c
@@ -51,10 +51,87 @@
51 @ title text, -- Title of this report
52 @ cols text, -- A color-key specification
53 @ sqlcode text -- An SQL SELECT statement for this report
54 @ );
55 ;
56
57 /*
58 ** Variables used for progress information
59 */
60 static int totalSize; /* Total number of artifacts to process */
61 static int processCnt; /* Number processed so far */
62 static int ttyOutput; /* Do progress output */
63
64 /*
65 ** Called after each artifact is processed
66 */
67 static void rebuild_step_done(void){
68 if( ttyOutput ){
69 processCnt++;
70 printf("%d (%d%%)...\r", processCnt, (processCnt*100/totalSize));
71 fflush(stdout);
72 }
73 }
74
75 /*
76 ** Rebuild cross-referencing information for the artifact
77 ** rid with content pBase and all of its descendents. This
78 ** routine clears the content buffer before returning.
79 */
80 static void rebuild_step(int rid, Blob *pBase){
81 Stmt q1;
82 Bag children;
83 Blob copy;
84 Blob *pUse;
85 int nChild, i, cid;
86
87 /* Find all children of artifact rid */
88 db_prepare(&q1, "SELECT rid FROM delta WHERE srcid=%d", rid);
89 bag_init(&children);
90 while( db_step(&q1)==SQLITE_ROW ){
91 bag_insert(&children, db_column_int(&q1, 0));
92 }
93 nChild = bag_count(&children);
94 db_finalize(&q1);
95
96 /* Crosslink the artifact */
97 if( nChild==0 ){
98 pUse = pBase;
99 }else{
100 blob_copy(&copy, pBase);
101 pUse = &copy;
102 }
103 manifest_crosslink(rid, pUse);
104 blob_reset(pUse);
105
106 /* Call all children recursively */
107 for(cid=bag_first(&children), i=1; cid; cid=bag_next(&children, cid), i++){
108 Stmt q2;
109 int sz;
110 if( nChild==i ){
111 pUse = pBase;
112 }else{
113 blob_copy(&copy, pBase);
114 pUse = &copy;
115 }
116 db_prepare(&q2, "SELECT content, size FROM blob WHERE rid=%d", cid);
117 if( db_step(&q2)==SQLITE_ROW && (sz = db_column_int(&q2,1))>=0 ){
118 Blob delta;
119 db_ephemeral_blob(&q2, 0, &delta);
120 blob_uncompress(&delta, &delta);
121 blob_delta_apply(pUse, &delta, pUse);
122 blob_reset(&delta);
123 db_finalize(&q2);
124 rebuild_step(cid, pUse);
125 }else{
126 db_finalize(&q2);
127 blob_reset(pUse);
128 }
129 }
130 bag_clear(&children);
131 rebuild_step_done();
132 }
133
134 /*
135 ** Core function to rebuild the information in the derived tables of a
136 ** fossil repository from the blobs. This function is shared between
137 ** 'rebuild_database' ('rebuild') and 'reconstruct_cmd'
@@ -64,16 +141,17 @@
141 ** If the randomize parameter is true, then the BLOBs are deliberately
142 ** extracted in a random order. This feature is used to test the
143 ** ability of fossil to accept records in any order and still
144 ** construct a sane repository.
145 */
146 int rebuild_db(int randomize, int doOut){
147 Stmt s;
148 int errCnt = 0;
149 char *zTable;
 
150
151 ttyOutput = doOut;
152 processCnt = 0;
153 db_multi_exec(zSchemaUpdates);
154 for(;;){
155 zTable = db_text(0,
156 "SELECT name FROM sqlite_master"
157 " WHERE type='table'"
@@ -91,30 +169,26 @@
169 " WHERE rid IN (SELECT rid FROM shun JOIN blob USING(uuid))"
170 );
171 db_multi_exec(
172 "DELETE FROM config WHERE name IN ('remote-code', 'remote-maxid')"
173 );
174 totalSize = db_int(0, "SELECT count(*) FROM blob");
175 db_prepare(&s,
176 "SELECT rid, size FROM blob"
177 " WHERE NOT EXISTS(SELECT 1 FROM shun WHERE uuid=blob.uuid)"
178 " AND NOT EXISTS(SELECT 1 FROM delta WHERE rid=blob.rid)"
179 );
180 while( db_step(&s)==SQLITE_ROW ){
181 int rid = db_column_int(&s, 0);
182 int size = db_column_int(&s, 1);
183 if( size>=0 ){
184 Blob content;
 
 
 
 
 
185 content_get(rid, &content);
186 rebuild_step(rid, &content);
 
187 }else{
188 db_multi_exec("INSERT OR IGNORE INTO phantom VALUES(%d)", rid);
189 rebuild_step_done();
190 }
191 }
192 db_finalize(&s);
193 if( ttyOutput ){
194 printf("\n");
@@ -141,14 +215,15 @@
215 if( g.argc!=3 ){
216 usage("REPOSITORY-FILENAME");
217 }
218 db_open_repository(g.argv[2]);
219 db_begin_transaction();
220 ttyOutput = 1;
221 errCnt = rebuild_db(randomizeFlag, 1);
222 if( errCnt && !forceFlag ){
223 printf("%d errors. Rolling back changes. Use --force to force a commit.\n",
224 errCnt);
225 db_end_transaction(1);
226 }else{
227 db_end_transaction(0);
228 }
229 }
230
--- src/verify.c
+++ src/verify.c
@@ -73,10 +73,11 @@
7373
/*
7474
** This routine is called just prior to each commit operation.
7575
*/
7676
static int verify_at_commit(void){
7777
int rid;
78
+ content_clear_cache();
7879
inFinalVerify = 1;
7980
rid = bag_first(&toVerify);
8081
while( rid>0 ){
8182
verify_rid(rid);
8283
rid = bag_next(&toVerify, rid);
8384
--- src/verify.c
+++ src/verify.c
@@ -73,10 +73,11 @@
73 /*
74 ** This routine is called just prior to each commit operation.
75 */
76 static int verify_at_commit(void){
77 int rid;
 
78 inFinalVerify = 1;
79 rid = bag_first(&toVerify);
80 while( rid>0 ){
81 verify_rid(rid);
82 rid = bag_next(&toVerify, rid);
83
--- src/verify.c
+++ src/verify.c
@@ -73,10 +73,11 @@
73 /*
74 ** This routine is called just prior to each commit operation.
75 */
76 static int verify_at_commit(void){
77 int rid;
78 content_clear_cache();
79 inFinalVerify = 1;
80 rid = bag_first(&toVerify);
81 while( rid>0 ){
82 verify_rid(rid);
83 rid = bag_next(&toVerify, rid);
84

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button