Fossil SCM

Improvements to the delta generator algorthm so that it runs much faster on large files with very few similarities. There is no change to the delta format generated, so this is fully backwards and forwards compatible.

drh 2009-03-24 22:03 trunk
Commit 522104c2cd244485f6a952641c05777864f888f8
1 file changed +10 -12
+10 -12
--- src/delta.c
+++ src/delta.c
@@ -221,15 +221,10 @@
221221
default: ;
222222
}
223223
return sum;
224224
}
225225
226
-/*
227
-** Maximum number of landmarks to set in the source file.
228
-*/
229
-#define MX_LANDMARK (1024*128)
230
-
231226
/*
232227
** Create a new delta.
233228
**
234229
** The delta is written into a preallocated buffer, zDelta, which
235230
** should be at least 60 bytes longer than the target file, zOut.
@@ -297,13 +292,14 @@
297292
char *zDelta /* Write the delta into this buffer */
298293
){
299294
int i, base;
300295
char *zOrigDelta = zDelta;
301296
hash h;
302
- int *collide;
297
+ int nHash; /* Number of hash table entries */
298
+ int *landmark; /* Primary hash table */
299
+ int *collide; /* Collision chain */
303300
int lastRead = -1; /* Last byte of zSrc read by a COPY command */
304
- int landmark[MX_LANDMARK];
305301
306302
/* Add the target file size to the beginning of the delta
307303
*/
308304
putInt(lenOut, &zDelta);
309305
*(zDelta++) = '\n';
@@ -323,18 +319,20 @@
323319
}
324320
325321
/* Compute the hash table used to locate matching sections in the
326322
** source file.
327323
*/
328
- collide = malloc( lenSrc*sizeof(int)/NHASH );
324
+ nHash = lenSrc/NHASH;
325
+ collide = malloc( nHash*2*sizeof(int) );
329326
if( collide==0 ) return -1;
330
- memset(landmark, -1, sizeof(landmark));
331
- memset(collide, -1, lenSrc*sizeof(int)/NHASH );
327
+ landmark = &collide[nHash];
328
+ memset(landmark, -1, nHash*sizeof(int));
329
+ memset(collide, -1, nHash*sizeof(int));
332330
for(i=0; i<lenSrc-NHASH; i+=NHASH){
333331
int hv;
334332
hash_init(&h, &zSrc[i]);
335
- hv = hash_32bit(&h) & (MX_LANDMARK-1);
333
+ hv = hash_32bit(&h) % nHash;
336334
collide[i/NHASH] = landmark[hv];
337335
landmark[hv] = i/NHASH;
338336
}
339337
340338
/* Begin scanning the target file and generating copy commands and
@@ -349,11 +347,11 @@
349347
bestCnt = 0;
350348
while( 1 ){
351349
int hv;
352350
int limit = 250;
353351
354
- hv = hash_32bit(&h) & (MX_LANDMARK-1);
352
+ hv = hash_32bit(&h) % nHash;
355353
DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
356354
iBlock = landmark[hv];
357355
while( iBlock>=0 && (limit--)>0 ){
358356
/*
359357
** The hash window has identified a potential match against
360358
--- src/delta.c
+++ src/delta.c
@@ -221,15 +221,10 @@
221 default: ;
222 }
223 return sum;
224 }
225
226 /*
227 ** Maximum number of landmarks to set in the source file.
228 */
229 #define MX_LANDMARK (1024*128)
230
231 /*
232 ** Create a new delta.
233 **
234 ** The delta is written into a preallocated buffer, zDelta, which
235 ** should be at least 60 bytes longer than the target file, zOut.
@@ -297,13 +292,14 @@
297 char *zDelta /* Write the delta into this buffer */
298 ){
299 int i, base;
300 char *zOrigDelta = zDelta;
301 hash h;
302 int *collide;
 
 
303 int lastRead = -1; /* Last byte of zSrc read by a COPY command */
304 int landmark[MX_LANDMARK];
305
306 /* Add the target file size to the beginning of the delta
307 */
308 putInt(lenOut, &zDelta);
309 *(zDelta++) = '\n';
@@ -323,18 +319,20 @@
323 }
324
325 /* Compute the hash table used to locate matching sections in the
326 ** source file.
327 */
328 collide = malloc( lenSrc*sizeof(int)/NHASH );
 
329 if( collide==0 ) return -1;
330 memset(landmark, -1, sizeof(landmark));
331 memset(collide, -1, lenSrc*sizeof(int)/NHASH );
 
332 for(i=0; i<lenSrc-NHASH; i+=NHASH){
333 int hv;
334 hash_init(&h, &zSrc[i]);
335 hv = hash_32bit(&h) & (MX_LANDMARK-1);
336 collide[i/NHASH] = landmark[hv];
337 landmark[hv] = i/NHASH;
338 }
339
340 /* Begin scanning the target file and generating copy commands and
@@ -349,11 +347,11 @@
349 bestCnt = 0;
350 while( 1 ){
351 int hv;
352 int limit = 250;
353
354 hv = hash_32bit(&h) & (MX_LANDMARK-1);
355 DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
356 iBlock = landmark[hv];
357 while( iBlock>=0 && (limit--)>0 ){
358 /*
359 ** The hash window has identified a potential match against
360
--- src/delta.c
+++ src/delta.c
@@ -221,15 +221,10 @@
221 default: ;
222 }
223 return sum;
224 }
225
 
 
 
 
 
226 /*
227 ** Create a new delta.
228 **
229 ** The delta is written into a preallocated buffer, zDelta, which
230 ** should be at least 60 bytes longer than the target file, zOut.
@@ -297,13 +292,14 @@
292 char *zDelta /* Write the delta into this buffer */
293 ){
294 int i, base;
295 char *zOrigDelta = zDelta;
296 hash h;
297 int nHash; /* Number of hash table entries */
298 int *landmark; /* Primary hash table */
299 int *collide; /* Collision chain */
300 int lastRead = -1; /* Last byte of zSrc read by a COPY command */
 
301
302 /* Add the target file size to the beginning of the delta
303 */
304 putInt(lenOut, &zDelta);
305 *(zDelta++) = '\n';
@@ -323,18 +319,20 @@
319 }
320
321 /* Compute the hash table used to locate matching sections in the
322 ** source file.
323 */
324 nHash = lenSrc/NHASH;
325 collide = malloc( nHash*2*sizeof(int) );
326 if( collide==0 ) return -1;
327 landmark = &collide[nHash];
328 memset(landmark, -1, nHash*sizeof(int));
329 memset(collide, -1, nHash*sizeof(int));
330 for(i=0; i<lenSrc-NHASH; i+=NHASH){
331 int hv;
332 hash_init(&h, &zSrc[i]);
333 hv = hash_32bit(&h) % nHash;
334 collide[i/NHASH] = landmark[hv];
335 landmark[hv] = i/NHASH;
336 }
337
338 /* Begin scanning the target file and generating copy commands and
@@ -349,11 +347,11 @@
347 bestCnt = 0;
348 while( 1 ){
349 int hv;
350 int limit = 250;
351
352 hv = hash_32bit(&h) % nHash;
353 DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); )
354 iBlock = landmark[hv];
355 while( iBlock>=0 && (limit--)>0 ){
356 /*
357 ** The hash window has identified a potential match against
358

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button