Fossil SCM
Improvements to the delta generator algorthm so that it runs much faster on large files with very few similarities. There is no change to the delta format generated, so this is fully backwards and forwards compatible.
Commit
522104c2cd244485f6a952641c05777864f888f8
Parent
d63f87c0030a109…
1 file changed
+10
-12
+10
-12
| --- src/delta.c | ||
| +++ src/delta.c | ||
| @@ -221,15 +221,10 @@ | ||
| 221 | 221 | default: ; |
| 222 | 222 | } |
| 223 | 223 | return sum; |
| 224 | 224 | } |
| 225 | 225 | |
| 226 | -/* | |
| 227 | -** Maximum number of landmarks to set in the source file. | |
| 228 | -*/ | |
| 229 | -#define MX_LANDMARK (1024*128) | |
| 230 | - | |
| 231 | 226 | /* |
| 232 | 227 | ** Create a new delta. |
| 233 | 228 | ** |
| 234 | 229 | ** The delta is written into a preallocated buffer, zDelta, which |
| 235 | 230 | ** should be at least 60 bytes longer than the target file, zOut. |
| @@ -297,13 +292,14 @@ | ||
| 297 | 292 | char *zDelta /* Write the delta into this buffer */ |
| 298 | 293 | ){ |
| 299 | 294 | int i, base; |
| 300 | 295 | char *zOrigDelta = zDelta; |
| 301 | 296 | hash h; |
| 302 | - int *collide; | |
| 297 | + int nHash; /* Number of hash table entries */ | |
| 298 | + int *landmark; /* Primary hash table */ | |
| 299 | + int *collide; /* Collision chain */ | |
| 303 | 300 | int lastRead = -1; /* Last byte of zSrc read by a COPY command */ |
| 304 | - int landmark[MX_LANDMARK]; | |
| 305 | 301 | |
| 306 | 302 | /* Add the target file size to the beginning of the delta |
| 307 | 303 | */ |
| 308 | 304 | putInt(lenOut, &zDelta); |
| 309 | 305 | *(zDelta++) = '\n'; |
| @@ -323,18 +319,20 @@ | ||
| 323 | 319 | } |
| 324 | 320 | |
| 325 | 321 | /* Compute the hash table used to locate matching sections in the |
| 326 | 322 | ** source file. |
| 327 | 323 | */ |
| 328 | - collide = malloc( lenSrc*sizeof(int)/NHASH ); | |
| 324 | + nHash = lenSrc/NHASH; | |
| 325 | + collide = malloc( nHash*2*sizeof(int) ); | |
| 329 | 326 | if( collide==0 ) return -1; |
| 330 | - memset(landmark, -1, sizeof(landmark)); | |
| 331 | - memset(collide, -1, lenSrc*sizeof(int)/NHASH ); | |
| 327 | + landmark = &collide[nHash]; | |
| 328 | + memset(landmark, -1, nHash*sizeof(int)); | |
| 329 | + memset(collide, -1, nHash*sizeof(int)); | |
| 332 | 330 | for(i=0; i<lenSrc-NHASH; i+=NHASH){ |
| 333 | 331 | int hv; |
| 334 | 332 | hash_init(&h, &zSrc[i]); |
| 335 | - hv = hash_32bit(&h) & (MX_LANDMARK-1); | |
| 333 | + hv = hash_32bit(&h) % nHash; | |
| 336 | 334 | collide[i/NHASH] = landmark[hv]; |
| 337 | 335 | landmark[hv] = i/NHASH; |
| 338 | 336 | } |
| 339 | 337 | |
| 340 | 338 | /* Begin scanning the target file and generating copy commands and |
| @@ -349,11 +347,11 @@ | ||
| 349 | 347 | bestCnt = 0; |
| 350 | 348 | while( 1 ){ |
| 351 | 349 | int hv; |
| 352 | 350 | int limit = 250; |
| 353 | 351 | |
| 354 | - hv = hash_32bit(&h) & (MX_LANDMARK-1); | |
| 352 | + hv = hash_32bit(&h) % nHash; | |
| 355 | 353 | DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); ) |
| 356 | 354 | iBlock = landmark[hv]; |
| 357 | 355 | while( iBlock>=0 && (limit--)>0 ){ |
| 358 | 356 | /* |
| 359 | 357 | ** The hash window has identified a potential match against |
| 360 | 358 |
| --- src/delta.c | |
| +++ src/delta.c | |
| @@ -221,15 +221,10 @@ | |
| 221 | default: ; |
| 222 | } |
| 223 | return sum; |
| 224 | } |
| 225 | |
| 226 | /* |
| 227 | ** Maximum number of landmarks to set in the source file. |
| 228 | */ |
| 229 | #define MX_LANDMARK (1024*128) |
| 230 | |
| 231 | /* |
| 232 | ** Create a new delta. |
| 233 | ** |
| 234 | ** The delta is written into a preallocated buffer, zDelta, which |
| 235 | ** should be at least 60 bytes longer than the target file, zOut. |
| @@ -297,13 +292,14 @@ | |
| 297 | char *zDelta /* Write the delta into this buffer */ |
| 298 | ){ |
| 299 | int i, base; |
| 300 | char *zOrigDelta = zDelta; |
| 301 | hash h; |
| 302 | int *collide; |
| 303 | int lastRead = -1; /* Last byte of zSrc read by a COPY command */ |
| 304 | int landmark[MX_LANDMARK]; |
| 305 | |
| 306 | /* Add the target file size to the beginning of the delta |
| 307 | */ |
| 308 | putInt(lenOut, &zDelta); |
| 309 | *(zDelta++) = '\n'; |
| @@ -323,18 +319,20 @@ | |
| 323 | } |
| 324 | |
| 325 | /* Compute the hash table used to locate matching sections in the |
| 326 | ** source file. |
| 327 | */ |
| 328 | collide = malloc( lenSrc*sizeof(int)/NHASH ); |
| 329 | if( collide==0 ) return -1; |
| 330 | memset(landmark, -1, sizeof(landmark)); |
| 331 | memset(collide, -1, lenSrc*sizeof(int)/NHASH ); |
| 332 | for(i=0; i<lenSrc-NHASH; i+=NHASH){ |
| 333 | int hv; |
| 334 | hash_init(&h, &zSrc[i]); |
| 335 | hv = hash_32bit(&h) & (MX_LANDMARK-1); |
| 336 | collide[i/NHASH] = landmark[hv]; |
| 337 | landmark[hv] = i/NHASH; |
| 338 | } |
| 339 | |
| 340 | /* Begin scanning the target file and generating copy commands and |
| @@ -349,11 +347,11 @@ | |
| 349 | bestCnt = 0; |
| 350 | while( 1 ){ |
| 351 | int hv; |
| 352 | int limit = 250; |
| 353 | |
| 354 | hv = hash_32bit(&h) & (MX_LANDMARK-1); |
| 355 | DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); ) |
| 356 | iBlock = landmark[hv]; |
| 357 | while( iBlock>=0 && (limit--)>0 ){ |
| 358 | /* |
| 359 | ** The hash window has identified a potential match against |
| 360 |
| --- src/delta.c | |
| +++ src/delta.c | |
| @@ -221,15 +221,10 @@ | |
| 221 | default: ; |
| 222 | } |
| 223 | return sum; |
| 224 | } |
| 225 | |
| 226 | /* |
| 227 | ** Create a new delta. |
| 228 | ** |
| 229 | ** The delta is written into a preallocated buffer, zDelta, which |
| 230 | ** should be at least 60 bytes longer than the target file, zOut. |
| @@ -297,13 +292,14 @@ | |
| 292 | char *zDelta /* Write the delta into this buffer */ |
| 293 | ){ |
| 294 | int i, base; |
| 295 | char *zOrigDelta = zDelta; |
| 296 | hash h; |
| 297 | int nHash; /* Number of hash table entries */ |
| 298 | int *landmark; /* Primary hash table */ |
| 299 | int *collide; /* Collision chain */ |
| 300 | int lastRead = -1; /* Last byte of zSrc read by a COPY command */ |
| 301 | |
| 302 | /* Add the target file size to the beginning of the delta |
| 303 | */ |
| 304 | putInt(lenOut, &zDelta); |
| 305 | *(zDelta++) = '\n'; |
| @@ -323,18 +319,20 @@ | |
| 319 | } |
| 320 | |
| 321 | /* Compute the hash table used to locate matching sections in the |
| 322 | ** source file. |
| 323 | */ |
| 324 | nHash = lenSrc/NHASH; |
| 325 | collide = malloc( nHash*2*sizeof(int) ); |
| 326 | if( collide==0 ) return -1; |
| 327 | landmark = &collide[nHash]; |
| 328 | memset(landmark, -1, nHash*sizeof(int)); |
| 329 | memset(collide, -1, nHash*sizeof(int)); |
| 330 | for(i=0; i<lenSrc-NHASH; i+=NHASH){ |
| 331 | int hv; |
| 332 | hash_init(&h, &zSrc[i]); |
| 333 | hv = hash_32bit(&h) % nHash; |
| 334 | collide[i/NHASH] = landmark[hv]; |
| 335 | landmark[hv] = i/NHASH; |
| 336 | } |
| 337 | |
| 338 | /* Begin scanning the target file and generating copy commands and |
| @@ -349,11 +347,11 @@ | |
| 347 | bestCnt = 0; |
| 348 | while( 1 ){ |
| 349 | int hv; |
| 350 | int limit = 250; |
| 351 | |
| 352 | hv = hash_32bit(&h) % nHash; |
| 353 | DEBUG2( printf("LOOKING: %4d [%s]\n", base+i, print16(&zOut[base+i])); ) |
| 354 | iBlock = landmark[hv]; |
| 355 | while( iBlock>=0 && (limit--)>0 ){ |
| 356 | /* |
| 357 | ** The hash window has identified a potential match against |
| 358 |