Fossil SCM

Bug fixes in the Myers diff algorithm.

drh 2007-11-16 03:17 trunk
Commit f1b55da0ac3d3c33077b2586585a920663b33b91
1 file changed +98 -72
+98 -72
--- src/diff.c
+++ src/diff.c
@@ -51,11 +51,11 @@
5151
5252
/* Count the number of lines. Allocate space to hold
5353
** the returned array.
5454
*/
5555
for(i=0, nLine=1; z[i]; i++){
56
- if( z[i]=='\n' ) nLine++;
56
+ if( z[i]=='\n' && z[i+1]!=0 ) nLine++;
5757
}
5858
a = malloc( nLine*sizeof(a[0]) );
5959
if( a==0 ) fossil_panic("out of memory");
6060
6161
/* Fill in the array */
@@ -183,19 +183,21 @@
183183
int k, q; /* Diagonal number and distinct from (0,0) */
184184
int K, D; /* The diagonal and d for the final solution */
185185
int *R; /* Result vector */
186186
int r; /* Loop variables */
187187
int go = 1; /* Outer loop control */
188
+ int MAX; /* Largest of X and Y */
188189
189190
/* Break the two files being diffed into individual lines.
190191
** Compute hashes on each line for fast comparison.
191192
*/
192193
A = break_into_lines(blob_str(pA_Blob), &X);
193194
B = break_into_lines(blob_str(pB_Blob), &Y);
194195
195196
szM = 0;
196
- for(d=0; go; d++){
197
+ MAX = X>Y ? X : Y;
198
+ for(d=0; go && d<=MAX; d++){
197199
if( szM<d+1 ){
198200
szM += szM + 10;
199201
M = realloc(M, sizeof(M[0])*szM);
200202
if( M==0 ){
201203
fossil_panic("out of memory");
@@ -216,83 +218,106 @@
216218
}else{
217219
q = M[d-1][i-1];
218220
}
219221
x = (k + q + 1)/2;
220222
y = x - k;
221
- while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
223
+ if( x<0 || x>X || y<0 || y>Y ){
224
+ x = y = 0;
225
+ }else{
226
+ while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
227
+ }
222228
M[d][i] = x + y;
229
+ /* printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); */
223230
if( x==X && y==Y ){
224231
go = 0;
225232
break;
226233
}
227234
}
228235
}
229
-
230
- /* Reuse M[] as follows:
231
- **
232
- ** M[d][1] = 1 if a line is inserted or 1 if a line is deleted.
233
- ** M[d][0] = number of lines copied at this step.
234
- **
235
- */
236
- D = d - 1;
237
- K = X - Y;
238
- for(d=D, i=(K+D)/2; d>0; d--){
239
- if( i==d || M[d-1][i-1] > M[d-1][i] ){
240
- M[d][0] = M[d][i] - M[d-1][i-1] - 1;
241
- M[d][1] = 0;
242
- i--;
243
- }else{
244
- M[d][0] = M[d][i] - M[d-1][i] - 1;
245
- M[d][1] = 1;
246
- }
247
- }
248
-
249
- /* Allocate the output vector
250
- */
251
- R = malloc( sizeof(R[0])*(D+2)*3 );
252
- if( R==0 ){
253
- fossil_fatal("out of memory");
254
- }
255
-
256
- /* Populate the output vector
257
- */
258
- d = r = 0;
259
- while( d<=D ){
260
- int n;
261
- R[r++] = M[d++][0]/2; /* COPY */
262
- if( d>D ){
263
- R[r++] = 0;
264
- R[r++] = 0;
265
- break;
266
- }
267
- if( M[d][1]==0 ){
268
- n = 1;
269
- while( M[d][0]==0 && d<D && M[d+1][1]==0 ){
270
- d++;
271
- n++;
272
- }
273
- R[r++] = n; /* DELETE */
274
- if( d==D || M[d][0]>0 ){
275
- R[r++] = 0; /* INSERT */
276
- continue;
277
- }
278
- d++;
279
- }else{
280
- R[r++] = 0; /* DELETE */
281
- }
282
- assert( M[d][1]==1 );
283
- n = 1;
284
- while( M[d][0]==0 && d<D && M[d+1][1]==1 ){
285
- d++;
286
- n++;
287
- }
288
- R[r++] = n; /* INSERT */
289
- }
290
- R[r++] = 0;
291
- R[r++] = 0;
292
- R[r++] = 0;
293
-
236
+ if( d>MAX ){
237
+ R = malloc( sizeof(R[0])*6 );
238
+ R[0] = 0;
239
+ R[1] = X;
240
+ R[2] = Y;
241
+ R[3] = 0;
242
+ R[4] = 0;
243
+ R[5] = 0;
244
+ }else{
245
+ /* Reuse M[] as follows:
246
+ **
247
+ ** M[d][1] = 1 if a line is inserted or 0 if a line is deleted.
248
+ ** M[d][0] = number of lines copied after the ins or del above.
249
+ **
250
+ */
251
+ D = d - 1;
252
+ K = X - Y;
253
+ for(d=D, i=(K+D)/2; d>0; d--){
254
+ /* printf("d=%d i=%d\n", d, i); */
255
+ if( i==d || (i>0 && M[d-1][i-1] > M[d-1][i]) ){
256
+ M[d][0] = M[d][i] - M[d-1][i-1] - 1;
257
+ M[d][1] = 0;
258
+ i--;
259
+ }else{
260
+ M[d][0] = M[d][i] - M[d-1][i] - 1;
261
+ M[d][1] = 1;
262
+ }
263
+ }
264
+
265
+
266
+ /*
267
+ printf("---------------\nM[0][0] = %5d\n", M[0][0]);
268
+ for(d=1; d<=D; d++){
269
+ printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
270
+ }
271
+ */
272
+
273
+ /* Allocate the output vector
274
+ */
275
+ R = malloc( sizeof(R[0])*(D+2)*3 );
276
+ if( R==0 ){
277
+ fossil_fatal("out of memory");
278
+ }
279
+
280
+ /* Populate the output vector
281
+ */
282
+ d = r = 0;
283
+ while( d<=D ){
284
+ int n;
285
+ R[r++] = M[d++][0]/2; /* COPY */
286
+ if( d>D ){
287
+ R[r++] = 0;
288
+ R[r++] = 0;
289
+ break;
290
+ }
291
+ if( M[d][1]==0 ){
292
+ n = 1;
293
+ while( M[d][0]==0 && d<D && M[d+1][1]==0 ){
294
+ d++;
295
+ n++;
296
+ }
297
+ R[r++] = n; /* DELETE */
298
+ if( d==D || M[d][0]>0 ){
299
+ R[r++] = 0; /* INSERT */
300
+ continue;
301
+ }
302
+ d++;
303
+ }else{
304
+ R[r++] = 0; /* DELETE */
305
+ }
306
+ assert( M[d][1]==1 );
307
+ n = 1;
308
+ while( M[d][0]==0 && d<D && M[d+1][1]==1 ){
309
+ d++;
310
+ n++;
311
+ }
312
+ R[r++] = n; /* INSERT */
313
+ }
314
+ R[r++] = 0;
315
+ R[r++] = 0;
316
+ R[r++] = 0;
317
+ }
318
+
294319
/* Free the Myers matrix */
295320
for(d=0; d<=D; d++){
296321
free(M[d]);
297322
}
298323
free(M);
@@ -307,13 +332,14 @@
307332
int na, nb; /* Number of lines shown from A and B */
308333
int i, j; /* Loop counters */
309334
int m; /* Number of lines to output */
310335
int skip; /* Number of lines to skip */
311336
312
- for(r=0; R[r+3]; r += 3*nr){
337
+ for(r=0; R[r+1] || R[r+2] || R[r+3]; r += 3*nr){
313338
/* Figure out how many triples to show in a single block */
314339
for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
340
+ /* printf("r=%d nr=%d\n", r, nr); */
315341
316342
/* For the current block comprising nr triples, figure out
317343
** how many lines of A and B are to be displayed
318344
*/
319345
if( R[r]>nContext ){
@@ -325,16 +351,16 @@
325351
}
326352
for(i=0; i<nr; i++){
327353
na += R[r+i*3+1];
328354
nb += R[r+i*3+2];
329355
}
330
- if( R[r+i*3]>nContext ){
356
+ if( R[r+nr*3]>nContext ){
331357
na += nContext;
332358
nb += nContext;
333359
}else{
334
- na += R[r+i*3];
335
- nb += R[r+i*3];
360
+ na += R[r+nr*3];
361
+ nb += R[r+nr*3];
336362
}
337363
for(i=1; i<nr; i++){
338364
na += R[r+i*3];
339365
nb += R[r+i*3];
340366
}
341367
--- src/diff.c
+++ src/diff.c
@@ -51,11 +51,11 @@
51
52 /* Count the number of lines. Allocate space to hold
53 ** the returned array.
54 */
55 for(i=0, nLine=1; z[i]; i++){
56 if( z[i]=='\n' ) nLine++;
57 }
58 a = malloc( nLine*sizeof(a[0]) );
59 if( a==0 ) fossil_panic("out of memory");
60
61 /* Fill in the array */
@@ -183,19 +183,21 @@
183 int k, q; /* Diagonal number and distinct from (0,0) */
184 int K, D; /* The diagonal and d for the final solution */
185 int *R; /* Result vector */
186 int r; /* Loop variables */
187 int go = 1; /* Outer loop control */
 
188
189 /* Break the two files being diffed into individual lines.
190 ** Compute hashes on each line for fast comparison.
191 */
192 A = break_into_lines(blob_str(pA_Blob), &X);
193 B = break_into_lines(blob_str(pB_Blob), &Y);
194
195 szM = 0;
196 for(d=0; go; d++){
 
197 if( szM<d+1 ){
198 szM += szM + 10;
199 M = realloc(M, sizeof(M[0])*szM);
200 if( M==0 ){
201 fossil_panic("out of memory");
@@ -216,83 +218,106 @@
216 }else{
217 q = M[d-1][i-1];
218 }
219 x = (k + q + 1)/2;
220 y = x - k;
221 while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
 
 
 
 
222 M[d][i] = x + y;
 
223 if( x==X && y==Y ){
224 go = 0;
225 break;
226 }
227 }
228 }
229
230 /* Reuse M[] as follows:
231 **
232 ** M[d][1] = 1 if a line is inserted or 1 if a line is deleted.
233 ** M[d][0] = number of lines copied at this step.
234 **
235 */
236 D = d - 1;
237 K = X - Y;
238 for(d=D, i=(K+D)/2; d>0; d--){
239 if( i==d || M[d-1][i-1] > M[d-1][i] ){
240 M[d][0] = M[d][i] - M[d-1][i-1] - 1;
241 M[d][1] = 0;
242 i--;
243 }else{
244 M[d][0] = M[d][i] - M[d-1][i] - 1;
245 M[d][1] = 1;
246 }
247 }
248
249 /* Allocate the output vector
250 */
251 R = malloc( sizeof(R[0])*(D+2)*3 );
252 if( R==0 ){
253 fossil_fatal("out of memory");
254 }
255
256 /* Populate the output vector
257 */
258 d = r = 0;
259 while( d<=D ){
260 int n;
261 R[r++] = M[d++][0]/2; /* COPY */
262 if( d>D ){
263 R[r++] = 0;
264 R[r++] = 0;
265 break;
266 }
267 if( M[d][1]==0 ){
268 n = 1;
269 while( M[d][0]==0 && d<D && M[d+1][1]==0 ){
270 d++;
271 n++;
272 }
273 R[r++] = n; /* DELETE */
274 if( d==D || M[d][0]>0 ){
275 R[r++] = 0; /* INSERT */
276 continue;
277 }
278 d++;
279 }else{
280 R[r++] = 0; /* DELETE */
281 }
282 assert( M[d][1]==1 );
283 n = 1;
284 while( M[d][0]==0 && d<D && M[d+1][1]==1 ){
285 d++;
286 n++;
287 }
288 R[r++] = n; /* INSERT */
289 }
290 R[r++] = 0;
291 R[r++] = 0;
292 R[r++] = 0;
293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294 /* Free the Myers matrix */
295 for(d=0; d<=D; d++){
296 free(M[d]);
297 }
298 free(M);
@@ -307,13 +332,14 @@
307 int na, nb; /* Number of lines shown from A and B */
308 int i, j; /* Loop counters */
309 int m; /* Number of lines to output */
310 int skip; /* Number of lines to skip */
311
312 for(r=0; R[r+3]; r += 3*nr){
313 /* Figure out how many triples to show in a single block */
314 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
 
315
316 /* For the current block comprising nr triples, figure out
317 ** how many lines of A and B are to be displayed
318 */
319 if( R[r]>nContext ){
@@ -325,16 +351,16 @@
325 }
326 for(i=0; i<nr; i++){
327 na += R[r+i*3+1];
328 nb += R[r+i*3+2];
329 }
330 if( R[r+i*3]>nContext ){
331 na += nContext;
332 nb += nContext;
333 }else{
334 na += R[r+i*3];
335 nb += R[r+i*3];
336 }
337 for(i=1; i<nr; i++){
338 na += R[r+i*3];
339 nb += R[r+i*3];
340 }
341
--- src/diff.c
+++ src/diff.c
@@ -51,11 +51,11 @@
51
52 /* Count the number of lines. Allocate space to hold
53 ** the returned array.
54 */
55 for(i=0, nLine=1; z[i]; i++){
56 if( z[i]=='\n' && z[i+1]!=0 ) nLine++;
57 }
58 a = malloc( nLine*sizeof(a[0]) );
59 if( a==0 ) fossil_panic("out of memory");
60
61 /* Fill in the array */
@@ -183,19 +183,21 @@
183 int k, q; /* Diagonal number and distinct from (0,0) */
184 int K, D; /* The diagonal and d for the final solution */
185 int *R; /* Result vector */
186 int r; /* Loop variables */
187 int go = 1; /* Outer loop control */
188 int MAX; /* Largest of X and Y */
189
190 /* Break the two files being diffed into individual lines.
191 ** Compute hashes on each line for fast comparison.
192 */
193 A = break_into_lines(blob_str(pA_Blob), &X);
194 B = break_into_lines(blob_str(pB_Blob), &Y);
195
196 szM = 0;
197 MAX = X>Y ? X : Y;
198 for(d=0; go && d<=MAX; d++){
199 if( szM<d+1 ){
200 szM += szM + 10;
201 M = realloc(M, sizeof(M[0])*szM);
202 if( M==0 ){
203 fossil_panic("out of memory");
@@ -216,83 +218,106 @@
218 }else{
219 q = M[d-1][i-1];
220 }
221 x = (k + q + 1)/2;
222 y = x - k;
223 if( x<0 || x>X || y<0 || y>Y ){
224 x = y = 0;
225 }else{
226 while( x<X && y<Y && same_dline(&A[x],&B[y]) ){ x++; y++; }
227 }
228 M[d][i] = x + y;
229 /* printf("M[%d][%i] = %d k=%d (%d,%d)\n", d, i, x+y, k, x, y); */
230 if( x==X && y==Y ){
231 go = 0;
232 break;
233 }
234 }
235 }
236 if( d>MAX ){
237 R = malloc( sizeof(R[0])*6 );
238 R[0] = 0;
239 R[1] = X;
240 R[2] = Y;
241 R[3] = 0;
242 R[4] = 0;
243 R[5] = 0;
244 }else{
245 /* Reuse M[] as follows:
246 **
247 ** M[d][1] = 1 if a line is inserted or 0 if a line is deleted.
248 ** M[d][0] = number of lines copied after the ins or del above.
249 **
250 */
251 D = d - 1;
252 K = X - Y;
253 for(d=D, i=(K+D)/2; d>0; d--){
254 /* printf("d=%d i=%d\n", d, i); */
255 if( i==d || (i>0 && M[d-1][i-1] > M[d-1][i]) ){
256 M[d][0] = M[d][i] - M[d-1][i-1] - 1;
257 M[d][1] = 0;
258 i--;
259 }else{
260 M[d][0] = M[d][i] - M[d-1][i] - 1;
261 M[d][1] = 1;
262 }
263 }
264
265
266 /*
267 printf("---------------\nM[0][0] = %5d\n", M[0][0]);
268 for(d=1; d<=D; d++){
269 printf("M[%d][0] = %5d M[%d][1] = %d\n",d,M[d][0],d,M[d][1]);
270 }
271 */
272
273 /* Allocate the output vector
274 */
275 R = malloc( sizeof(R[0])*(D+2)*3 );
276 if( R==0 ){
277 fossil_fatal("out of memory");
278 }
279
280 /* Populate the output vector
281 */
282 d = r = 0;
283 while( d<=D ){
284 int n;
285 R[r++] = M[d++][0]/2; /* COPY */
286 if( d>D ){
287 R[r++] = 0;
288 R[r++] = 0;
289 break;
290 }
291 if( M[d][1]==0 ){
292 n = 1;
293 while( M[d][0]==0 && d<D && M[d+1][1]==0 ){
294 d++;
295 n++;
296 }
297 R[r++] = n; /* DELETE */
298 if( d==D || M[d][0]>0 ){
299 R[r++] = 0; /* INSERT */
300 continue;
301 }
302 d++;
303 }else{
304 R[r++] = 0; /* DELETE */
305 }
306 assert( M[d][1]==1 );
307 n = 1;
308 while( M[d][0]==0 && d<D && M[d+1][1]==1 ){
309 d++;
310 n++;
311 }
312 R[r++] = n; /* INSERT */
313 }
314 R[r++] = 0;
315 R[r++] = 0;
316 R[r++] = 0;
317 }
318
319 /* Free the Myers matrix */
320 for(d=0; d<=D; d++){
321 free(M[d]);
322 }
323 free(M);
@@ -307,13 +332,14 @@
332 int na, nb; /* Number of lines shown from A and B */
333 int i, j; /* Loop counters */
334 int m; /* Number of lines to output */
335 int skip; /* Number of lines to skip */
336
337 for(r=0; R[r+1] || R[r+2] || R[r+3]; r += 3*nr){
338 /* Figure out how many triples to show in a single block */
339 for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
340 /* printf("r=%d nr=%d\n", r, nr); */
341
342 /* For the current block comprising nr triples, figure out
343 ** how many lines of A and B are to be displayed
344 */
345 if( R[r]>nContext ){
@@ -325,16 +351,16 @@
351 }
352 for(i=0; i<nr; i++){
353 na += R[r+i*3+1];
354 nb += R[r+i*3+2];
355 }
356 if( R[r+nr*3]>nContext ){
357 na += nContext;
358 nb += nContext;
359 }else{
360 na += R[r+nr*3];
361 nb += R[r+nr*3];
362 }
363 for(i=1; i<nr; i++){
364 na += R[r+i*3];
365 nb += R[r+i*3];
366 }
367

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button