Fossil SCM

If the left/right alignment in side-by-side diff becomes too busy and hard for a human to read, then show it simplified: as inserting one side and then deleting the other.

drh 2012-12-15 00:59 UTC trunk
Commit 52db049b8922fae36577305a4ab8e21129c2f553
2 files changed +35 -12 +7
+35 -12
--- src/diff.c
+++ src/diff.c
@@ -1043,13 +1043,15 @@
10431043
**
10441044
** The return value is a buffer of unsigned characters, obtained from
10451045
** fossil_malloc(). (The caller needs to free the return value using
10461046
** fossil_free().) Entries in the returned array have values as follows:
10471047
**
1048
-** 1. Delete the next line of pLeft.
1049
-** 2. The next line of pLeft changes into the next line of pRight.
1050
-** 3. Insert the next line of pRight.
1048
+** 1. Delete the next line of pLeft.
1049
+** 2. Insert the next line of pRight.
1050
+** 3... The next line of pLeft changes into the next line of pRight.
1051
+**
1052
+** Values larger than three indicate better matches.
10511053
**
10521054
** The length of the returned array will be just large enough to cause
10531055
** all elements of pLeft and pRight to be consumed.
10541056
**
10551057
** Algorithm: Wagner's minimum edit-distance algorithm, modified by
@@ -1064,15 +1066,17 @@
10641066
){
10651067
int i, j, k; /* Loop counters */
10661068
int *a; /* One row of the Wagner matrix */
10671069
int *pToFree; /* Space that needs to be freed */
10681070
unsigned char *aM; /* Wagner result matrix */
1071
+ int nMatch, iMatch; /* Number of matching lines and match score */
1072
+ int mnLen; /* MIN(nLeft, nRight) */
10691073
int aBuf[100]; /* Stack space for a[] if nRight not to big */
10701074
10711075
aM = fossil_malloc( (nLeft+1)*(nRight+1) );
10721076
if( nLeft==0 ){
1073
- memset(aM, 3, nRight);
1077
+ memset(aM, 2, nRight);
10741078
return aM;
10751079
}
10761080
if( nRight==0 ){
10771081
memset(aM, 1, nLeft);
10781082
return aM;
@@ -1079,11 +1083,11 @@
10791083
}
10801084
10811085
/* This algorithm is O(N**2). So if N is too big, bail out with a
10821086
** simple (but stupid and ugly) result that doesn't take too long. */
10831087
if( nLeft*nRight>100000 ){
1084
- memset(aM, 3, nRight);
1088
+ memset(aM, 2, nRight);
10851089
memset(aM+nRight, 1, nLeft);
10861090
return aM;
10871091
}
10881092
10891093
if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){
@@ -1093,30 +1097,30 @@
10931097
a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
10941098
}
10951099
10961100
/* Compute the best alignment */
10971101
for(i=0; i<=nRight; i++){
1098
- aM[i] = 3;
1102
+ aM[i] = 2;
10991103
a[i] = i*50;
11001104
}
11011105
aM[0] = 0;
11021106
for(j=1; j<=nLeft; j++){
11031107
int p = a[0];
11041108
a[0] = p+50;
11051109
aM[j*(nRight+1)] = 1;
11061110
for(i=1; i<=nRight; i++){
11071111
int m = a[i-1]+50;
1108
- int d = 3;
1112
+ int d = 2;
11091113
if( m>a[i]+50 ){
11101114
m = a[i]+50;
11111115
d = 1;
11121116
}
11131117
if( m>p ){
11141118
int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1115
- if( (score<66 || (i<j+1 && i>j-1)) && m>p+score ){
1119
+ if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){
11161120
m = p+score;
1117
- d = 2;
1121
+ d = 3 | score*4;
11181122
}
11191123
}
11201124
p = a[i];
11211125
a[i] = m;
11221126
aM[j*(nRight+1)+i] = d;
@@ -1125,17 +1129,20 @@
11251129
11261130
/* Compute the lowest-cost path back through the matrix */
11271131
i = nRight;
11281132
j = nLeft;
11291133
k = (nRight+1)*(nLeft+1)-1;
1134
+ nMatch = iMatch = 0;
11301135
while( i+j>0 ){
11311136
unsigned char c = aM[k--];
1132
- if( c==2 ){
1137
+ if( c>=3 ){
11331138
assert( i>0 && j>0 );
11341139
i--;
11351140
j--;
1136
- }else if( c==3 ){
1141
+ nMatch++;
1142
+ iMatch += (c>>2);
1143
+ }else if( c==2 ){
11371144
assert( i>0 );
11381145
i--;
11391146
}else{
11401147
assert( j>0 );
11411148
j--;
@@ -1143,10 +1150,26 @@
11431150
aM[k] = aM[j*(nRight+1)+i];
11441151
}
11451152
k++;
11461153
i = (nRight+1)*(nLeft+1) - k;
11471154
memmove(aM, &aM[k], i);
1155
+
1156
+ /* If:
1157
+ ** (1) the alignment is more than 25% longer than the longest side, and
1158
+ ** (2) the average match cost exceeds 15
1159
+ ** Then this is probably an alignment that will be difficult for humans
1160
+ ** to read. So instead, just show all of the right side inserted followed
1161
+ ** by all of the left side deleted.
1162
+ **
1163
+ ** The coefficients for conditions (1) and (2) above are determined by
1164
+ ** experimentation.
1165
+ */
1166
+ mnLen = nLeft>nRight ? nLeft : nRight;
1167
+ if( i*4>mnLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1168
+ memset(aM, 2, nRight);
1169
+ memset(aM+nRight, 1, nLeft);
1170
+ }
11481171
11491172
/* Return the result */
11501173
fossil_free(pToFree);
11511174
return aM;
11521175
}
@@ -1294,11 +1317,11 @@
12941317
}
12951318
blob_append(pOut, s.zLine, s.n);
12961319
assert( ma>0 );
12971320
ma--;
12981321
a++;
1299
- }else if( alignment[j]==2 ){
1322
+ }else if( alignment[j]>=3 ){
13001323
s.n = 0;
13011324
sbsWriteLineChange(&s, &A[a], a, &B[b], b);
13021325
blob_append(pOut, s.zLine, s.n);
13031326
assert( ma>0 && mb>0 );
13041327
ma--;
13051328
--- src/diff.c
+++ src/diff.c
@@ -1043,13 +1043,15 @@
1043 **
1044 ** The return value is a buffer of unsigned characters, obtained from
1045 ** fossil_malloc(). (The caller needs to free the return value using
1046 ** fossil_free().) Entries in the returned array have values as follows:
1047 **
1048 ** 1. Delete the next line of pLeft.
1049 ** 2. The next line of pLeft changes into the next line of pRight.
1050 ** 3. Insert the next line of pRight.
 
 
1051 **
1052 ** The length of the returned array will be just large enough to cause
1053 ** all elements of pLeft and pRight to be consumed.
1054 **
1055 ** Algorithm: Wagner's minimum edit-distance algorithm, modified by
@@ -1064,15 +1066,17 @@
1064 ){
1065 int i, j, k; /* Loop counters */
1066 int *a; /* One row of the Wagner matrix */
1067 int *pToFree; /* Space that needs to be freed */
1068 unsigned char *aM; /* Wagner result matrix */
 
 
1069 int aBuf[100]; /* Stack space for a[] if nRight not to big */
1070
1071 aM = fossil_malloc( (nLeft+1)*(nRight+1) );
1072 if( nLeft==0 ){
1073 memset(aM, 3, nRight);
1074 return aM;
1075 }
1076 if( nRight==0 ){
1077 memset(aM, 1, nLeft);
1078 return aM;
@@ -1079,11 +1083,11 @@
1079 }
1080
1081 /* This algorithm is O(N**2). So if N is too big, bail out with a
1082 ** simple (but stupid and ugly) result that doesn't take too long. */
1083 if( nLeft*nRight>100000 ){
1084 memset(aM, 3, nRight);
1085 memset(aM+nRight, 1, nLeft);
1086 return aM;
1087 }
1088
1089 if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){
@@ -1093,30 +1097,30 @@
1093 a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
1094 }
1095
1096 /* Compute the best alignment */
1097 for(i=0; i<=nRight; i++){
1098 aM[i] = 3;
1099 a[i] = i*50;
1100 }
1101 aM[0] = 0;
1102 for(j=1; j<=nLeft; j++){
1103 int p = a[0];
1104 a[0] = p+50;
1105 aM[j*(nRight+1)] = 1;
1106 for(i=1; i<=nRight; i++){
1107 int m = a[i-1]+50;
1108 int d = 3;
1109 if( m>a[i]+50 ){
1110 m = a[i]+50;
1111 d = 1;
1112 }
1113 if( m>p ){
1114 int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1115 if( (score<66 || (i<j+1 && i>j-1)) && m>p+score ){
1116 m = p+score;
1117 d = 2;
1118 }
1119 }
1120 p = a[i];
1121 a[i] = m;
1122 aM[j*(nRight+1)+i] = d;
@@ -1125,17 +1129,20 @@
1125
1126 /* Compute the lowest-cost path back through the matrix */
1127 i = nRight;
1128 j = nLeft;
1129 k = (nRight+1)*(nLeft+1)-1;
 
1130 while( i+j>0 ){
1131 unsigned char c = aM[k--];
1132 if( c==2 ){
1133 assert( i>0 && j>0 );
1134 i--;
1135 j--;
1136 }else if( c==3 ){
 
 
1137 assert( i>0 );
1138 i--;
1139 }else{
1140 assert( j>0 );
1141 j--;
@@ -1143,10 +1150,26 @@
1143 aM[k] = aM[j*(nRight+1)+i];
1144 }
1145 k++;
1146 i = (nRight+1)*(nLeft+1) - k;
1147 memmove(aM, &aM[k], i);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1148
1149 /* Return the result */
1150 fossil_free(pToFree);
1151 return aM;
1152 }
@@ -1294,11 +1317,11 @@
1294 }
1295 blob_append(pOut, s.zLine, s.n);
1296 assert( ma>0 );
1297 ma--;
1298 a++;
1299 }else if( alignment[j]==2 ){
1300 s.n = 0;
1301 sbsWriteLineChange(&s, &A[a], a, &B[b], b);
1302 blob_append(pOut, s.zLine, s.n);
1303 assert( ma>0 && mb>0 );
1304 ma--;
1305
--- src/diff.c
+++ src/diff.c
@@ -1043,13 +1043,15 @@
1043 **
1044 ** The return value is a buffer of unsigned characters, obtained from
1045 ** fossil_malloc(). (The caller needs to free the return value using
1046 ** fossil_free().) Entries in the returned array have values as follows:
1047 **
1048 ** 1. Delete the next line of pLeft.
1049 ** 2. Insert the next line of pRight.
1050 ** 3... The next line of pLeft changes into the next line of pRight.
1051 **
1052 ** Values larger than three indicate better matches.
1053 **
1054 ** The length of the returned array will be just large enough to cause
1055 ** all elements of pLeft and pRight to be consumed.
1056 **
1057 ** Algorithm: Wagner's minimum edit-distance algorithm, modified by
@@ -1064,15 +1066,17 @@
1066 ){
1067 int i, j, k; /* Loop counters */
1068 int *a; /* One row of the Wagner matrix */
1069 int *pToFree; /* Space that needs to be freed */
1070 unsigned char *aM; /* Wagner result matrix */
1071 int nMatch, iMatch; /* Number of matching lines and match score */
1072 int mnLen; /* MIN(nLeft, nRight) */
1073 int aBuf[100]; /* Stack space for a[] if nRight not to big */
1074
1075 aM = fossil_malloc( (nLeft+1)*(nRight+1) );
1076 if( nLeft==0 ){
1077 memset(aM, 2, nRight);
1078 return aM;
1079 }
1080 if( nRight==0 ){
1081 memset(aM, 1, nLeft);
1082 return aM;
@@ -1079,11 +1083,11 @@
1083 }
1084
1085 /* This algorithm is O(N**2). So if N is too big, bail out with a
1086 ** simple (but stupid and ugly) result that doesn't take too long. */
1087 if( nLeft*nRight>100000 ){
1088 memset(aM, 2, nRight);
1089 memset(aM+nRight, 1, nLeft);
1090 return aM;
1091 }
1092
1093 if( nRight < (sizeof(aBuf)/sizeof(aBuf[0]))-1 ){
@@ -1093,30 +1097,30 @@
1097 a = pToFree = fossil_malloc( sizeof(a[0])*(nRight+1) );
1098 }
1099
1100 /* Compute the best alignment */
1101 for(i=0; i<=nRight; i++){
1102 aM[i] = 2;
1103 a[i] = i*50;
1104 }
1105 aM[0] = 0;
1106 for(j=1; j<=nLeft; j++){
1107 int p = a[0];
1108 a[0] = p+50;
1109 aM[j*(nRight+1)] = 1;
1110 for(i=1; i<=nRight; i++){
1111 int m = a[i-1]+50;
1112 int d = 2;
1113 if( m>a[i]+50 ){
1114 m = a[i]+50;
1115 d = 1;
1116 }
1117 if( m>p ){
1118 int score = match_dline(&aLeft[j-1], &aRight[i-1]);
1119 if( (score<=63 || (i<j+1 && i>j-1)) && m>p+score ){
1120 m = p+score;
1121 d = 3 | score*4;
1122 }
1123 }
1124 p = a[i];
1125 a[i] = m;
1126 aM[j*(nRight+1)+i] = d;
@@ -1125,17 +1129,20 @@
1129
1130 /* Compute the lowest-cost path back through the matrix */
1131 i = nRight;
1132 j = nLeft;
1133 k = (nRight+1)*(nLeft+1)-1;
1134 nMatch = iMatch = 0;
1135 while( i+j>0 ){
1136 unsigned char c = aM[k--];
1137 if( c>=3 ){
1138 assert( i>0 && j>0 );
1139 i--;
1140 j--;
1141 nMatch++;
1142 iMatch += (c>>2);
1143 }else if( c==2 ){
1144 assert( i>0 );
1145 i--;
1146 }else{
1147 assert( j>0 );
1148 j--;
@@ -1143,10 +1150,26 @@
1150 aM[k] = aM[j*(nRight+1)+i];
1151 }
1152 k++;
1153 i = (nRight+1)*(nLeft+1) - k;
1154 memmove(aM, &aM[k], i);
1155
1156 /* If:
1157 ** (1) the alignment is more than 25% longer than the longest side, and
1158 ** (2) the average match cost exceeds 15
1159 ** Then this is probably an alignment that will be difficult for humans
1160 ** to read. So instead, just show all of the right side inserted followed
1161 ** by all of the left side deleted.
1162 **
1163 ** The coefficients for conditions (1) and (2) above are determined by
1164 ** experimentation.
1165 */
1166 mnLen = nLeft>nRight ? nLeft : nRight;
1167 if( i*4>mnLen*5 && (nMatch==0 || iMatch/nMatch>15) ){
1168 memset(aM, 2, nRight);
1169 memset(aM+nRight, 1, nLeft);
1170 }
1171
1172 /* Return the result */
1173 fossil_free(pToFree);
1174 return aM;
1175 }
@@ -1294,11 +1317,11 @@
1317 }
1318 blob_append(pOut, s.zLine, s.n);
1319 assert( ma>0 );
1320 ma--;
1321 a++;
1322 }else if( alignment[j]>=3 ){
1323 s.n = 0;
1324 sbsWriteLineChange(&s, &A[a], a, &B[b], b);
1325 blob_append(pOut, s.zLine, s.n);
1326 assert( ma>0 && mb>0 );
1327 ma--;
1328
--- test/diff-test-1.wiki
+++ test/diff-test-1.wiki
@@ -23,5 +23,12 @@
2323
2424
External:
2525
2626
* <a href="http://www.sqlite.org/src/fdiff?v1=aafcb21a74e41f9a&v2=a6d127dd05daf0f9#chunk3" target="testwindow">
2727
Code indentation change.</a>
28
+ * <a href="http://www.sqlite.org/src/info/52e755943f">
29
+ A complex change (chunk 1) in which the alignment becomes so complex
30
+ that it is better for clarity to abandon it and just show the left
31
+ and right sides contiguously.</a>
32
+ * <a href="http://www.sqlite.org/src/info/3d65c70343#chunk5">
33
+ An indentation change. See especially lines 2313 and 2317 on the right,
34
+ that their green indentation addition is left-justified.</a>
2835
--- test/diff-test-1.wiki
+++ test/diff-test-1.wiki
@@ -23,5 +23,12 @@
23
24 External:
25
26 * <a href="http://www.sqlite.org/src/fdiff?v1=aafcb21a74e41f9a&v2=a6d127dd05daf0f9#chunk3" target="testwindow">
27 Code indentation change.</a>
 
 
 
 
 
 
 
28
--- test/diff-test-1.wiki
+++ test/diff-test-1.wiki
@@ -23,5 +23,12 @@
23
24 External:
25
26 * <a href="http://www.sqlite.org/src/fdiff?v1=aafcb21a74e41f9a&v2=a6d127dd05daf0f9#chunk3" target="testwindow">
27 Code indentation change.</a>
28 * <a href="http://www.sqlite.org/src/info/52e755943f">
29 A complex change (chunk 1) in which the alignment becomes so complex
30 that it is better for clarity to abandon it and just show the left
31 and right sides contiguously.</a>
32 * <a href="http://www.sqlite.org/src/info/3d65c70343#chunk5">
33 An indentation change. See especially lines 2313 and 2317 on the right,
34 that their green indentation addition is left-justified.</a>
35

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button