Fossil SCM

Merge trunk; some improvements in messages (not ready yet).

jan.nijtmans 2012-11-04 10:48 improve_commit_warning
Commit 517a6f7a53b6e04821aafd6ad90f37abdbadbee9
2 files changed +67 -10 +22 -19
+67 -10
--- src/checkin.c
+++ src/checkin.c
@@ -412,20 +412,23 @@
412412
const char *zIgnoreFlag;
413413
Blob path, repo;
414414
Stmt q;
415415
int n;
416416
Glob *pIgnore;
417
+ int testFlag = 0;
417418
418419
allFlag = find_option("force","f",0)!=0;
419420
if( find_option("dotfiles",0,0)!=0 ) scanFlags |= SCAN_ALL;
420421
if( find_option("temp",0,0)!=0 ) scanFlags |= SCAN_TEMP;
421422
zIgnoreFlag = find_option("ignore",0,1);
423
+ testFlag = find_option("test",0,0)!=0;
422424
db_must_be_within_tree();
423425
if( zIgnoreFlag==0 ){
424426
zIgnoreFlag = db_get("ignore-glob", 0);
425427
}
426
- db_multi_exec("CREATE TEMP TABLE sfile(x TEXT PRIMARY KEY)");
428
+ db_multi_exec("CREATE TEMP TABLE sfile(x TEXT PRIMARY KEY %s)",
429
+ filename_collation());
427430
n = strlen(g.zLocalRoot);
428431
blob_init(&path, g.zLocalRoot, n-1);
429432
pIgnore = glob_create(zIgnoreFlag);
430433
vfile_scan(&path, blob_size(&path), scanFlags, pIgnore);
431434
glob_free(pIgnore);
@@ -436,12 +439,15 @@
436439
g.zLocalRoot, fossil_all_reserved_names()
437440
);
438441
if( file_tree_name(g.zRepositoryName, &repo, 0) ){
439442
db_multi_exec("DELETE FROM sfile WHERE x=%B", &repo);
440443
}
444
+ db_multi_exec("DELETE FROM sfile WHERE x IN (SELECT pathname FROM vfile)");
441445
while( db_step(&q)==SQLITE_ROW ){
442
- if( allFlag ){
446
+ if( testFlag ){
447
+ fossil_print("%s\n", db_column_text(&q,0));
448
+ }else if( allFlag ){
443449
file_delete(db_column_text(&q, 0));
444450
}else{
445451
Blob ans;
446452
char cReply;
447453
char *prompt = mprintf("remove unmanaged file \"%s\" (y/N)? ",
@@ -885,21 +891,45 @@
885891
/*
886892
** Issue a warning and give the user an opportunity to abandon out
887893
** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888894
** is seen in a text file.
889895
*/
890
-static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
896
+static void commit_warning(
897
+ const Blob *p, /* The content of the file being committed. */
898
+ int crnlOk, /* Non-zero if CR/NL warnings should be disabled. */
899
+ int binOk, /* Non-zero if binary warnings should be disabled. */
900
+ const char *zFilename /* The full name of the file being committed. */
901
+){
891902
int eType; /* return value of looks_like_utf8/utf16() */
892903
int fUnicode; /* return value of starts_with_utf16_bom() */
893904
char *zMsg; /* Warning message */
894905
Blob fname; /* Relative pathname of the file */
895906
static int allOk = 0; /* Set to true to disable this routine */
896907
897908
if( allOk ) return;
898909
fUnicode = starts_with_utf16_bom(p);
899910
eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
900
- if( eType<0 || fUnicode ){
911
+ if( eType<3){
912
+ Blob ans;
913
+ char cReply;
914
+
915
+ blob_zero(&ans);
916
+ file_relative_name(zFilename, &fname, 0);
917
+ zMsg = mprintf(
918
+ "%s appears to be text, but not UTF-8 or ASCII. commit anyhow (a=all/y/N)? ",
919
+ blob_str(&fname));
920
+ prompt_user(zMsg, &ans);
921
+ fossil_free(zMsg);
922
+ cReply = blob_str(&ans)[0];
923
+ if( cReply!='y' && cReply!='Y' ){
924
+ fossil_fatal("Abandoning commit due to non-UTF-8 in %s",
925
+ blob_str(&fname));
926
+ }
927
+ blob_reset(&ans);
928
+ eType +=4 ;
929
+ }
930
+ if( eType==0 || eType==-1 || fUnicode ){
901931
const char *zWarning;
902932
Blob ans;
903933
char cReply;
904934
905935
if( eType==-1 && fUnicode ){
@@ -907,12 +937,15 @@
907937
}else if( eType==-1 ){
908938
if( crnlOk ){
909939
return; /* We don't want CR/NL warnings for this file. */
910940
}
911941
zWarning = "CR/NL line endings";
912
- }else if( eType==-2 ){
913
- zWarning = "invalid UTF-8 or ASCII";
942
+ }else if( eType==0 ){
943
+ if( binOk ){
944
+ return; /* We don't want binary warnings for this file. */
945
+ }
946
+ zWarning = "binary data";
914947
}else{
915948
zWarning = "Unicode";
916949
}
917950
file_relative_name(zFilename, &fname, 0);
918951
blob_zero(&ans);
@@ -1132,10 +1165,32 @@
11321165
select_commit_files();
11331166
isAMerge = db_exists("SELECT 1 FROM vmerge WHERE id=0");
11341167
if( g.aCommitFile && isAMerge ){
11351168
fossil_fatal("cannot do a partial commit of a merge");
11361169
}
1170
+
1171
+ /* Doing "fossil mv fileA fileB; fossil add fileA; fossil commit fileA"
1172
+ ** will generate a manifest that has two fileA entries, which is illegal.
1173
+ ** When you think about it, the sequence above makes no sense. So detect
1174
+ ** it and disallow it. Ticket [0ff64b0a5fc8].
1175
+ */
1176
+ if( g.aCommitFile ){
1177
+ Stmt qRename;
1178
+ db_prepare(&qRename,
1179
+ "SELECT v1.pathname, v2.pathname"
1180
+ " FROM vfile AS v2 CROSS JOIN vfile AS v1"
1181
+ " WHERE is_selected(v1.id)"
1182
+ " AND v2.origname IS NOT NULL"
1183
+ " AND v2.origname=v1.pathname");
1184
+ if( db_step(&qRename)==SQLITE_ROW ){
1185
+ const char *zFrom = db_column_text(&qRename, 0);
1186
+ const char *zTo = db_column_text(&qRename, 1);
1187
+ fossil_fatal("cannot do a partial commit of '%s' because "
1188
+ "'%s' was renamed to '%s'", zFrom, zFrom, zTo);
1189
+ }
1190
+ db_finalize(&qRename);
1191
+ }
11371192
11381193
user_select();
11391194
/*
11401195
** Check that the user exists.
11411196
*/
@@ -1216,34 +1271,36 @@
12161271
/* Step 1: Insert records for all modified files into the blob
12171272
** table. If there were arguments passed to this command, only
12181273
** the identified fils are inserted (if they have been modified).
12191274
*/
12201275
db_prepare(&q,
1221
- "SELECT id, %Q || pathname, mrid, %s, chnged FROM vfile "
1276
+ "SELECT id, %Q || pathname, mrid, %s, chnged, %s FROM vfile "
12221277
"WHERE chnged==1 AND NOT deleted AND is_selected(id)",
1223
- g.zLocalRoot, glob_expr("pathname", db_get("crnl-glob",""))
1278
+ g.zLocalRoot, glob_expr("pathname", db_get("crnl-glob","")),
1279
+ glob_expr("pathname", db_get("binary-glob",""))
12241280
);
12251281
while( db_step(&q)==SQLITE_ROW ){
12261282
int id, rid;
12271283
const char *zFullname;
12281284
Blob content;
1229
- int crnlOk, chnged;
1285
+ int crnlOk, binOk, chnged;
12301286
12311287
id = db_column_int(&q, 0);
12321288
zFullname = db_column_text(&q, 1);
12331289
rid = db_column_int(&q, 2);
12341290
crnlOk = db_column_int(&q, 3);
12351291
chnged = db_column_int(&q, 4);
1292
+ binOk = db_column_int(&q, 5);
12361293
12371294
blob_zero(&content);
12381295
if( file_wd_islink(zFullname) ){
12391296
/* Instead of file content, put link destination path */
12401297
blob_read_link(&content, zFullname);
12411298
}else{
12421299
blob_read_from_file(&content, zFullname);
12431300
}
1244
- commit_warning(&content, crnlOk, zFullname);
1301
+ commit_warning(&content, crnlOk, binOk, zFullname);
12451302
if( chnged==1 && contains_merge_marker(&content) ){
12461303
Blob fname; /* Relative pathname of the file */
12471304
12481305
nConflict++;
12491306
file_relative_name(zFullname, &fname, 0);
12501307
--- src/checkin.c
+++ src/checkin.c
@@ -412,20 +412,23 @@
412 const char *zIgnoreFlag;
413 Blob path, repo;
414 Stmt q;
415 int n;
416 Glob *pIgnore;
 
417
418 allFlag = find_option("force","f",0)!=0;
419 if( find_option("dotfiles",0,0)!=0 ) scanFlags |= SCAN_ALL;
420 if( find_option("temp",0,0)!=0 ) scanFlags |= SCAN_TEMP;
421 zIgnoreFlag = find_option("ignore",0,1);
 
422 db_must_be_within_tree();
423 if( zIgnoreFlag==0 ){
424 zIgnoreFlag = db_get("ignore-glob", 0);
425 }
426 db_multi_exec("CREATE TEMP TABLE sfile(x TEXT PRIMARY KEY)");
 
427 n = strlen(g.zLocalRoot);
428 blob_init(&path, g.zLocalRoot, n-1);
429 pIgnore = glob_create(zIgnoreFlag);
430 vfile_scan(&path, blob_size(&path), scanFlags, pIgnore);
431 glob_free(pIgnore);
@@ -436,12 +439,15 @@
436 g.zLocalRoot, fossil_all_reserved_names()
437 );
438 if( file_tree_name(g.zRepositoryName, &repo, 0) ){
439 db_multi_exec("DELETE FROM sfile WHERE x=%B", &repo);
440 }
 
441 while( db_step(&q)==SQLITE_ROW ){
442 if( allFlag ){
 
 
443 file_delete(db_column_text(&q, 0));
444 }else{
445 Blob ans;
446 char cReply;
447 char *prompt = mprintf("remove unmanaged file \"%s\" (y/N)? ",
@@ -885,21 +891,45 @@
885 /*
886 ** Issue a warning and give the user an opportunity to abandon out
887 ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
888 ** is seen in a text file.
889 */
890 static void commit_warning(const Blob *p, int crnlOk, const char *zFilename){
 
 
 
 
 
891 int eType; /* return value of looks_like_utf8/utf16() */
892 int fUnicode; /* return value of starts_with_utf16_bom() */
893 char *zMsg; /* Warning message */
894 Blob fname; /* Relative pathname of the file */
895 static int allOk = 0; /* Set to true to disable this routine */
896
897 if( allOk ) return;
898 fUnicode = starts_with_utf16_bom(p);
899 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
900 if( eType<0 || fUnicode ){
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901 const char *zWarning;
902 Blob ans;
903 char cReply;
904
905 if( eType==-1 && fUnicode ){
@@ -907,12 +937,15 @@
907 }else if( eType==-1 ){
908 if( crnlOk ){
909 return; /* We don't want CR/NL warnings for this file. */
910 }
911 zWarning = "CR/NL line endings";
912 }else if( eType==-2 ){
913 zWarning = "invalid UTF-8 or ASCII";
 
 
 
914 }else{
915 zWarning = "Unicode";
916 }
917 file_relative_name(zFilename, &fname, 0);
918 blob_zero(&ans);
@@ -1132,10 +1165,32 @@
1132 select_commit_files();
1133 isAMerge = db_exists("SELECT 1 FROM vmerge WHERE id=0");
1134 if( g.aCommitFile && isAMerge ){
1135 fossil_fatal("cannot do a partial commit of a merge");
1136 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1137
1138 user_select();
1139 /*
1140 ** Check that the user exists.
1141 */
@@ -1216,34 +1271,36 @@
1216 /* Step 1: Insert records for all modified files into the blob
1217 ** table. If there were arguments passed to this command, only
1218 ** the identified fils are inserted (if they have been modified).
1219 */
1220 db_prepare(&q,
1221 "SELECT id, %Q || pathname, mrid, %s, chnged FROM vfile "
1222 "WHERE chnged==1 AND NOT deleted AND is_selected(id)",
1223 g.zLocalRoot, glob_expr("pathname", db_get("crnl-glob",""))
 
1224 );
1225 while( db_step(&q)==SQLITE_ROW ){
1226 int id, rid;
1227 const char *zFullname;
1228 Blob content;
1229 int crnlOk, chnged;
1230
1231 id = db_column_int(&q, 0);
1232 zFullname = db_column_text(&q, 1);
1233 rid = db_column_int(&q, 2);
1234 crnlOk = db_column_int(&q, 3);
1235 chnged = db_column_int(&q, 4);
 
1236
1237 blob_zero(&content);
1238 if( file_wd_islink(zFullname) ){
1239 /* Instead of file content, put link destination path */
1240 blob_read_link(&content, zFullname);
1241 }else{
1242 blob_read_from_file(&content, zFullname);
1243 }
1244 commit_warning(&content, crnlOk, zFullname);
1245 if( chnged==1 && contains_merge_marker(&content) ){
1246 Blob fname; /* Relative pathname of the file */
1247
1248 nConflict++;
1249 file_relative_name(zFullname, &fname, 0);
1250
--- src/checkin.c
+++ src/checkin.c
@@ -412,20 +412,23 @@
412 const char *zIgnoreFlag;
413 Blob path, repo;
414 Stmt q;
415 int n;
416 Glob *pIgnore;
417 int testFlag = 0;
418
419 allFlag = find_option("force","f",0)!=0;
420 if( find_option("dotfiles",0,0)!=0 ) scanFlags |= SCAN_ALL;
421 if( find_option("temp",0,0)!=0 ) scanFlags |= SCAN_TEMP;
422 zIgnoreFlag = find_option("ignore",0,1);
423 testFlag = find_option("test",0,0)!=0;
424 db_must_be_within_tree();
425 if( zIgnoreFlag==0 ){
426 zIgnoreFlag = db_get("ignore-glob", 0);
427 }
428 db_multi_exec("CREATE TEMP TABLE sfile(x TEXT PRIMARY KEY %s)",
429 filename_collation());
430 n = strlen(g.zLocalRoot);
431 blob_init(&path, g.zLocalRoot, n-1);
432 pIgnore = glob_create(zIgnoreFlag);
433 vfile_scan(&path, blob_size(&path), scanFlags, pIgnore);
434 glob_free(pIgnore);
@@ -436,12 +439,15 @@
439 g.zLocalRoot, fossil_all_reserved_names()
440 );
441 if( file_tree_name(g.zRepositoryName, &repo, 0) ){
442 db_multi_exec("DELETE FROM sfile WHERE x=%B", &repo);
443 }
444 db_multi_exec("DELETE FROM sfile WHERE x IN (SELECT pathname FROM vfile)");
445 while( db_step(&q)==SQLITE_ROW ){
446 if( testFlag ){
447 fossil_print("%s\n", db_column_text(&q,0));
448 }else if( allFlag ){
449 file_delete(db_column_text(&q, 0));
450 }else{
451 Blob ans;
452 char cReply;
453 char *prompt = mprintf("remove unmanaged file \"%s\" (y/N)? ",
@@ -885,21 +891,45 @@
891 /*
892 ** Issue a warning and give the user an opportunity to abandon out
893 ** if a Unicode (UTF-16) byte-order-mark (BOM) or a \r\n line ending
894 ** is seen in a text file.
895 */
896 static void commit_warning(
897 const Blob *p, /* The content of the file being committed. */
898 int crnlOk, /* Non-zero if CR/NL warnings should be disabled. */
899 int binOk, /* Non-zero if binary warnings should be disabled. */
900 const char *zFilename /* The full name of the file being committed. */
901 ){
902 int eType; /* return value of looks_like_utf8/utf16() */
903 int fUnicode; /* return value of starts_with_utf16_bom() */
904 char *zMsg; /* Warning message */
905 Blob fname; /* Relative pathname of the file */
906 static int allOk = 0; /* Set to true to disable this routine */
907
908 if( allOk ) return;
909 fUnicode = starts_with_utf16_bom(p);
910 eType = fUnicode ? looks_like_utf16(p) : looks_like_utf8(p);
911 if( eType<3){
912 Blob ans;
913 char cReply;
914
915 blob_zero(&ans);
916 file_relative_name(zFilename, &fname, 0);
917 zMsg = mprintf(
918 "%s appears to be text, but not UTF-8 or ASCII. commit anyhow (a=all/y/N)? ",
919 blob_str(&fname));
920 prompt_user(zMsg, &ans);
921 fossil_free(zMsg);
922 cReply = blob_str(&ans)[0];
923 if( cReply!='y' && cReply!='Y' ){
924 fossil_fatal("Abandoning commit due to non-UTF-8 in %s",
925 blob_str(&fname));
926 }
927 blob_reset(&ans);
928 eType +=4 ;
929 }
930 if( eType==0 || eType==-1 || fUnicode ){
931 const char *zWarning;
932 Blob ans;
933 char cReply;
934
935 if( eType==-1 && fUnicode ){
@@ -907,12 +937,15 @@
937 }else if( eType==-1 ){
938 if( crnlOk ){
939 return; /* We don't want CR/NL warnings for this file. */
940 }
941 zWarning = "CR/NL line endings";
942 }else if( eType==0 ){
943 if( binOk ){
944 return; /* We don't want binary warnings for this file. */
945 }
946 zWarning = "binary data";
947 }else{
948 zWarning = "Unicode";
949 }
950 file_relative_name(zFilename, &fname, 0);
951 blob_zero(&ans);
@@ -1132,10 +1165,32 @@
1165 select_commit_files();
1166 isAMerge = db_exists("SELECT 1 FROM vmerge WHERE id=0");
1167 if( g.aCommitFile && isAMerge ){
1168 fossil_fatal("cannot do a partial commit of a merge");
1169 }
1170
1171 /* Doing "fossil mv fileA fileB; fossil add fileA; fossil commit fileA"
1172 ** will generate a manifest that has two fileA entries, which is illegal.
1173 ** When you think about it, the sequence above makes no sense. So detect
1174 ** it and disallow it. Ticket [0ff64b0a5fc8].
1175 */
1176 if( g.aCommitFile ){
1177 Stmt qRename;
1178 db_prepare(&qRename,
1179 "SELECT v1.pathname, v2.pathname"
1180 " FROM vfile AS v2 CROSS JOIN vfile AS v1"
1181 " WHERE is_selected(v1.id)"
1182 " AND v2.origname IS NOT NULL"
1183 " AND v2.origname=v1.pathname");
1184 if( db_step(&qRename)==SQLITE_ROW ){
1185 const char *zFrom = db_column_text(&qRename, 0);
1186 const char *zTo = db_column_text(&qRename, 1);
1187 fossil_fatal("cannot do a partial commit of '%s' because "
1188 "'%s' was renamed to '%s'", zFrom, zFrom, zTo);
1189 }
1190 db_finalize(&qRename);
1191 }
1192
1193 user_select();
1194 /*
1195 ** Check that the user exists.
1196 */
@@ -1216,34 +1271,36 @@
1271 /* Step 1: Insert records for all modified files into the blob
1272 ** table. If there were arguments passed to this command, only
1273 ** the identified fils are inserted (if they have been modified).
1274 */
1275 db_prepare(&q,
1276 "SELECT id, %Q || pathname, mrid, %s, chnged, %s FROM vfile "
1277 "WHERE chnged==1 AND NOT deleted AND is_selected(id)",
1278 g.zLocalRoot, glob_expr("pathname", db_get("crnl-glob","")),
1279 glob_expr("pathname", db_get("binary-glob",""))
1280 );
1281 while( db_step(&q)==SQLITE_ROW ){
1282 int id, rid;
1283 const char *zFullname;
1284 Blob content;
1285 int crnlOk, binOk, chnged;
1286
1287 id = db_column_int(&q, 0);
1288 zFullname = db_column_text(&q, 1);
1289 rid = db_column_int(&q, 2);
1290 crnlOk = db_column_int(&q, 3);
1291 chnged = db_column_int(&q, 4);
1292 binOk = db_column_int(&q, 5);
1293
1294 blob_zero(&content);
1295 if( file_wd_islink(zFullname) ){
1296 /* Instead of file content, put link destination path */
1297 blob_read_link(&content, zFullname);
1298 }else{
1299 blob_read_from_file(&content, zFullname);
1300 }
1301 commit_warning(&content, crnlOk, binOk, zFullname);
1302 if( chnged==1 && contains_merge_marker(&content) ){
1303 Blob fname; /* Relative pathname of the file */
1304
1305 nConflict++;
1306 file_relative_name(zFullname, &fname, 0);
1307
+22 -19
--- src/diff.c
+++ src/diff.c
@@ -185,92 +185,95 @@
185185
** to be binary.
186186
**
187187
** (-1) -- The content appears to consist entirely of text, with lines
188188
** delimited by carriage-return, line-feed pairs.
189189
**
190
-** (-2) -- The content appears to consist entirely of text, with lines
191
-** delimited by line-feed characters or carriage-return,
192
-** line-feed pairs; however, the encoding is not UTF-8 or ASCII.
190
+** (-3) -- The content appears to consist entirely of text, with lines
191
+** delimited by line-feed characters; however, the encoding is
192
+** not UTF-8 or ASCII.
193
+**
194
+** (-5) -- The content appears to consist entirely of text, with lines
195
+** delimited by carriage-return, line-feed pairs; however, the
196
+** encoding is not UTF-8 or ASCII.
193197
**
194198
*/
195199
196200
int looks_like_utf8(const Blob *pContent){
197201
unsigned char *z = (unsigned char *) blob_buffer(pContent);
198202
unsigned int n = blob_size(pContent);
199203
unsigned int j;
200204
unsigned char c;
201
- int result = 1; /* Assume UTF-8 text with no CR/NL */
205
+ int result = 0; /* Assume UTF-8 text with no CR/NL */
202206
203207
/* Check individual lines.
204208
*/
205
- if( n==0 ) return result; /* Empty file -> text */
209
+ if( n==0 ) return 1; /* Empty file -> text */
206210
c = *z;
207211
j = (c!='\n');
208212
if( c<0x80 ){
209213
if( c==0 ) return 0; /* Zero byte in a file -> binary */
210214
}else if( c<0xC0 ){
211
- result = -2; /* Invalid UTF-8, continue */
215
+ result |= 4; /* Invalid UTF-8, continue */
212216
}else if( c<0xE0 ){
213217
if( n<2 || ((z[1]&0xC0)!=0x80) ){
214
- result = -2; /* Invalid 2-byte UTF-8, continue */
218
+ result |= 4; /* Invalid 2-byte UTF-8, continue */
215219
}else{
216220
--n; ++j; ++z;
217221
}
218222
}else if( c<0xF0 ){
219223
if( n<3 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) ){
220
- result = -2; /* Invalid 3-byte UTF-8, continue */
224
+ result |= 4; /* Invalid 3-byte UTF-8, continue */
221225
}else{
222226
n-=2; j+=2; z+=2;
223227
}
224228
}else if( c<0xF8 ){
225229
if( n<4 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) || ((z[3]&0xC0)!=0x80) ){
226
- result = -2; /* Invalid 4-byte UTF-8, continue */
230
+ result |= 4; /* Invalid 4-byte UTF-8, continue */
227231
}else{
228232
n-=3; j+=3; z+=3;
229233
}
230234
}else{
231
- result = -2; /* Invalid multi-byte UTF-8, continue */
235
+ result |= 4; /* Invalid multi-byte UTF-8, continue */
232236
}
233237
while( --n>0 ){
234238
c = *++z; ++j;
235239
if( c<0x80 ){
236240
if( c==0 ) return 0; /* Zero byte in a file -> binary */
237241
if( c=='\n' ){
238
- unsigned char c2 = z[-1];
239
- if( c2=='\r' && result>0 ){
240
- result = -1; /* Contains CR/NL, continue */
242
+ if( z[-1]=='\r'){
243
+ result |= 2; /* Contains CR/NL, continue */
241244
}
242245
if( j>LENGTH_MASK ){
243246
return 0; /* Very long line -> binary */
244247
}
245248
j = 0;
246249
}
247250
}else if( c<0xC0 ){
248
- result = -2; /* Invalid UTF-8, continue */
251
+ result |= 4; /* Invalid UTF-8, continue */
249252
}else if( c<0xE0 ){
250253
if( n<2 || ((z[1]&0xC0)!=0x80) ){
251
- result = -2; continue; /* Invalid 2-byte UTF-8, continue */
254
+ result |= 4; continue; /* Invalid 2-byte UTF-8, continue */
252255
}
253256
--n; ++j; ++z;
254257
}else if( c<0xF0 ){
255258
if( n<3 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) ){
256
- result = -2; continue; /* Invalid 3-byte UTF-8, continue */
259
+ result |= 4; continue; /* Invalid 3-byte UTF-8, continue */
257260
}
258261
n-=2; j+=2; z+=2;
259262
}else if( c<0xF8 ){
260263
if( n<4 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) || ((z[3]&0xC0)!=0x80) ){
261
- result = -2; continue; /* Invalid 4-byte UTF-8, continue */
264
+ result |= 4; continue; /* Invalid 4-byte UTF-8, continue */
262265
}
263266
n-=3; j+=3; z+=3;
264267
}else{
265
- result = -2; /* Invalid multi-byte UTF-8, continue */
268
+ result |= 4; /* Invalid multi-byte UTF-8, continue */
266269
}
267270
}
268271
if( j>LENGTH_MASK ){
269272
return 0; /* Very long line -> binary */
270273
}
271
- return result; /* No problems seen -> not binary */
274
+ return 1-result; /* No problems seen -> not binary */
272275
}
273276
274277
/*
275278
** Maximum length of a line in a text file, in UTF-16 characters. (2731)
276279
** The number of bytes represented by this value after conversion to
277280
--- src/diff.c
+++ src/diff.c
@@ -185,92 +185,95 @@
185 ** to be binary.
186 **
187 ** (-1) -- The content appears to consist entirely of text, with lines
188 ** delimited by carriage-return, line-feed pairs.
189 **
190 ** (-2) -- The content appears to consist entirely of text, with lines
191 ** delimited by line-feed characters or carriage-return,
192 ** line-feed pairs; however, the encoding is not UTF-8 or ASCII.
 
 
 
 
193 **
194 */
195
196 int looks_like_utf8(const Blob *pContent){
197 unsigned char *z = (unsigned char *) blob_buffer(pContent);
198 unsigned int n = blob_size(pContent);
199 unsigned int j;
200 unsigned char c;
201 int result = 1; /* Assume UTF-8 text with no CR/NL */
202
203 /* Check individual lines.
204 */
205 if( n==0 ) return result; /* Empty file -> text */
206 c = *z;
207 j = (c!='\n');
208 if( c<0x80 ){
209 if( c==0 ) return 0; /* Zero byte in a file -> binary */
210 }else if( c<0xC0 ){
211 result = -2; /* Invalid UTF-8, continue */
212 }else if( c<0xE0 ){
213 if( n<2 || ((z[1]&0xC0)!=0x80) ){
214 result = -2; /* Invalid 2-byte UTF-8, continue */
215 }else{
216 --n; ++j; ++z;
217 }
218 }else if( c<0xF0 ){
219 if( n<3 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) ){
220 result = -2; /* Invalid 3-byte UTF-8, continue */
221 }else{
222 n-=2; j+=2; z+=2;
223 }
224 }else if( c<0xF8 ){
225 if( n<4 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) || ((z[3]&0xC0)!=0x80) ){
226 result = -2; /* Invalid 4-byte UTF-8, continue */
227 }else{
228 n-=3; j+=3; z+=3;
229 }
230 }else{
231 result = -2; /* Invalid multi-byte UTF-8, continue */
232 }
233 while( --n>0 ){
234 c = *++z; ++j;
235 if( c<0x80 ){
236 if( c==0 ) return 0; /* Zero byte in a file -> binary */
237 if( c=='\n' ){
238 unsigned char c2 = z[-1];
239 if( c2=='\r' && result>0 ){
240 result = -1; /* Contains CR/NL, continue */
241 }
242 if( j>LENGTH_MASK ){
243 return 0; /* Very long line -> binary */
244 }
245 j = 0;
246 }
247 }else if( c<0xC0 ){
248 result = -2; /* Invalid UTF-8, continue */
249 }else if( c<0xE0 ){
250 if( n<2 || ((z[1]&0xC0)!=0x80) ){
251 result = -2; continue; /* Invalid 2-byte UTF-8, continue */
252 }
253 --n; ++j; ++z;
254 }else if( c<0xF0 ){
255 if( n<3 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) ){
256 result = -2; continue; /* Invalid 3-byte UTF-8, continue */
257 }
258 n-=2; j+=2; z+=2;
259 }else if( c<0xF8 ){
260 if( n<4 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) || ((z[3]&0xC0)!=0x80) ){
261 result = -2; continue; /* Invalid 4-byte UTF-8, continue */
262 }
263 n-=3; j+=3; z+=3;
264 }else{
265 result = -2; /* Invalid multi-byte UTF-8, continue */
266 }
267 }
268 if( j>LENGTH_MASK ){
269 return 0; /* Very long line -> binary */
270 }
271 return result; /* No problems seen -> not binary */
272 }
273
274 /*
275 ** Maximum length of a line in a text file, in UTF-16 characters. (2731)
276 ** The number of bytes represented by this value after conversion to
277
--- src/diff.c
+++ src/diff.c
@@ -185,92 +185,95 @@
185 ** to be binary.
186 **
187 ** (-1) -- The content appears to consist entirely of text, with lines
188 ** delimited by carriage-return, line-feed pairs.
189 **
190 ** (-3) -- The content appears to consist entirely of text, with lines
191 ** delimited by line-feed characters; however, the encoding is
192 ** not UTF-8 or ASCII.
193 **
194 ** (-5) -- The content appears to consist entirely of text, with lines
195 ** delimited by carriage-return, line-feed pairs; however, the
196 ** encoding is not UTF-8 or ASCII.
197 **
198 */
199
200 int looks_like_utf8(const Blob *pContent){
201 unsigned char *z = (unsigned char *) blob_buffer(pContent);
202 unsigned int n = blob_size(pContent);
203 unsigned int j;
204 unsigned char c;
205 int result = 0; /* Assume UTF-8 text with no CR/NL */
206
207 /* Check individual lines.
208 */
209 if( n==0 ) return 1; /* Empty file -> text */
210 c = *z;
211 j = (c!='\n');
212 if( c<0x80 ){
213 if( c==0 ) return 0; /* Zero byte in a file -> binary */
214 }else if( c<0xC0 ){
215 result |= 4; /* Invalid UTF-8, continue */
216 }else if( c<0xE0 ){
217 if( n<2 || ((z[1]&0xC0)!=0x80) ){
218 result |= 4; /* Invalid 2-byte UTF-8, continue */
219 }else{
220 --n; ++j; ++z;
221 }
222 }else if( c<0xF0 ){
223 if( n<3 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) ){
224 result |= 4; /* Invalid 3-byte UTF-8, continue */
225 }else{
226 n-=2; j+=2; z+=2;
227 }
228 }else if( c<0xF8 ){
229 if( n<4 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) || ((z[3]&0xC0)!=0x80) ){
230 result |= 4; /* Invalid 4-byte UTF-8, continue */
231 }else{
232 n-=3; j+=3; z+=3;
233 }
234 }else{
235 result |= 4; /* Invalid multi-byte UTF-8, continue */
236 }
237 while( --n>0 ){
238 c = *++z; ++j;
239 if( c<0x80 ){
240 if( c==0 ) return 0; /* Zero byte in a file -> binary */
241 if( c=='\n' ){
242 if( z[-1]=='\r'){
243 result |= 2; /* Contains CR/NL, continue */
 
244 }
245 if( j>LENGTH_MASK ){
246 return 0; /* Very long line -> binary */
247 }
248 j = 0;
249 }
250 }else if( c<0xC0 ){
251 result |= 4; /* Invalid UTF-8, continue */
252 }else if( c<0xE0 ){
253 if( n<2 || ((z[1]&0xC0)!=0x80) ){
254 result |= 4; continue; /* Invalid 2-byte UTF-8, continue */
255 }
256 --n; ++j; ++z;
257 }else if( c<0xF0 ){
258 if( n<3 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) ){
259 result |= 4; continue; /* Invalid 3-byte UTF-8, continue */
260 }
261 n-=2; j+=2; z+=2;
262 }else if( c<0xF8 ){
263 if( n<4 || ((z[1]&0xC0)!=0x80) || ((z[2]&0xC0)!=0x80) || ((z[3]&0xC0)!=0x80) ){
264 result |= 4; continue; /* Invalid 4-byte UTF-8, continue */
265 }
266 n-=3; j+=3; z+=3;
267 }else{
268 result |= 4; /* Invalid multi-byte UTF-8, continue */
269 }
270 }
271 if( j>LENGTH_MASK ){
272 return 0; /* Very long line -> binary */
273 }
274 return 1-result; /* No problems seen -> not binary */
275 }
276
277 /*
278 ** Maximum length of a line in a text file, in UTF-16 characters. (2731)
279 ** The number of bytes represented by this value after conversion to
280

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button