Fossil SCM

Give a warning when a to-be-committed text file contains byte sequences that are not valid UTF-8. The warning can be disabled with the "encoding-glob" setting.

jan.nijtmans 2014-05-26 07:45 trunk merge
Commit 0cb00c0b8f4e5b03112e5b805dfe21668bfa7374
1 file changed +12 -1
+12 -1
--- src/checkin.c
+++ src/checkin.c
@@ -1239,10 +1239,11 @@
1239 1239   int fBinary; /* does the blob content appear to be binary? */
1240 1240   int lookFlags; /* output flags from looks_like_utf8/utf16() */
1241 1241   int fHasAnyCr; /* the blob contains one or more CR chars */
1242 1242   int fHasLoneCrOnly; /* all detected line endings are CR only */
1243 1243   int fHasCrLfOnly; /* all detected line endings are CR/LF pairs */
     1244 + int fHasInvalidUtf8 = 0;/* contains byte-sequence which is invalid for UTF-8 */
1244 1245   char *zMsg; /* Warning message */
1245 1246   Blob fname; /* Relative pathname of the file */
1246 1247   static int allOk = 0; /* Set to true to disable this routine */
1247 1248
1248 1249   if( allOk ) return 0;
@@ -1249,16 +1250,19 @@
1249 1250   fUnicode = could_be_utf16(p, &bReverse);
1250 1251   if( fUnicode ){
1251 1252   lookFlags = looks_like_utf16(p, bReverse, LOOK_NUL);
1252 1253   }else{
1253 1254   lookFlags = looks_like_utf8(p, LOOK_NUL);
     1255 + if( !(lookFlags & LOOK_BINARY) && invalid_utf8(p) ){
     1256 + fHasInvalidUtf8 = 1;
     1257 + }
1254 1258   }
1255 1259   fHasAnyCr = (lookFlags & LOOK_CR);
1256 1260   fBinary = (lookFlags & LOOK_BINARY);
1257 1261   fHasLoneCrOnly = ((lookFlags & LOOK_EOL) == LOOK_LONE_CR);
1258 1262   fHasCrLfOnly = ((lookFlags & LOOK_EOL) == LOOK_CRLF);
1259      - if( fUnicode || fHasAnyCr || fBinary ){
     1263 + if( fUnicode || fHasAnyCr || fBinary || fHasInvalidUtf8){
1260 1264   const char *zWarning;
1261 1265   const char *zDisable;
1262 1266   const char *zConvert = "c=convert/";
1263 1267   Blob ans;
1264 1268   char cReply;
@@ -1287,10 +1291,17 @@
1287 1291   zWarning = "CR/NL line endings and Unicode";
1288 1292   }else{
1289 1293   zWarning = "mixed line endings and Unicode";
1290 1294   }
1291 1295   zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
     1296 + }else if( fHasInvalidUtf8 ){
     1297 + if( encodingOk ){
     1298 + return 0; /* We don't want encoding warnings for this file. */
     1299 + }
     1300 + zWarning = "invalid UTF-8";
     1301 + zConvert = ""; /* Possible conversion to UTF-8 not yet implemented. */
     1302 + zDisable = "\"encoding-glob\" setting";
1292 1303   }else if( fHasAnyCr ){
1293 1304   if( crnlOk ){
1294 1305   return 0; /* We don't want CR/NL warnings for this file. */
1295 1306   }
1296 1307   if( fHasLoneCrOnly ){
1297 1308
--- src/checkin.c
+++ src/checkin.c
@@ -1239,10 +1239,11 @@
1239 int fBinary; /* does the blob content appear to be binary? */
1240 int lookFlags; /* output flags from looks_like_utf8/utf16() */
1241 int fHasAnyCr; /* the blob contains one or more CR chars */
1242 int fHasLoneCrOnly; /* all detected line endings are CR only */
1243 int fHasCrLfOnly; /* all detected line endings are CR/LF pairs */
 
1244 char *zMsg; /* Warning message */
1245 Blob fname; /* Relative pathname of the file */
1246 static int allOk = 0; /* Set to true to disable this routine */
1247
1248 if( allOk ) return 0;
@@ -1249,16 +1250,19 @@
1249 fUnicode = could_be_utf16(p, &bReverse);
1250 if( fUnicode ){
1251 lookFlags = looks_like_utf16(p, bReverse, LOOK_NUL);
1252 }else{
1253 lookFlags = looks_like_utf8(p, LOOK_NUL);
 
 
 
1254 }
1255 fHasAnyCr = (lookFlags & LOOK_CR);
1256 fBinary = (lookFlags & LOOK_BINARY);
1257 fHasLoneCrOnly = ((lookFlags & LOOK_EOL) == LOOK_LONE_CR);
1258 fHasCrLfOnly = ((lookFlags & LOOK_EOL) == LOOK_CRLF);
1259 if( fUnicode || fHasAnyCr || fBinary ){
1260 const char *zWarning;
1261 const char *zDisable;
1262 const char *zConvert = "c=convert/";
1263 Blob ans;
1264 char cReply;
@@ -1287,10 +1291,17 @@
1287 zWarning = "CR/NL line endings and Unicode";
1288 }else{
1289 zWarning = "mixed line endings and Unicode";
1290 }
1291 zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
 
 
 
 
 
 
 
1292 }else if( fHasAnyCr ){
1293 if( crnlOk ){
1294 return 0; /* We don't want CR/NL warnings for this file. */
1295 }
1296 if( fHasLoneCrOnly ){
1297
--- src/checkin.c
+++ src/checkin.c
@@ -1239,10 +1239,11 @@
1239 int fBinary; /* does the blob content appear to be binary? */
1240 int lookFlags; /* output flags from looks_like_utf8/utf16() */
1241 int fHasAnyCr; /* the blob contains one or more CR chars */
1242 int fHasLoneCrOnly; /* all detected line endings are CR only */
1243 int fHasCrLfOnly; /* all detected line endings are CR/LF pairs */
1244 int fHasInvalidUtf8 = 0;/* contains byte-sequence which is invalid for UTF-8 */
1245 char *zMsg; /* Warning message */
1246 Blob fname; /* Relative pathname of the file */
1247 static int allOk = 0; /* Set to true to disable this routine */
1248
1249 if( allOk ) return 0;
@@ -1249,16 +1250,19 @@
1250 fUnicode = could_be_utf16(p, &bReverse);
1251 if( fUnicode ){
1252 lookFlags = looks_like_utf16(p, bReverse, LOOK_NUL);
1253 }else{
1254 lookFlags = looks_like_utf8(p, LOOK_NUL);
1255 if( !(lookFlags & LOOK_BINARY) && invalid_utf8(p) ){
1256 fHasInvalidUtf8 = 1;
1257 }
1258 }
1259 fHasAnyCr = (lookFlags & LOOK_CR);
1260 fBinary = (lookFlags & LOOK_BINARY);
1261 fHasLoneCrOnly = ((lookFlags & LOOK_EOL) == LOOK_LONE_CR);
1262 fHasCrLfOnly = ((lookFlags & LOOK_EOL) == LOOK_CRLF);
1263 if( fUnicode || fHasAnyCr || fBinary || fHasInvalidUtf8){
1264 const char *zWarning;
1265 const char *zDisable;
1266 const char *zConvert = "c=convert/";
1267 Blob ans;
1268 char cReply;
@@ -1287,10 +1291,17 @@
1291 zWarning = "CR/NL line endings and Unicode";
1292 }else{
1293 zWarning = "mixed line endings and Unicode";
1294 }
1295 zDisable = "\"crnl-glob\" and \"encoding-glob\" settings";
1296 }else if( fHasInvalidUtf8 ){
1297 if( encodingOk ){
1298 return 0; /* We don't want encoding warnings for this file. */
1299 }
1300 zWarning = "invalid UTF-8";
1301 zConvert = ""; /* Possible conversion to UTF-8 not yet implemented. */
1302 zDisable = "\"encoding-glob\" setting";
1303 }else if( fHasAnyCr ){
1304 if( crnlOk ){
1305 return 0; /* We don't want CR/NL warnings for this file. */
1306 }
1307 if( fHasLoneCrOnly ){
1308

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button