Fossil SCM

Merge trunk. Let looks_like_text() give different values for UTF-16 BE/LE. Not used yet.

jan.nijtmans 2012-11-01 11:48 UTC improve_looks_like_binary merge
Commit 348637dedfdef286ad80d1cb9002dec242fef3b1
+1 -1
--- src/checkin.c
+++ src/checkin.c
@@ -898,11 +898,11 @@
898898
if( eType<0 ){
899899
const char *zWarning ;
900900
Blob ans;
901901
char cReply;
902902
903
- if( eType&1 ){
903
+ if( eType==-3 ){
904904
if( crnlOk ){
905905
return; /* We don't want CR/NL warnings for this file. */
906906
}
907907
zWarning = "CR/NL line endings";
908908
}else{
909909
--- src/checkin.c
+++ src/checkin.c
@@ -898,11 +898,11 @@
898 if( eType<0 ){
899 const char *zWarning ;
900 Blob ans;
901 char cReply;
902
903 if( eType&1 ){
904 if( crnlOk ){
905 return; /* We don't want CR/NL warnings for this file. */
906 }
907 zWarning = "CR/NL line endings";
908 }else{
909
--- src/checkin.c
+++ src/checkin.c
@@ -898,11 +898,11 @@
898 if( eType<0 ){
899 const char *zWarning ;
900 Blob ans;
901 char cReply;
902
903 if( eType==-3 ){
904 if( crnlOk ){
905 return; /* We don't want CR/NL warnings for this file. */
906 }
907 zWarning = "CR/NL line endings";
908 }else{
909
+1 -1
--- src/checkin.c
+++ src/checkin.c
@@ -898,11 +898,11 @@
898898
if( eType<0 ){
899899
const char *zWarning ;
900900
Blob ans;
901901
char cReply;
902902
903
- if( eType&1 ){
903
+ if( eType==-3 ){
904904
if( crnlOk ){
905905
return; /* We don't want CR/NL warnings for this file. */
906906
}
907907
zWarning = "CR/NL line endings";
908908
}else{
909909
--- src/checkin.c
+++ src/checkin.c
@@ -898,11 +898,11 @@
898 if( eType<0 ){
899 const char *zWarning ;
900 Blob ans;
901 char cReply;
902
903 if( eType&1 ){
904 if( crnlOk ){
905 return; /* We don't want CR/NL warnings for this file. */
906 }
907 zWarning = "CR/NL line endings";
908 }else{
909
--- src/checkin.c
+++ src/checkin.c
@@ -898,11 +898,11 @@
898 if( eType<0 ){
899 const char *zWarning ;
900 Blob ans;
901 char cReply;
902
903 if( eType==-3 ){
904 if( crnlOk ){
905 return; /* We don't want CR/NL warnings for this file. */
906 }
907 zWarning = "CR/NL line endings";
908 }else{
909
+10 -6
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
4848
"cannot compute difference between binary files\n"
4949
5050
#define DIFF_CANNOT_COMPUTE_SYMLINK \
5151
"cannot compute difference between symlink and regular file\n"
5252
53
-#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
53
+#define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1)
5454
#endif /* INTERFACE */
5555
5656
/*
5757
** Maximum length of a line in a text file. (8192)
5858
*/
@@ -181,16 +181,20 @@
181181
** not be UTF-8.
182182
**
183183
** (0) -- The content appears to be binary because it contains embedded
184184
** NUL (\000) characters or an extremely long line.
185185
**
186
-** (-1) -- The content appears to consist entirely of text, with lines
186
+** (-1) -- The content appears to consist entirely of text, in the
187
+** UTF-16 (LE) encoding.
188
+**
189
+** (-2) -- The content appears to consist entirely of text, in the
190
+** UTF-16 (BE) encoding.
191
+**
192
+** (-3) -- The content appears to consist entirely of text, with lines
187193
** delimited by carriage-return, line-feed pairs; however, the
188194
** encoding may not be UTF-8.
189195
**
190
-** (-2) -- The content appears to consist entirely of text, in the
191
-** UTF-16 (BE or LE) encoding.
192196
*/
193197
int looks_like_text(const Blob *pContent){
194198
unsigned char *z = (unsigned char *) blob_buffer(pContent);
195199
unsigned int n = blob_size(pContent);
196200
int j;
@@ -202,11 +206,11 @@
202206
if( n==0 ) return result; /* Empty file -> text */
203207
c = *z;
204208
if( c==0 ) return 0; /* \000 byte in a file -> binary */
205209
if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */
206210
if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */
207
- result = -2;
211
+ result = -1;
208212
j = LENGTH_MASK/3;
209213
while( (n-=2)>0 ){
210214
c = *(z+=2);
211215
if( z[1]==0 ){ /* High-byte must be 0 for further checks */
212216
if( c==0 ) return 0; /* \000 char in a file -> binary */
@@ -241,11 +245,11 @@
241245
while( --n>0 ){
242246
c = *++z;
243247
if( c==0 ) return 0; /* \000 byte in a file -> binary */
244248
if( c=='\n' ){
245249
if( z[-1]=='\r' ){
246
- result = -1; /* Contains CR/NL, continue */
250
+ result = -3; /* Contains CR/NL, continue */
247251
}
248252
j = LENGTH_MASK;
249253
}
250254
if( --j==0 ){
251255
return 0; /* Very long line -> binary */
252256
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -181,16 +181,20 @@
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line.
185 **
186 ** (-1) -- The content appears to consist entirely of text, with lines
 
 
 
 
 
 
187 ** delimited by carriage-return, line-feed pairs; however, the
188 ** encoding may not be UTF-8.
189 **
190 ** (-2) -- The content appears to consist entirely of text, in the
191 ** UTF-16 (BE or LE) encoding.
192 */
193 int looks_like_text(const Blob *pContent){
194 unsigned char *z = (unsigned char *) blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j;
@@ -202,11 +206,11 @@
202 if( n==0 ) return result; /* Empty file -> text */
203 c = *z;
204 if( c==0 ) return 0; /* \000 byte in a file -> binary */
205 if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */
206 if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */
207 result = -2;
208 j = LENGTH_MASK/3;
209 while( (n-=2)>0 ){
210 c = *(z+=2);
211 if( z[1]==0 ){ /* High-byte must be 0 for further checks */
212 if( c==0 ) return 0; /* \000 char in a file -> binary */
@@ -241,11 +245,11 @@
241 while( --n>0 ){
242 c = *++z;
243 if( c==0 ) return 0; /* \000 byte in a file -> binary */
244 if( c=='\n' ){
245 if( z[-1]=='\r' ){
246 result = -1; /* Contains CR/NL, continue */
247 }
248 j = LENGTH_MASK;
249 }
250 if( --j==0 ){
251 return 0; /* Very long line -> binary */
252
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -181,16 +181,20 @@
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line.
185 **
186 ** (-1) -- The content appears to consist entirely of text, in the
187 ** UTF-16 (LE) encoding.
188 **
189 ** (-2) -- The content appears to consist entirely of text, in the
190 ** UTF-16 (BE) encoding.
191 **
192 ** (-3) -- The content appears to consist entirely of text, with lines
193 ** delimited by carriage-return, line-feed pairs; however, the
194 ** encoding may not be UTF-8.
195 **
 
 
196 */
197 int looks_like_text(const Blob *pContent){
198 unsigned char *z = (unsigned char *) blob_buffer(pContent);
199 unsigned int n = blob_size(pContent);
200 int j;
@@ -202,11 +206,11 @@
206 if( n==0 ) return result; /* Empty file -> text */
207 c = *z;
208 if( c==0 ) return 0; /* \000 byte in a file -> binary */
209 if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */
210 if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */
211 result = -1;
212 j = LENGTH_MASK/3;
213 while( (n-=2)>0 ){
214 c = *(z+=2);
215 if( z[1]==0 ){ /* High-byte must be 0 for further checks */
216 if( c==0 ) return 0; /* \000 char in a file -> binary */
@@ -241,11 +245,11 @@
245 while( --n>0 ){
246 c = *++z;
247 if( c==0 ) return 0; /* \000 byte in a file -> binary */
248 if( c=='\n' ){
249 if( z[-1]=='\r' ){
250 result = -3; /* Contains CR/NL, continue */
251 }
252 j = LENGTH_MASK;
253 }
254 if( --j==0 ){
255 return 0; /* Very long line -> binary */
256
+10 -6
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
4848
"cannot compute difference between binary files\n"
4949
5050
#define DIFF_CANNOT_COMPUTE_SYMLINK \
5151
"cannot compute difference between symlink and regular file\n"
5252
53
-#define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
53
+#define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1)
5454
#endif /* INTERFACE */
5555
5656
/*
5757
** Maximum length of a line in a text file. (8192)
5858
*/
@@ -181,16 +181,20 @@
181181
** not be UTF-8.
182182
**
183183
** (0) -- The content appears to be binary because it contains embedded
184184
** NUL (\000) characters or an extremely long line.
185185
**
186
-** (-1) -- The content appears to consist entirely of text, with lines
186
+** (-1) -- The content appears to consist entirely of text, in the
187
+** UTF-16 (LE) encoding.
188
+**
189
+** (-2) -- The content appears to consist entirely of text, in the
190
+** UTF-16 (BE) encoding.
191
+**
192
+** (-3) -- The content appears to consist entirely of text, with lines
187193
** delimited by carriage-return, line-feed pairs; however, the
188194
** encoding may not be UTF-8.
189195
**
190
-** (-2) -- The content appears to consist entirely of text, in the
191
-** UTF-16 (BE or LE) encoding.
192196
*/
193197
int looks_like_text(const Blob *pContent){
194198
unsigned char *z = (unsigned char *) blob_buffer(pContent);
195199
unsigned int n = blob_size(pContent);
196200
int j;
@@ -202,11 +206,11 @@
202206
if( n==0 ) return result; /* Empty file -> text */
203207
c = *z;
204208
if( c==0 ) return 0; /* \000 byte in a file -> binary */
205209
if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */
206210
if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */
207
- result = -2;
211
+ result = -1;
208212
j = LENGTH_MASK/3;
209213
while( (n-=2)>0 ){
210214
c = *(z+=2);
211215
if( z[1]==0 ){ /* High-byte must be 0 for further checks */
212216
if( c==0 ) return 0; /* \000 char in a file -> binary */
@@ -241,11 +245,11 @@
241245
while( --n>0 ){
242246
c = *++z;
243247
if( c==0 ) return 0; /* \000 byte in a file -> binary */
244248
if( c=='\n' ){
245249
if( z[-1]=='\r' ){
246
- result = -1; /* Contains CR/NL, continue */
250
+ result = -3; /* Contains CR/NL, continue */
247251
}
248252
j = LENGTH_MASK;
249253
}
250254
if( --j==0 ){
251255
return 0; /* Very long line -> binary */
252256
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_text(blob)&1) == 0)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -181,16 +181,20 @@
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line.
185 **
186 ** (-1) -- The content appears to consist entirely of text, with lines
 
 
 
 
 
 
187 ** delimited by carriage-return, line-feed pairs; however, the
188 ** encoding may not be UTF-8.
189 **
190 ** (-2) -- The content appears to consist entirely of text, in the
191 ** UTF-16 (BE or LE) encoding.
192 */
193 int looks_like_text(const Blob *pContent){
194 unsigned char *z = (unsigned char *) blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j;
@@ -202,11 +206,11 @@
202 if( n==0 ) return result; /* Empty file -> text */
203 c = *z;
204 if( c==0 ) return 0; /* \000 byte in a file -> binary */
205 if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */
206 if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */
207 result = -2;
208 j = LENGTH_MASK/3;
209 while( (n-=2)>0 ){
210 c = *(z+=2);
211 if( z[1]==0 ){ /* High-byte must be 0 for further checks */
212 if( c==0 ) return 0; /* \000 char in a file -> binary */
@@ -241,11 +245,11 @@
241 while( --n>0 ){
242 c = *++z;
243 if( c==0 ) return 0; /* \000 byte in a file -> binary */
244 if( c=='\n' ){
245 if( z[-1]=='\r' ){
246 result = -1; /* Contains CR/NL, continue */
247 }
248 j = LENGTH_MASK;
249 }
250 if( --j==0 ){
251 return 0; /* Very long line -> binary */
252
--- src/diff.c
+++ src/diff.c
@@ -48,11 +48,11 @@
48 "cannot compute difference between binary files\n"
49
50 #define DIFF_CANNOT_COMPUTE_SYMLINK \
51 "cannot compute difference between symlink and regular file\n"
52
53 #define looks_like_binary(blob) ((looks_like_text(blob)&3) == 1)
54 #endif /* INTERFACE */
55
56 /*
57 ** Maximum length of a line in a text file. (8192)
58 */
@@ -181,16 +181,20 @@
181 ** not be UTF-8.
182 **
183 ** (0) -- The content appears to be binary because it contains embedded
184 ** NUL (\000) characters or an extremely long line.
185 **
186 ** (-1) -- The content appears to consist entirely of text, in the
187 ** UTF-16 (LE) encoding.
188 **
189 ** (-2) -- The content appears to consist entirely of text, in the
190 ** UTF-16 (BE) encoding.
191 **
192 ** (-3) -- The content appears to consist entirely of text, with lines
193 ** delimited by carriage-return, line-feed pairs; however, the
194 ** encoding may not be UTF-8.
195 **
 
 
196 */
197 int looks_like_text(const Blob *pContent){
198 unsigned char *z = (unsigned char *) blob_buffer(pContent);
199 unsigned int n = blob_size(pContent);
200 int j;
@@ -202,11 +206,11 @@
206 if( n==0 ) return result; /* Empty file -> text */
207 c = *z;
208 if( c==0 ) return 0; /* \000 byte in a file -> binary */
209 if ( (n&1)==0 ){ /* UTF-16 must have an even blob length */
210 if ( (c==0xff) && (z[1]==0xfe) ){ /* UTF-16 LE BOM */
211 result = -1;
212 j = LENGTH_MASK/3;
213 while( (n-=2)>0 ){
214 c = *(z+=2);
215 if( z[1]==0 ){ /* High-byte must be 0 for further checks */
216 if( c==0 ) return 0; /* \000 char in a file -> binary */
@@ -241,11 +245,11 @@
245 while( --n>0 ){
246 c = *++z;
247 if( c==0 ) return 0; /* \000 byte in a file -> binary */
248 if( c=='\n' ){
249 if( z[-1]=='\r' ){
250 result = -3; /* Contains CR/NL, continue */
251 }
252 j = LENGTH_MASK;
253 }
254 if( --j==0 ){
255 return 0; /* Very long line -> binary */
256

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button