Fossil SCM

Faster determination of binary files, by checking for more than just NUL bytes. Re-use looks_like_text() in mimetype_from_content().

jan.nijtmans 2012-10-30 20:10 UTC trunk
Commit 0ba08f9d26ecb3a53d79d05d3f95dfc8cc36c248
2 files changed: src/diff.c (+12 -6), src/doc.c (+1 -12)
+12 -6
--- src/diff.c
+++ src/diff.c
@@ -189,31 +189,37 @@
189189
**
190190
** (-2) -- The content appears to consist entirely of text, in the
191191
** UTF-16 (BE or LE) encoding.
192192
*/
193193
int looks_like_text(const Blob *pContent){
194
- const char *z = blob_buffer(pContent);
194
+ const unsigned char *z = blob_buffer(pContent);
195195
unsigned int n = blob_size(pContent);
196
- int j, c;
196
+ int j;
197
+ unsigned char c;
197198
int result = 1; /* Assume text with no CR/NL */
199
+ static const char isBinary[256] = {
200
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
201
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1
202
+ };
203
+
198204
199205
/* Check individual lines.
200206
*/
201207
if( n==0 ) return result; /* Empty file -> text */
202208
c = *z;
203
- if( c==0 ) return 0; /* \000 byte in a file -> binary */
209
+ if( isBinary[c] ) return 0; /* non-text byte in a file -> binary */
204210
if ( n > 1 ){
205
- if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
211
+ if ( (c==0xff) && (z[1]==0xfe) ){
206212
return -2;
207
- } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
213
+ } else if ( (c==0xfe) && (z[1]==0xff) ){
208214
return -2;
209215
}
210216
}
211217
j = (c!='\n');
212218
while( --n>0 ){
213219
c = *++z; ++j;
214
- if( c==0 ) return 0; /* \000 byte in a file -> binary */
220
+ if( isBinary[c] ) return 0; /* \000 byte in a file -> binary */
215221
if( c=='\n' ){
216222
if( z[-1]=='\r' ){
217223
result = -1; /* Contains CR/NL, continue */
218224
}
219225
if( j>LENGTH_MASK ){
220226
--- src/diff.c
+++ src/diff.c
@@ -189,31 +189,37 @@
189 **
190 ** (-2) -- The content appears to consist entirely of text, in the
191 ** UTF-16 (BE or LE) encoding.
192 */
193 int looks_like_text(const Blob *pContent){
194 const char *z = blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j, c;
 
197 int result = 1; /* Assume text with no CR/NL */
 
 
 
 
 
198
199 /* Check individual lines.
200 */
201 if( n==0 ) return result; /* Empty file -> text */
202 c = *z;
203 if( c==0 ) return 0; /* \000 byte in a file -> binary */
204 if ( n > 1 ){
205 if ( (c==(char)0xff) && (z[1]==(char)0xfe) ){
206 return -2;
207 } else if ( (c==(char)0xfe) && (z[1]==(char)0xff) ){
208 return -2;
209 }
210 }
211 j = (c!='\n');
212 while( --n>0 ){
213 c = *++z; ++j;
214 if( c==0 ) return 0; /* \000 byte in a file -> binary */
215 if( c=='\n' ){
216 if( z[-1]=='\r' ){
217 result = -1; /* Contains CR/NL, continue */
218 }
219 if( j>LENGTH_MASK ){
220
--- src/diff.c
+++ src/diff.c
@@ -189,31 +189,37 @@
189 **
190 ** (-2) -- The content appears to consist entirely of text, in the
191 ** UTF-16 (BE or LE) encoding.
192 */
193 int looks_like_text(const Blob *pContent){
194 const unsigned char *z = blob_buffer(pContent);
195 unsigned int n = blob_size(pContent);
196 int j;
197 unsigned char c;
198 int result = 1; /* Assume text with no CR/NL */
199 static const char isBinary[256] = {
200 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
201 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1
202 };
203
204
205 /* Check individual lines.
206 */
207 if( n==0 ) return result; /* Empty file -> text */
208 c = *z;
209 if( isBinary[c] ) return 0; /* non-text byte in a file -> binary */
210 if ( n > 1 ){
211 if ( (c==0xff) && (z[1]==0xfe) ){
212 return -2;
213 } else if ( (c==0xfe) && (z[1]==0xff) ){
214 return -2;
215 }
216 }
217 j = (c!='\n');
218 while( --n>0 ){
219 c = *++z; ++j;
220 if( isBinary[c] ) return 0; /* \000 byte in a file -> binary */
221 if( c=='\n' ){
222 if( z[-1]=='\r' ){
223 result = -1; /* Contains CR/NL, continue */
224 }
225 if( j>LENGTH_MASK ){
226
+1 -12
--- src/doc.c
+++ src/doc.c
@@ -35,15 +35,10 @@
3535
const char *mimetype_from_content(Blob *pBlob){
3636
int i;
3737
int n;
3838
const unsigned char *x;
3939
40
- static const char isBinary[] = {
41
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1,
42
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
43
- };
44
-
4540
/* A table of mimetypes based on file content prefixes
4641
*/
4742
static const struct {
4843
const char *zPrefix; /* The file prefix */
4944
int size; /* Length of the prefix */
@@ -56,17 +51,11 @@
5651
{ "\377\330\377", 3, "image/jpeg" },
5752
};
5853
5954
x = (const unsigned char*)blob_buffer(pBlob);
6055
n = blob_size(pBlob);
61
- for(i=0; i<n; i++){
62
- unsigned char c = x[i];
63
- if( c<=0x1f && isBinary[c] ){
64
- break;
65
- }
66
- }
67
- if( i>=n ){
56
+ if( looks_like_text(pBlob) ){
6857
return 0; /* Plain text */
6958
}
7059
for(i=0; i<sizeof(aMime)/sizeof(aMime[0]); i++){
7160
if( n>=aMime[i].size && memcmp(x, aMime[i].zPrefix, aMime[i].size)==0 ){
7261
return aMime[i].zMimetype;
7362
--- src/doc.c
+++ src/doc.c
@@ -35,15 +35,10 @@
35 const char *mimetype_from_content(Blob *pBlob){
36 int i;
37 int n;
38 const unsigned char *x;
39
40 static const char isBinary[] = {
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
43 };
44
45 /* A table of mimetypes based on file content prefixes
46 */
47 static const struct {
48 const char *zPrefix; /* The file prefix */
49 int size; /* Length of the prefix */
@@ -56,17 +51,11 @@
56 { "\377\330\377", 3, "image/jpeg" },
57 };
58
59 x = (const unsigned char*)blob_buffer(pBlob);
60 n = blob_size(pBlob);
61 for(i=0; i<n; i++){
62 unsigned char c = x[i];
63 if( c<=0x1f && isBinary[c] ){
64 break;
65 }
66 }
67 if( i>=n ){
68 return 0; /* Plain text */
69 }
70 for(i=0; i<sizeof(aMime)/sizeof(aMime[0]); i++){
71 if( n>=aMime[i].size && memcmp(x, aMime[i].zPrefix, aMime[i].size)==0 ){
72 return aMime[i].zMimetype;
73
--- src/doc.c
+++ src/doc.c
@@ -35,15 +35,10 @@
35 const char *mimetype_from_content(Blob *pBlob){
36 int i;
37 int n;
38 const unsigned char *x;
39
 
 
 
 
 
40 /* A table of mimetypes based on file content prefixes
41 */
42 static const struct {
43 const char *zPrefix; /* The file prefix */
44 int size; /* Length of the prefix */
@@ -56,17 +51,11 @@
51 { "\377\330\377", 3, "image/jpeg" },
52 };
53
54 x = (const unsigned char*)blob_buffer(pBlob);
55 n = blob_size(pBlob);
56 if( looks_like_text(pBlob) ){
 
 
 
 
 
 
57 return 0; /* Plain text */
58 }
59 for(i=0; i<sizeof(aMime)/sizeof(aMime[0]); i++){
60 if( n>=aMime[i].size && memcmp(x, aMime[i].zPrefix, aMime[i].size)==0 ){
61 return aMime[i].zMimetype;
62

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button