Fossil SCM
Custom mimetype case-folding is now performed only on the file extensions and only on demand, rather than lower-casing the whole input in advance.
Commit
e34ea5b6b80ab7afc9fe7faef3b2db502d3c8284069875db11421504c833d216
Parent
b55dfc97a1d8a1c…
1 file changed: +24 -11
| --- src/doc.c | ||
| +++ src/doc.c | ||
| @@ -310,10 +310,15 @@ | ||
| 310 | 310 | /* |
| 311 | 311 | ** Looks in the contents of the "mimetypes" setting for a suffix |
| 312 | 312 | ** matching zSuffix. If found, it returns the configured value |
| 313 | 313 | ** in memory owned by the app (i.e. do not free() it), else it |
| 314 | 314 | ** returns 0. |
| 315 | +** | |
| 316 | +** The mimetypes setting is expected to be a list of file extensions | |
| 317 | +** and mimetypes, with one such mapping per line. A leading '.' on | |
| 318 | +** extensions is permitted for compatibility with lists imported from | |
| 319 | +** other tools which require them. | |
| 315 | 320 | */ |
| 316 | 321 | static const char *mimetype_from_name_custom(const char *zSuffix){ |
| 317 | 322 | static char * zList = 0; |
| 318 | 323 | static char const * zEnd = 0; |
| 319 | 324 | static int once = 0; |
| @@ -324,19 +329,17 @@ | ||
| 324 | 329 | once = 1; |
| 325 | 330 | zList = db_get("mimetypes",0); |
| 326 | 331 | if(zList==0){ |
| 327 | 332 | return 0; |
| 328 | 333 | } |
| 329 | - /* Initialize zList and transform it to simplify | |
| 330 | - the main loop. */ | |
| 334 | + /* Transform zList to simplify the main loop: | |
| 335 | + replace non-newline spaces with NUL bytes. */ | |
| 331 | 336 | zEnd = zList + strlen(zList); |
| 332 | 337 | for(z = zList; z<zEnd; ++z){ |
| 333 | 338 | if('\n'==*z) continue; |
| 334 | 339 | else if(fossil_isspace(*z)){ |
| 335 | 340 | *z = 0; |
| 336 | - }else if(!(0x80 & *z)){ | |
| 337 | - *z = (char)fossil_tolower(*z); | |
| 338 | 341 | } |
| 339 | 342 | } |
| 340 | 343 | }else if(zList==0){ |
| 341 | 344 | return 0; |
| 342 | 345 | } |
| @@ -346,41 +349,51 @@ | ||
| 346 | 349 | if(*z==0){ |
| 347 | 350 | ++z; |
| 348 | 351 | continue; |
| 349 | 352 | } |
| 350 | 353 | else if('\n'==*z){ |
| 351 | - /* May happen on malformed inputs. Skip this record. */ | |
| 352 | 354 | if(2==tokenizerState){ |
| 353 | 355 | /* We were expecting a value for a successful match |
| 354 | 356 | here, but got no value. Bail out. */ |
| 355 | 357 | break; |
| 356 | 358 | }else{ |
| 359 | + /* May happen on malformed inputs. Skip this record. */ | |
| 357 | 360 | tokenizerState = 0; |
| 358 | 361 | ++z; |
| 359 | 362 | continue; |
| 360 | 363 | } |
| 361 | 364 | } |
| 362 | 365 | switch(tokenizerState){ |
| 363 | - case 0: /* This is a file extension */ | |
| 366 | + case 0:{ /* This is a file extension */ | |
| 367 | + static char * zCase = 0; | |
| 364 | 368 | if('.'==*z){ |
| 365 | 369 | /*ignore an optional leading dot, for compatibility |
| 366 | 370 | with some external mimetype lists*/; |
| 367 | 371 | if(++z==zEnd){ |
| 368 | 372 | break; |
| 369 | 373 | } |
| 370 | - } | |
| 374 | + } | |
| 375 | + if(zCase<z){ | |
| 376 | + /*we have not yet case-folded this section: lower-case it*/ | |
| 377 | + for(zCase = z; zCase<zEnd && *zCase!=0; ++zCase){ | |
| 378 | + if(!(0x80 & *zCase)){ | |
| 379 | + *zCase = (char)fossil_tolower(*zCase); | |
| 380 | + } | |
| 381 | + } | |
| 382 | + } | |
| 371 | 383 | if(strcmp(z,zSuffix)==0){ |
| 372 | - tokenizerState = 2 /*Match: accept the next value. */; | |
| 384 | + tokenizerState = 2 /* Match: accept the next value. */; | |
| 373 | 385 | }else{ |
| 374 | - tokenizerState = 1 /* No match: skip the next value */; | |
| 386 | + tokenizerState = 1 /* No match: skip the next value. */; | |
| 375 | 387 | } |
| 376 | 388 | z += strlen(z); |
| 377 | 389 | break; |
| 390 | + } | |
| 378 | 391 | case 1: /* This is a value, but not a match. Skip it. */ |
| 379 | 392 | z += strlen(z); |
| 380 | 393 | break; |
| 381 | - case 2: /* This is the value which matched the previous key */; | |
| 394 | + case 2: /* This is the value which matched the previous key. */; | |
| 382 | 395 | return z; |
| 383 | 396 | default: |
| 384 | 397 | assert(!"cannot happen - invalid tokenizerState value."); |
| 385 | 398 | } |
| 386 | 399 | } |
| @@ -413,15 +426,15 @@ | ||
| 413 | 426 | if( zName[i]=='.' ) z = &zName[i+1]; |
| 414 | 427 | } |
| 415 | 428 | len = strlen(z); |
| 416 | 429 | if( len<sizeof(zSuffix)-1 ){ |
| 417 | 430 | sqlite3_snprintf(sizeof(zSuffix), zSuffix, "%s", z); |
| 431 | + for(i=0; zSuffix[i]; i++) zSuffix[i] = fossil_tolower(zSuffix[i]); | |
| 418 | 432 | z = mimetype_from_name_custom(zSuffix); |
| 419 | 433 | if(z!=0){ |
| 420 | 434 | return z; |
| 421 | 435 | } |
| 422 | - for(i=0; zSuffix[i]; i++) zSuffix[i] = fossil_tolower(zSuffix[i]); | |
| 423 | 436 | first = 0; |
| 424 | 437 | last = count(aMime) - 1; |
| 425 | 438 | while( first<=last ){ |
| 426 | 439 | int c; |
| 427 | 440 | i = (first+last)/2; |
| 428 | 441 |
| --- src/doc.c | |
| +++ src/doc.c | |
| @@ -310,10 +310,15 @@ | |
| 310 | /* |
| 311 | ** Looks in the contents of the "mimetypes" setting for a suffix |
| 312 | ** matching zSuffix. If found, it returns the configured value |
| 313 | ** in memory owned by the app (i.e. do not free() it), else it |
| 314 | ** returns 0. |
| 315 | */ |
| 316 | static const char *mimetype_from_name_custom(const char *zSuffix){ |
| 317 | static char * zList = 0; |
| 318 | static char const * zEnd = 0; |
| 319 | static int once = 0; |
| @@ -324,19 +329,17 @@ | |
| 324 | once = 1; |
| 325 | zList = db_get("mimetypes",0); |
| 326 | if(zList==0){ |
| 327 | return 0; |
| 328 | } |
| 329 | /* Initialize zList and transform it to simplify |
| 330 | the main loop. */ |
| 331 | zEnd = zList + strlen(zList); |
| 332 | for(z = zList; z<zEnd; ++z){ |
| 333 | if('\n'==*z) continue; |
| 334 | else if(fossil_isspace(*z)){ |
| 335 | *z = 0; |
| 336 | }else if(!(0x80 & *z)){ |
| 337 | *z = (char)fossil_tolower(*z); |
| 338 | } |
| 339 | } |
| 340 | }else if(zList==0){ |
| 341 | return 0; |
| 342 | } |
| @@ -346,41 +349,51 @@ | |
| 346 | if(*z==0){ |
| 347 | ++z; |
| 348 | continue; |
| 349 | } |
| 350 | else if('\n'==*z){ |
| 351 | /* May happen on malformed inputs. Skip this record. */ |
| 352 | if(2==tokenizerState){ |
| 353 | /* We were expecting a value for a successful match |
| 354 | here, but got no value. Bail out. */ |
| 355 | break; |
| 356 | }else{ |
| 357 | tokenizerState = 0; |
| 358 | ++z; |
| 359 | continue; |
| 360 | } |
| 361 | } |
| 362 | switch(tokenizerState){ |
| 363 | case 0: /* This is a file extension */ |
| 364 | if('.'==*z){ |
| 365 | /*ignore an optional leading dot, for compatibility |
| 366 | with some external mimetype lists*/; |
| 367 | if(++z==zEnd){ |
| 368 | break; |
| 369 | } |
| 370 | } |
| 371 | if(strcmp(z,zSuffix)==0){ |
| 372 | tokenizerState = 2 /*Match: accept the next value. */; |
| 373 | }else{ |
| 374 | tokenizerState = 1 /* No match: skip the next value */; |
| 375 | } |
| 376 | z += strlen(z); |
| 377 | break; |
| 378 | case 1: /* This is a value, but not a match. Skip it. */ |
| 379 | z += strlen(z); |
| 380 | break; |
| 381 | case 2: /* This is the value which matched the previous key */; |
| 382 | return z; |
| 383 | default: |
| 384 | assert(!"cannot happen - invalid tokenizerState value."); |
| 385 | } |
| 386 | } |
| @@ -413,15 +426,15 @@ | |
| 413 | if( zName[i]=='.' ) z = &zName[i+1]; |
| 414 | } |
| 415 | len = strlen(z); |
| 416 | if( len<sizeof(zSuffix)-1 ){ |
| 417 | sqlite3_snprintf(sizeof(zSuffix), zSuffix, "%s", z); |
| 418 | z = mimetype_from_name_custom(zSuffix); |
| 419 | if(z!=0){ |
| 420 | return z; |
| 421 | } |
| 422 | for(i=0; zSuffix[i]; i++) zSuffix[i] = fossil_tolower(zSuffix[i]); |
| 423 | first = 0; |
| 424 | last = count(aMime) - 1; |
| 425 | while( first<=last ){ |
| 426 | int c; |
| 427 | i = (first+last)/2; |
| 428 |
| --- src/doc.c | |
| +++ src/doc.c | |
| @@ -310,10 +310,15 @@ | |
| 310 | /* |
| 311 | ** Looks in the contents of the "mimetypes" setting for a suffix |
| 312 | ** matching zSuffix. If found, it returns the configured value |
| 313 | ** in memory owned by the app (i.e. do not free() it), else it |
| 314 | ** returns 0. |
| 315 | ** |
| 316 | ** The mimetypes setting is expected to be a list of file extensions |
| 317 | ** and mimetypes, with one such mapping per line. A leading '.' on |
| 318 | ** extensions is permitted for compatibility with lists imported from |
| 319 | ** other tools which require them. |
| 320 | */ |
| 321 | static const char *mimetype_from_name_custom(const char *zSuffix){ |
| 322 | static char * zList = 0; |
| 323 | static char const * zEnd = 0; |
| 324 | static int once = 0; |
| @@ -324,19 +329,17 @@ | |
| 329 | once = 1; |
| 330 | zList = db_get("mimetypes",0); |
| 331 | if(zList==0){ |
| 332 | return 0; |
| 333 | } |
| 334 | /* Transform zList to simplify the main loop: |
| 335 | replace non-newline spaces with NUL bytes. */ |
| 336 | zEnd = zList + strlen(zList); |
| 337 | for(z = zList; z<zEnd; ++z){ |
| 338 | if('\n'==*z) continue; |
| 339 | else if(fossil_isspace(*z)){ |
| 340 | *z = 0; |
| 341 | } |
| 342 | } |
| 343 | }else if(zList==0){ |
| 344 | return 0; |
| 345 | } |
| @@ -346,41 +349,51 @@ | |
| 349 | if(*z==0){ |
| 350 | ++z; |
| 351 | continue; |
| 352 | } |
| 353 | else if('\n'==*z){ |
| 354 | if(2==tokenizerState){ |
| 355 | /* We were expecting a value for a successful match |
| 356 | here, but got no value. Bail out. */ |
| 357 | break; |
| 358 | }else{ |
| 359 | /* May happen on malformed inputs. Skip this record. */ |
| 360 | tokenizerState = 0; |
| 361 | ++z; |
| 362 | continue; |
| 363 | } |
| 364 | } |
| 365 | switch(tokenizerState){ |
| 366 | case 0:{ /* This is a file extension */ |
| 367 | static char * zCase = 0; |
| 368 | if('.'==*z){ |
| 369 | /*ignore an optional leading dot, for compatibility |
| 370 | with some external mimetype lists*/; |
| 371 | if(++z==zEnd){ |
| 372 | break; |
| 373 | } |
| 374 | } |
| 375 | if(zCase<z){ |
| 376 | /*we have not yet case-folded this section: lower-case it*/ |
| 377 | for(zCase = z; zCase<zEnd && *zCase!=0; ++zCase){ |
| 378 | if(!(0x80 & *zCase)){ |
| 379 | *zCase = (char)fossil_tolower(*zCase); |
| 380 | } |
| 381 | } |
| 382 | } |
| 383 | if(strcmp(z,zSuffix)==0){ |
| 384 | tokenizerState = 2 /* Match: accept the next value. */; |
| 385 | }else{ |
| 386 | tokenizerState = 1 /* No match: skip the next value. */; |
| 387 | } |
| 388 | z += strlen(z); |
| 389 | break; |
| 390 | } |
| 391 | case 1: /* This is a value, but not a match. Skip it. */ |
| 392 | z += strlen(z); |
| 393 | break; |
| 394 | case 2: /* This is the value which matched the previous key. */; |
| 395 | return z; |
| 396 | default: |
| 397 | assert(!"cannot happen - invalid tokenizerState value."); |
| 398 | } |
| 399 | } |
| @@ -413,15 +426,15 @@ | |
| 426 | if( zName[i]=='.' ) z = &zName[i+1]; |
| 427 | } |
| 428 | len = strlen(z); |
| 429 | if( len<sizeof(zSuffix)-1 ){ |
| 430 | sqlite3_snprintf(sizeof(zSuffix), zSuffix, "%s", z); |
| 431 | for(i=0; zSuffix[i]; i++) zSuffix[i] = fossil_tolower(zSuffix[i]); |
| 432 | z = mimetype_from_name_custom(zSuffix); |
| 433 | if(z!=0){ |
| 434 | return z; |
| 435 | } |
| 436 | first = 0; |
| 437 | last = count(aMime) - 1; |
| 438 | while( first<=last ){ |
| 439 | int c; |
| 440 | i = (first+last)/2; |
| 441 |