| | @@ -336,10 +336,100 @@ |
| 336 | 336 | z[j++] = c; |
| 337 | 337 | } |
| 338 | 338 | if( z[j] ) z[j] = 0; |
| 339 | 339 | } |
| 340 | 340 | |
| 341 | + |
| 342 | +/* |
| 343 | +** The *pz variable points to a UTF8 string. Read the next character |
| 344 | +** off of that string and return its codepoint value. Advance *pz to the |
| 345 | +** next character |
| 346 | +*/ |
| 347 | +u32 fossil_utf8_read( |
| 348 | + const unsigned char **pz /* Pointer to string from which to read char */ |
| 349 | +){ |
| 350 | + unsigned int c; |
| 351 | + |
| 352 | + /* |
| 353 | + ** This lookup table is used to help decode the first byte of |
| 354 | + ** a multi-byte UTF8 character. |
| 355 | + */ |
| 356 | + static const unsigned char utf8Trans1[] = { |
| 357 | + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 358 | + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
| 359 | + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, |
| 360 | + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, |
| 361 | + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 362 | + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
| 363 | + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 364 | + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, |
| 365 | + }; |
| 366 | + |
| 367 | + c = *((*pz)++); |
| 368 | + if( c>=0xc0 ){ |
| 369 | + c = utf8Trans1[c-0xc0]; |
| 370 | + while( (*(*pz) & 0xc0)==0x80 ){ |
| 371 | + c = (c<<6) + (0x3f & *((*pz)++)); |
| 372 | + } |
| 373 | + if( c<0x80 |
| 374 | + || (c&0xFFFFF800)==0xD800 |
| 375 | + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } |
| 376 | + } |
| 377 | + return c; |
| 378 | +} |
| 379 | + |
| 380 | +/* |
| 381 | +** Encode a UTF8 string for JSON. All special characters are escaped. |
| 382 | +*/ |
| 383 | +void blob_append_json_string(Blob *pBlob, const char *zStr){ |
| 384 | + const unsigned char *z; |
| 385 | + char *zOut; |
| 386 | + u32 c; |
| 387 | + int n, i, j; |
| 388 | + z = (const unsigned char*)zStr; |
| 389 | + n = 0; |
| 390 | + while( (c = fossil_utf8_read(&z))!=0 ){ |
| 391 | + if( c=='\\' || c=='"' ){ |
| 392 | + n += 2; |
| 393 | + }else if( c<' ' || c>=0x7f ){ |
| 394 | + if( c=='\n' || c=='\r' ){ |
| 395 | + n += 2; |
| 396 | + }else{ |
| 397 | + n += 6; |
| 398 | + } |
| 399 | + }else{ |
| 400 | + n++; |
| 401 | + } |
| 402 | + } |
| 403 | + i = blob_size(pBlob); |
| 404 | + blob_resize(pBlob, i+n); |
| 405 | + zOut = blob_buffer(pBlob); |
| 406 | + z = (const unsigned char*)zStr; |
| 407 | + while( (c = fossil_utf8_read(&z))!=0 ){ |
| 408 | + if( c=='\\' ){ |
| 409 | + zOut[i++] = '\\'; |
| 410 | + zOut[i++] = c; |
| 411 | + }else if( c<' ' || c>=0x7f ){ |
| 412 | + zOut[i++] = '\\'; |
| 413 | + if( c=='\n' ){ |
| 414 | + zOut[i++] = 'n'; |
| 415 | + }else if( c=='\r' ){ |
| 416 | + zOut[i++] = 'r'; |
| 417 | + }else{ |
| 418 | + zOut[i++] = 'u'; |
| 419 | + for(j=3; j>=0; j--){ |
| 420 | + zOut[i+j] = "0123456789abcdef"[c&0xf]; |
| 421 | + c >>= 4; |
| 422 | + } |
| 423 | + i += 4; |
| 424 | + } |
| 425 | + }else{ |
| 426 | + zOut[i++] = c; |
| 427 | + } |
| 428 | + } |
| 429 | + zOut[i] = 0; |
| 430 | +} |
| 341 | 431 | |
| 342 | 432 | /* |
| 343 | 433 | ** The characters used for HTTP base64 encoding. |
| 344 | 434 | */ |
| 345 | 435 | static unsigned char zBase[] = |
| 346 | 436 | |