Fossil SCM
Rework the algorithm to consume a certain number of bits from the string, rather than using modulus arithmetic --- this is much friendlier on partial hashes, at the expense of being a bit less efficient.
Commit
0160d40b5d0c345b2efb27e5f6e1a0e4716efb88
Parent
15090b9450aa34e…
1 file changed
+113
-204
+113
-204
| --- src/mnemonic.c | ||
| +++ src/mnemonic.c | ||
| @@ -2,11 +2,10 @@ | ||
| 2 | 2 | ** Copyright (c) 2008 D. Richard Hipp |
| 3 | 3 | ** |
| 4 | 4 | ** This program is free software; you can redistribute it and/or |
| 5 | 5 | ** modify it under the terms of the Simplified BSD License (also |
| 6 | 6 | ** known as the "2-Clause License" or "FreeBSD License".) |
| 7 | - | |
| 8 | 7 | ** This program is distributed in the hope that it will be useful, |
| 9 | 8 | ** but without any warranty; without even the implied warranty of |
| 10 | 9 | ** merchantability or fitness for a particular purpose. |
| 11 | 10 | ** |
| 12 | 11 | ** Author contact information: |
| @@ -14,23 +13,33 @@ | ||
| 14 | 13 | ** http://www.hwaci.com/drh/ |
| 15 | 14 | ** |
| 16 | 15 | ******************************************************************************* |
| 17 | 16 | ** |
| 18 | 17 | ** This file contains the code to do mnemonic encoding of commit IDs. The |
| 19 | -** wordlist and algorithm is taken from: | |
| 18 | +** wordlist and algorithm were originally taken from: | |
| 20 | 19 | ** |
| 21 | 20 | ** https://github.com/singpolyma/mnemonicode |
| 22 | 21 | ** |
| 23 | -** The original code is MIT licensed, but this code uses a reimplementation | |
| 24 | -** from scratch anyway. | |
| 22 | +** ...which is BSD licensed. However, the algorithm has since been heavily | |
| 23 | +** modified to more fit Fossil's special needs. | |
| 24 | +** | |
| 25 | +** The new algorithm works by reading hex characters one at a time and | |
| 26 | +** accumulating bits into a ring buffer. When available, 10 bits are consumed | |
| 27 | +** and used as a word index. This only uses 1024 of the available 1626 words | |
| 28 | +** but preserves the property that encoded strings may be truncated at any | |
| 29 | +** point and the resulting hex string, when decoded, is a truncated form of | |
| 30 | +** the original hex string. It is, unfortunately, slightly less efficient, and | |
| 31 | +** three words can only encode seven hex digits. | |
| 25 | 32 | */ |
| 26 | 33 | |
| 27 | 34 | #include "config.h" |
| 28 | 35 | #include "mnemonic.h" |
| 29 | 36 | |
| 30 | 37 | #define MN_BASE 1626 |
| 31 | -#define MN_REMAINDER 7 | |
| 38 | + | |
| 39 | +/* Number of bits to encode as a word. 10 is the maximum. */ | |
| 40 | +#define CHUNK_SIZE 10 | |
| 32 | 41 | |
| 33 | 42 | static const char* basewordlist[MN_BASE] = |
| 34 | 43 | { |
| 35 | 44 | "ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL", |
| 36 | 45 | "ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT", |
| @@ -303,37 +312,28 @@ | ||
| 303 | 312 | "WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL", |
| 304 | 313 | "WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC", |
| 305 | 314 | "RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN" |
| 306 | 315 | }; |
| 307 | 316 | |
| 308 | -static const char* extwordlist[MN_REMAINDER] = | |
| 309 | -{ | |
| 310 | - "EGO", "FAX", "JET", "JOB", "RIO", "SKI", | |
| 311 | - "YES" | |
| 312 | -}; | |
| 313 | - | |
| 314 | 317 | /* |
| 315 | -** String comparator which supports both '\0' and ' ' as a terminator | |
| 316 | -** character. | |
| 318 | +** Special string comparator. | |
| 317 | 319 | */ |
| 318 | 320 | |
| 319 | -static int strspacecmp(const char* s1, const char* s2) | |
| 321 | +static int strspecialcmp(const char* s1, const char* s2) | |
| 320 | 322 | { |
| 321 | 323 | int c1, c2; |
| 322 | 324 | |
| 323 | 325 | do |
| 324 | 326 | { |
| 325 | - c1 = *s1++; | |
| 326 | - c2 = *s2++; | |
| 327 | - if ((c1 == '\0') || (c2 == ' ')) | |
| 328 | - { | |
| 329 | - if (c1 == ' ') | |
| 330 | - c1 = '\0'; | |
| 331 | - if (c2 == ' ') | |
| 332 | - c2 = '\0'; | |
| 333 | - break; | |
| 334 | - } | |
| 327 | + c1 = toupper(*s1++); | |
| 328 | + c2 = toupper(*s2++); | |
| 329 | + if ((c1 == ' ') || (c1 == '-') || (c1 == '_')) | |
| 330 | + c1 = 0; | |
| 331 | + if ((c2 == ' ') || (c2 == '-') || (c2 == '_')) | |
| 332 | + c2 = 0; | |
| 333 | + if (!c1 || !c2) | |
| 334 | + break; | |
| 335 | 335 | } |
| 336 | 336 | while (c1 == c2); |
| 337 | 337 | |
| 338 | 338 | return c1 - c2; |
| 339 | 339 | } |
| @@ -346,11 +346,11 @@ | ||
| 346 | 346 | static int baseword_to_index(const char* word) |
| 347 | 347 | { |
| 348 | 348 | int i; |
| 349 | 349 | |
| 350 | 350 | for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++) |
| 351 | - if (strspacecmp(basewordlist[i], word) == 0) | |
| 351 | + if (strspecialcmp(basewordlist[i], word) == 0) | |
| 352 | 352 | return i; |
| 353 | 353 | |
| 354 | 354 | return -1; |
| 355 | 355 | } |
| 356 | 356 | |
| @@ -361,104 +361,61 @@ | ||
| 361 | 361 | static const char* index_to_baseword(int index) |
| 362 | 362 | { |
| 363 | 363 | return basewordlist[index]; |
| 364 | 364 | } |
| 365 | 365 | |
| 366 | -/* | |
| 367 | -** Look up a word in the extended dictionary and return its index; -1 if the | |
| 368 | -** word is not in the dictionary. | |
| 369 | -*/ | |
| 370 | - | |
| 371 | -static int extword_to_index(const char* word) | |
| 372 | -{ | |
| 373 | - int i; | |
| 374 | - | |
| 375 | - for (i=0; i<sizeof(extwordlist)/sizeof(*extwordlist); i++) | |
| 376 | - if (strspacecmp(extwordlist[i], word) == 0) | |
| 377 | - return i; | |
| 378 | - | |
| 379 | - return -1; | |
| 380 | -} | |
| 381 | - | |
| 382 | -/* | |
| 383 | -** Look up a word by index in the extended dictionary and return the string. | |
| 384 | -*/ | |
| 385 | - | |
| 386 | -static const char* index_to_extword(int index) | |
| 387 | -{ | |
| 388 | - return extwordlist[index]; | |
| 389 | -} | |
| 390 | - | |
| 391 | 366 | /* |
| 392 | 367 | ** Encode a hex-character string using mnemonic encoding, writing the |
| 393 | 368 | ** result into the supplied blob. |
| 394 | 369 | */ |
| 395 | 370 | |
| 396 | 371 | int mnemonic_encode(const char* zString, Blob* xBlob) |
| 397 | 372 | { |
| 398 | - int stringlen = strlen(zString); | |
| 399 | - unsigned int accumulator; | |
| 400 | - int charsread; | |
| 373 | + unsigned int buffer = 0; | |
| 374 | + int buflen = 0; | |
| 401 | 375 | int first = 1; |
| 402 | - | |
| 403 | - while (stringlen > 0) | |
| 404 | - { | |
| 405 | - /* Read up to four bytes worth of digest into the accumulator. */ | |
| 406 | - | |
| 407 | - { | |
| 408 | - int i = sscanf(zString, "%8x%n", &accumulator, &charsread); | |
| 409 | - if (i == 0) | |
| 410 | - return 0; | |
| 411 | - if (i == EOF) | |
| 412 | - break; | |
| 413 | - if (charsread & 1) /* Ensure we read complete bytes (pairs of chars) */ | |
| 414 | - return 0; | |
| 415 | - | |
| 416 | - zString += charsread; | |
| 417 | - } | |
| 376 | + int value; | |
| 377 | + | |
| 378 | + for (;;) | |
| 379 | + { | |
| 380 | + /* Read hex characters into the buffer until we have at least CHUNK_SIZE | |
| 381 | + * bits. */ | |
| 382 | + | |
| 383 | + while (buflen < CHUNK_SIZE) | |
| 384 | + { | |
| 385 | + int i = sscanf(zString, "%1x", &value); | |
| 386 | + if (i == 0) /* Parse error? */ | |
| 387 | + return 0; | |
| 388 | + if (i == EOF) /* End of line */ | |
| 389 | + break; | |
| 390 | + buffer <<= 4; | |
| 391 | + buffer |= value; | |
| 392 | + buflen += 4; | |
| 393 | + zString++; | |
| 394 | + } | |
| 395 | + | |
| 396 | + if (buflen < CHUNK_SIZE) | |
| 397 | + { | |
| 398 | + /* Run out of available characters --- give up. */ | |
| 399 | + break; | |
| 400 | + } | |
| 401 | + | |
| 402 | + /* Extract ten bits of data. */ | |
| 403 | + | |
| 404 | + value = (buffer >> (buflen-CHUNK_SIZE)) & ((1<<CHUNK_SIZE)-1); | |
| 405 | + buflen -= CHUNK_SIZE; | |
| 418 | 406 | |
| 419 | 407 | /* Insert a leading space, if necessary. */ |
| 420 | 408 | |
| 421 | 409 | if (!first) |
| 422 | 410 | blob_appendf(xBlob, " "); |
| 423 | 411 | first = 0; |
| 424 | 412 | |
| 425 | - /* Actually do the encoding. */ | |
| 426 | - | |
| 427 | - { | |
| 428 | - switch (charsread) | |
| 429 | - { | |
| 430 | - /* 24-bit encodings are special. */ | |
| 431 | - | |
| 432 | - case 6: | |
| 433 | - blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); | |
| 434 | - accumulator /= MN_BASE; | |
| 435 | - | |
| 436 | - blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); | |
| 437 | - accumulator /= MN_BASE; | |
| 438 | - | |
| 439 | - blob_appendf(xBlob, "%s", index_to_extword(accumulator)); | |
| 440 | - break; | |
| 441 | - | |
| 442 | - /* Otherwise, just follow the common pattern. */ | |
| 443 | - | |
| 444 | - case 8: | |
| 445 | - blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); | |
| 446 | - accumulator /= MN_BASE; | |
| 447 | - /* fall through */ | |
| 448 | - | |
| 449 | - case 4: | |
| 450 | - blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); | |
| 451 | - accumulator /= MN_BASE; | |
| 452 | - /* fall through */ | |
| 453 | - | |
| 454 | - case 2: | |
| 455 | - blob_appendf(xBlob, "%s", index_to_baseword(accumulator)); | |
| 456 | - break; | |
| 457 | - } | |
| 458 | - } | |
| 459 | - } | |
| 413 | + /* Actually encode a word. */ | |
| 414 | + | |
| 415 | + blob_appendf(xBlob, "%s", index_to_baseword(value)); | |
| 416 | + } | |
| 460 | 417 | |
| 461 | 418 | return 1; |
| 462 | 419 | } |
| 463 | 420 | |
| 464 | 421 | /* |
| @@ -466,104 +423,55 @@ | ||
| 466 | 423 | ** string into the supplied blob. |
| 467 | 424 | */ |
| 468 | 425 | |
| 469 | 426 | int mnemonic_decode(const char* zString, Blob* xBlob) |
| 470 | 427 | { |
| 471 | - const char* word1; | |
| 472 | - const char* word2; | |
| 473 | - const char* word3; | |
| 474 | - const char* p; | |
| 475 | - int i; | |
| 476 | - unsigned int value; | |
| 477 | - | |
| 478 | - do | |
| 479 | - { | |
| 480 | - word1 = word2 = word3 = NULL; | |
| 481 | - | |
| 482 | - /* Read up to three words from the string. */ | |
| 483 | - | |
| 484 | - word1 = zString; | |
| 485 | - p = strchr(zString, ' '); | |
| 486 | - if (!p) | |
| 487 | - zString = NULL; | |
| 488 | - else | |
| 489 | - { | |
| 490 | - zString = p+1; | |
| 491 | - word2 = zString; | |
| 492 | - p = strchr(zString, ' '); | |
| 493 | - if (!p) | |
| 494 | - zString = NULL; | |
| 495 | - else | |
| 496 | - { | |
| 497 | - zString = p+1; | |
| 498 | - word3 = zString; | |
| 499 | - p = strchr(zString, ' '); | |
| 500 | - if (!p) | |
| 501 | - zString = NULL; | |
| 502 | - else | |
| 503 | - zString = p+1; | |
| 504 | - } | |
| 505 | - } | |
| 506 | - | |
| 507 | - if (word1 && !word2 && !word3) | |
| 508 | - { | |
| 509 | - /* One word means one byte. */ | |
| 510 | - | |
| 511 | - i = baseword_to_index(word1); | |
| 512 | - if (i == -1) | |
| 513 | - return 0; | |
| 514 | - | |
| 515 | - blob_appendf(xBlob, "%02x", i); | |
| 516 | - } | |
| 517 | - else if (word1 && word2 && !word3) | |
| 518 | - { | |
| 519 | - /* Two words means two bytes. */ | |
| 520 | - | |
| 521 | - i = baseword_to_index(word1); | |
| 522 | - if (i == -1) | |
| 523 | - return 0; | |
| 524 | - value = i; | |
| 525 | - | |
| 526 | - i = baseword_to_index(word2); | |
| 527 | - if (i == -1) | |
| 528 | - return 0; | |
| 529 | - value += i*MN_BASE; | |
| 530 | - | |
| 531 | - blob_appendf(xBlob, "%04x", value); | |
| 532 | - } | |
| 533 | - else | |
| 534 | - { | |
| 535 | - /* Three words means either three or four bytes, depending on what | |
| 536 | - * the final word is. */ | |
| 537 | - | |
| 538 | - i = baseword_to_index(word1); | |
| 539 | - if (i == -1) | |
| 540 | - return 0; | |
| 541 | - value = i; | |
| 542 | - | |
| 543 | - i = baseword_to_index(word2); | |
| 544 | - if (i == -1) | |
| 545 | - return 0; | |
| 546 | - value += i*MN_BASE; | |
| 547 | - | |
| 548 | - i = baseword_to_index(word3); | |
| 549 | - if (i == -1) | |
| 550 | - { | |
| 551 | - i = extword_to_index(word3); | |
| 552 | - if (i == -1) | |
| 553 | - return 0; | |
| 554 | - value += i*MN_BASE*MN_BASE; | |
| 555 | - blob_appendf(xBlob, "%06x", value); | |
| 556 | - } | |
| 557 | - else | |
| 558 | - { | |
| 559 | - value += i*MN_BASE*MN_BASE; | |
| 560 | - blob_appendf(xBlob, "%08x", value); | |
| 561 | - } | |
| 562 | - } | |
| 563 | - } | |
| 564 | - while (zString); | |
| 428 | + unsigned int buffer = 0; | |
| 429 | + int buflen = 0; | |
| 430 | + int first = 1; | |
| 431 | + int value; | |
| 432 | + | |
| 433 | + while (zString) | |
| 434 | + { | |
| 435 | + /* Consume a word, if necessary. */ | |
| 436 | + | |
| 437 | + while (buflen < 4) | |
| 438 | + { | |
| 439 | + /* Find the end of the current word. */ | |
| 440 | + | |
| 441 | + const char* end = strchr(zString, ' '); | |
| 442 | + if (!end) | |
| 443 | + end = strchr(zString, '-'); | |
| 444 | + if (!end) | |
| 445 | + end = strchr(zString, '_'); | |
| 446 | + | |
| 447 | + /* Parse. */ | |
| 448 | + | |
| 449 | + value = baseword_to_index(zString); | |
| 450 | + if (value == -1) /* Parse error? */ | |
| 451 | + return 0; | |
| 452 | + buffer <<= CHUNK_SIZE; | |
| 453 | + buffer |= value; | |
| 454 | + buflen += CHUNK_SIZE; | |
| 455 | + | |
| 456 | + /* Advance the pointer to the next word. */ | |
| 457 | + | |
| 458 | + zString = end; | |
| 459 | + if (zString) | |
| 460 | + zString++; | |
| 461 | + } | |
| 462 | + | |
| 463 | + /* Now consume nibbles and produce hex bytes. */ | |
| 464 | + | |
| 465 | + while (buflen >= 4) | |
| 466 | + { | |
| 467 | + value = (buffer >> (buflen-4)) & 0xf; | |
| 468 | + buflen -= 4; | |
| 469 | + | |
| 470 | + blob_appendf(xBlob, "%x", value); | |
| 471 | + } | |
| 472 | + } | |
| 565 | 473 | |
| 566 | 474 | return 1; |
| 567 | 475 | } |
| 568 | 476 | |
| 569 | 477 | /* |
| @@ -584,16 +492,15 @@ | ||
| 584 | 492 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 585 | 493 | if( zName==0 ) return; |
| 586 | 494 | |
| 587 | 495 | blob_zero(&x); |
| 588 | 496 | rc = mnemonic_encode(zName, &x); |
| 589 | - if ( rc==0 ){ | |
| 497 | + if ((rc==0) || (blob_size(&x) == 0)) | |
| 590 | 498 | sqlite3_result_error(context, "could not mnemonic encode value", -1); |
| 591 | - }else{ | |
| 499 | + else | |
| 592 | 500 | sqlite3_result_text(context, blob_buffer(&x), blob_size(&x), |
| 593 | 501 | SQLITE_TRANSIENT); |
| 594 | - } | |
| 595 | 502 | blob_reset(&x); |
| 596 | 503 | } |
| 597 | 504 | |
| 598 | 505 | /* |
| 599 | 506 | ** Implementation of the "mnemonic_decode(X)" SQL function. The argument X |
| @@ -616,16 +523,15 @@ | ||
| 616 | 523 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 617 | 524 | if( zName==0 ) return; |
| 618 | 525 | |
| 619 | 526 | blob_zero(&x); |
| 620 | 527 | rc = mnemonic_decode(zName, &x); |
| 621 | - if ( rc==0 ){ | |
| 528 | + if ((rc==0) || (blob_size(&x)==0)) | |
| 622 | 529 | sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT); |
| 623 | - }else{ | |
| 530 | + else | |
| 624 | 531 | sqlite3_result_text(context, blob_buffer(&x), blob_size(&x), |
| 625 | 532 | SQLITE_TRANSIENT); |
| 626 | - } | |
| 627 | 533 | blob_reset(&x); |
| 628 | 534 | } |
| 629 | 535 | |
| 630 | 536 | /* |
| 631 | 537 | ** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode(). |
| @@ -636,5 +542,8 @@ | ||
| 636 | 542 | sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0, |
| 637 | 543 | sqlcmd_mnemonic_encode, 0, 0); |
| 638 | 544 | sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0, |
| 639 | 545 | sqlcmd_mnemonic_decode, 0, 0); |
| 640 | 546 | } |
| 547 | + | |
| 548 | +/* vi: set ts=2:sw=2:expandtab: */ | |
| 549 | + | |
| 641 | 550 |
| --- src/mnemonic.c | |
| +++ src/mnemonic.c | |
| @@ -2,11 +2,10 @@ | |
| 2 | ** Copyright (c) 2008 D. Richard Hipp |
| 3 | ** |
| 4 | ** This program is free software; you can redistribute it and/or |
| 5 | ** modify it under the terms of the Simplified BSD License (also |
| 6 | ** known as the "2-Clause License" or "FreeBSD License".) |
| 7 | |
| 8 | ** This program is distributed in the hope that it will be useful, |
| 9 | ** but without any warranty; without even the implied warranty of |
| 10 | ** merchantability or fitness for a particular purpose. |
| 11 | ** |
| 12 | ** Author contact information: |
| @@ -14,23 +13,33 @@ | |
| 14 | ** http://www.hwaci.com/drh/ |
| 15 | ** |
| 16 | ******************************************************************************* |
| 17 | ** |
| 18 | ** This file contains the code to do mnemonic encoding of commit IDs. The |
| 19 | ** wordlist and algorithm is taken from: |
| 20 | ** |
| 21 | ** https://github.com/singpolyma/mnemonicode |
| 22 | ** |
| 23 | ** The original code is MIT licensed, but this code uses a reimplementation |
| 24 | ** from scratch anyway. |
| 25 | */ |
| 26 | |
| 27 | #include "config.h" |
| 28 | #include "mnemonic.h" |
| 29 | |
| 30 | #define MN_BASE 1626 |
| 31 | #define MN_REMAINDER 7 |
| 32 | |
| 33 | static const char* basewordlist[MN_BASE] = |
| 34 | { |
| 35 | "ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL", |
| 36 | "ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT", |
| @@ -303,37 +312,28 @@ | |
| 303 | "WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL", |
| 304 | "WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC", |
| 305 | "RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN" |
| 306 | }; |
| 307 | |
| 308 | static const char* extwordlist[MN_REMAINDER] = |
| 309 | { |
| 310 | "EGO", "FAX", "JET", "JOB", "RIO", "SKI", |
| 311 | "YES" |
| 312 | }; |
| 313 | |
| 314 | /* |
| 315 | ** String comparator which supports both '\0' and ' ' as a terminator |
| 316 | ** character. |
| 317 | */ |
| 318 | |
| 319 | static int strspacecmp(const char* s1, const char* s2) |
| 320 | { |
| 321 | int c1, c2; |
| 322 | |
| 323 | do |
| 324 | { |
| 325 | c1 = *s1++; |
| 326 | c2 = *s2++; |
| 327 | if ((c1 == '\0') || (c2 == ' ')) |
| 328 | { |
| 329 | if (c1 == ' ') |
| 330 | c1 = '\0'; |
| 331 | if (c2 == ' ') |
| 332 | c2 = '\0'; |
| 333 | break; |
| 334 | } |
| 335 | } |
| 336 | while (c1 == c2); |
| 337 | |
| 338 | return c1 - c2; |
| 339 | } |
| @@ -346,11 +346,11 @@ | |
| 346 | static int baseword_to_index(const char* word) |
| 347 | { |
| 348 | int i; |
| 349 | |
| 350 | for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++) |
| 351 | if (strspacecmp(basewordlist[i], word) == 0) |
| 352 | return i; |
| 353 | |
| 354 | return -1; |
| 355 | } |
| 356 | |
| @@ -361,104 +361,61 @@ | |
| 361 | static const char* index_to_baseword(int index) |
| 362 | { |
| 363 | return basewordlist[index]; |
| 364 | } |
| 365 | |
| 366 | /* |
| 367 | ** Look up a word in the extended dictionary and return its index; -1 if the |
| 368 | ** word is not in the dictionary. |
| 369 | */ |
| 370 | |
| 371 | static int extword_to_index(const char* word) |
| 372 | { |
| 373 | int i; |
| 374 | |
| 375 | for (i=0; i<sizeof(extwordlist)/sizeof(*extwordlist); i++) |
| 376 | if (strspacecmp(extwordlist[i], word) == 0) |
| 377 | return i; |
| 378 | |
| 379 | return -1; |
| 380 | } |
| 381 | |
| 382 | /* |
| 383 | ** Look up a word by index in the extended dictionary and return the string. |
| 384 | */ |
| 385 | |
| 386 | static const char* index_to_extword(int index) |
| 387 | { |
| 388 | return extwordlist[index]; |
| 389 | } |
| 390 | |
| 391 | /* |
| 392 | ** Encode a hex-character string using mnemonic encoding, writing the |
| 393 | ** result into the supplied blob. |
| 394 | */ |
| 395 | |
| 396 | int mnemonic_encode(const char* zString, Blob* xBlob) |
| 397 | { |
| 398 | int stringlen = strlen(zString); |
| 399 | unsigned int accumulator; |
| 400 | int charsread; |
| 401 | int first = 1; |
| 402 | |
| 403 | while (stringlen > 0) |
| 404 | { |
| 405 | /* Read up to four bytes worth of digest into the accumulator. */ |
| 406 | |
| 407 | { |
| 408 | int i = sscanf(zString, "%8x%n", &accumulator, &charsread); |
| 409 | if (i == 0) |
| 410 | return 0; |
| 411 | if (i == EOF) |
| 412 | break; |
| 413 | if (charsread & 1) /* Ensure we read complete bytes (pairs of chars) */ |
| 414 | return 0; |
| 415 | |
| 416 | zString += charsread; |
| 417 | } |
| 418 | |
| 419 | /* Insert a leading space, if necessary. */ |
| 420 | |
| 421 | if (!first) |
| 422 | blob_appendf(xBlob, " "); |
| 423 | first = 0; |
| 424 | |
| 425 | /* Actually do the encoding. */ |
| 426 | |
| 427 | { |
| 428 | switch (charsread) |
| 429 | { |
| 430 | /* 24-bit encodings are special. */ |
| 431 | |
| 432 | case 6: |
| 433 | blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); |
| 434 | accumulator /= MN_BASE; |
| 435 | |
| 436 | blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); |
| 437 | accumulator /= MN_BASE; |
| 438 | |
| 439 | blob_appendf(xBlob, "%s", index_to_extword(accumulator)); |
| 440 | break; |
| 441 | |
| 442 | /* Otherwise, just follow the common pattern. */ |
| 443 | |
| 444 | case 8: |
| 445 | blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); |
| 446 | accumulator /= MN_BASE; |
| 447 | /* fall through */ |
| 448 | |
| 449 | case 4: |
| 450 | blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE)); |
| 451 | accumulator /= MN_BASE; |
| 452 | /* fall through */ |
| 453 | |
| 454 | case 2: |
| 455 | blob_appendf(xBlob, "%s", index_to_baseword(accumulator)); |
| 456 | break; |
| 457 | } |
| 458 | } |
| 459 | } |
| 460 | |
| 461 | return 1; |
| 462 | } |
| 463 | |
| 464 | /* |
| @@ -466,104 +423,55 @@ | |
| 466 | ** string into the supplied blob. |
| 467 | */ |
| 468 | |
| 469 | int mnemonic_decode(const char* zString, Blob* xBlob) |
| 470 | { |
| 471 | const char* word1; |
| 472 | const char* word2; |
| 473 | const char* word3; |
| 474 | const char* p; |
| 475 | int i; |
| 476 | unsigned int value; |
| 477 | |
| 478 | do |
| 479 | { |
| 480 | word1 = word2 = word3 = NULL; |
| 481 | |
| 482 | /* Read up to three words from the string. */ |
| 483 | |
| 484 | word1 = zString; |
| 485 | p = strchr(zString, ' '); |
| 486 | if (!p) |
| 487 | zString = NULL; |
| 488 | else |
| 489 | { |
| 490 | zString = p+1; |
| 491 | word2 = zString; |
| 492 | p = strchr(zString, ' '); |
| 493 | if (!p) |
| 494 | zString = NULL; |
| 495 | else |
| 496 | { |
| 497 | zString = p+1; |
| 498 | word3 = zString; |
| 499 | p = strchr(zString, ' '); |
| 500 | if (!p) |
| 501 | zString = NULL; |
| 502 | else |
| 503 | zString = p+1; |
| 504 | } |
| 505 | } |
| 506 | |
| 507 | if (word1 && !word2 && !word3) |
| 508 | { |
| 509 | /* One word means one byte. */ |
| 510 | |
| 511 | i = baseword_to_index(word1); |
| 512 | if (i == -1) |
| 513 | return 0; |
| 514 | |
| 515 | blob_appendf(xBlob, "%02x", i); |
| 516 | } |
| 517 | else if (word1 && word2 && !word3) |
| 518 | { |
| 519 | /* Two words means two bytes. */ |
| 520 | |
| 521 | i = baseword_to_index(word1); |
| 522 | if (i == -1) |
| 523 | return 0; |
| 524 | value = i; |
| 525 | |
| 526 | i = baseword_to_index(word2); |
| 527 | if (i == -1) |
| 528 | return 0; |
| 529 | value += i*MN_BASE; |
| 530 | |
| 531 | blob_appendf(xBlob, "%04x", value); |
| 532 | } |
| 533 | else |
| 534 | { |
| 535 | /* Three words means either three or four bytes, depending on what |
| 536 | * the final word is. */ |
| 537 | |
| 538 | i = baseword_to_index(word1); |
| 539 | if (i == -1) |
| 540 | return 0; |
| 541 | value = i; |
| 542 | |
| 543 | i = baseword_to_index(word2); |
| 544 | if (i == -1) |
| 545 | return 0; |
| 546 | value += i*MN_BASE; |
| 547 | |
| 548 | i = baseword_to_index(word3); |
| 549 | if (i == -1) |
| 550 | { |
| 551 | i = extword_to_index(word3); |
| 552 | if (i == -1) |
| 553 | return 0; |
| 554 | value += i*MN_BASE*MN_BASE; |
| 555 | blob_appendf(xBlob, "%06x", value); |
| 556 | } |
| 557 | else |
| 558 | { |
| 559 | value += i*MN_BASE*MN_BASE; |
| 560 | blob_appendf(xBlob, "%08x", value); |
| 561 | } |
| 562 | } |
| 563 | } |
| 564 | while (zString); |
| 565 | |
| 566 | return 1; |
| 567 | } |
| 568 | |
| 569 | /* |
| @@ -584,16 +492,15 @@ | |
| 584 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 585 | if( zName==0 ) return; |
| 586 | |
| 587 | blob_zero(&x); |
| 588 | rc = mnemonic_encode(zName, &x); |
| 589 | if ( rc==0 ){ |
| 590 | sqlite3_result_error(context, "could not mnemonic encode value", -1); |
| 591 | }else{ |
| 592 | sqlite3_result_text(context, blob_buffer(&x), blob_size(&x), |
| 593 | SQLITE_TRANSIENT); |
| 594 | } |
| 595 | blob_reset(&x); |
| 596 | } |
| 597 | |
| 598 | /* |
| 599 | ** Implementation of the "mnemonic_decode(X)" SQL function. The argument X |
| @@ -616,16 +523,15 @@ | |
| 616 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 617 | if( zName==0 ) return; |
| 618 | |
| 619 | blob_zero(&x); |
| 620 | rc = mnemonic_decode(zName, &x); |
| 621 | if ( rc==0 ){ |
| 622 | sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT); |
| 623 | }else{ |
| 624 | sqlite3_result_text(context, blob_buffer(&x), blob_size(&x), |
| 625 | SQLITE_TRANSIENT); |
| 626 | } |
| 627 | blob_reset(&x); |
| 628 | } |
| 629 | |
| 630 | /* |
| 631 | ** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode(). |
| @@ -636,5 +542,8 @@ | |
| 636 | sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0, |
| 637 | sqlcmd_mnemonic_encode, 0, 0); |
| 638 | sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0, |
| 639 | sqlcmd_mnemonic_decode, 0, 0); |
| 640 | } |
| 641 |
| --- src/mnemonic.c | |
| +++ src/mnemonic.c | |
| @@ -2,11 +2,10 @@ | |
| 2 | ** Copyright (c) 2008 D. Richard Hipp |
| 3 | ** |
| 4 | ** This program is free software; you can redistribute it and/or |
| 5 | ** modify it under the terms of the Simplified BSD License (also |
| 6 | ** known as the "2-Clause License" or "FreeBSD License".) |
| 7 | ** This program is distributed in the hope that it will be useful, |
| 8 | ** but without any warranty; without even the implied warranty of |
| 9 | ** merchantability or fitness for a particular purpose. |
| 10 | ** |
| 11 | ** Author contact information: |
| @@ -14,23 +13,33 @@ | |
| 13 | ** http://www.hwaci.com/drh/ |
| 14 | ** |
| 15 | ******************************************************************************* |
| 16 | ** |
| 17 | ** This file contains the code to do mnemonic encoding of commit IDs. The |
| 18 | ** wordlist and algorithm were originally taken from: |
| 19 | ** |
| 20 | ** https://github.com/singpolyma/mnemonicode |
| 21 | ** |
| 22 | ** ...which is BSD licensed. However, the algorithm has since been heavily |
| 23 | ** modified to more fit Fossil's special needs. |
| 24 | ** |
| 25 | ** The new algorithm works by reading hex characters one at a time and |
| 26 | ** accumulating bits into a ring buffer. When available, 10 bits are consumed |
| 27 | ** and used as a word index. This only uses 1024 of the available 1626 words |
| 28 | ** but preserves the property that encoded strings may be truncated at any |
| 29 | ** point and the resulting hex string, when decoded, is a truncated form of |
| 30 | ** the original hex string. It is, unfortunately, slightly less efficient, and |
| 31 | ** three words can only encode seven hex digits. |
| 32 | */ |
| 33 | |
| 34 | #include "config.h" |
| 35 | #include "mnemonic.h" |
| 36 | |
| 37 | #define MN_BASE 1626 |
| 38 | |
| 39 | /* Number of bits to encode as a word. 10 is the maximum. */ |
| 40 | #define CHUNK_SIZE 10 |
| 41 | |
| 42 | static const char* basewordlist[MN_BASE] = |
| 43 | { |
| 44 | "ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL", |
| 45 | "ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT", |
| @@ -303,37 +312,28 @@ | |
| 312 | "WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL", |
| 313 | "WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC", |
| 314 | "RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN" |
| 315 | }; |
| 316 | |
| 317 | /* |
| 318 | ** Special string comparator. |
| 319 | */ |
| 320 | |
| 321 | static int strspecialcmp(const char* s1, const char* s2) |
| 322 | { |
| 323 | int c1, c2; |
| 324 | |
| 325 | do |
| 326 | { |
| 327 | c1 = toupper(*s1++); |
| 328 | c2 = toupper(*s2++); |
| 329 | if ((c1 == ' ') || (c1 == '-') || (c1 == '_')) |
| 330 | c1 = 0; |
| 331 | if ((c2 == ' ') || (c2 == '-') || (c2 == '_')) |
| 332 | c2 = 0; |
| 333 | if (!c1 || !c2) |
| 334 | break; |
| 335 | } |
| 336 | while (c1 == c2); |
| 337 | |
| 338 | return c1 - c2; |
| 339 | } |
| @@ -346,11 +346,11 @@ | |
| 346 | static int baseword_to_index(const char* word) |
| 347 | { |
| 348 | int i; |
| 349 | |
| 350 | for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++) |
| 351 | if (strspecialcmp(basewordlist[i], word) == 0) |
| 352 | return i; |
| 353 | |
| 354 | return -1; |
| 355 | } |
| 356 | |
| @@ -361,104 +361,61 @@ | |
| 361 | static const char* index_to_baseword(int index) |
| 362 | { |
| 363 | return basewordlist[index]; |
| 364 | } |
| 365 | |
| 366 | /* |
| 367 | ** Encode a hex-character string using mnemonic encoding, writing the |
| 368 | ** result into the supplied blob. |
| 369 | */ |
| 370 | |
| 371 | int mnemonic_encode(const char* zString, Blob* xBlob) |
| 372 | { |
| 373 | unsigned int buffer = 0; |
| 374 | int buflen = 0; |
| 375 | int first = 1; |
| 376 | int value; |
| 377 | |
| 378 | for (;;) |
| 379 | { |
| 380 | /* Read hex characters into the buffer until we have at least CHUNK_SIZE |
| 381 | * bits. */ |
| 382 | |
| 383 | while (buflen < CHUNK_SIZE) |
| 384 | { |
| 385 | int i = sscanf(zString, "%1x", &value); |
| 386 | if (i == 0) /* Parse error? */ |
| 387 | return 0; |
| 388 | if (i == EOF) /* End of line */ |
| 389 | break; |
| 390 | buffer <<= 4; |
| 391 | buffer |= value; |
| 392 | buflen += 4; |
| 393 | zString++; |
| 394 | } |
| 395 | |
| 396 | if (buflen < CHUNK_SIZE) |
| 397 | { |
| 398 | /* Run out of available characters --- give up. */ |
| 399 | break; |
| 400 | } |
| 401 | |
| 402 | /* Extract ten bits of data. */ |
| 403 | |
| 404 | value = (buffer >> (buflen-CHUNK_SIZE)) & ((1<<CHUNK_SIZE)-1); |
| 405 | buflen -= CHUNK_SIZE; |
| 406 | |
| 407 | /* Insert a leading space, if necessary. */ |
| 408 | |
| 409 | if (!first) |
| 410 | blob_appendf(xBlob, " "); |
| 411 | first = 0; |
| 412 | |
| 413 | /* Actually encode a word. */ |
| 414 | |
| 415 | blob_appendf(xBlob, "%s", index_to_baseword(value)); |
| 416 | } |
| 417 | |
| 418 | return 1; |
| 419 | } |
| 420 | |
| 421 | /* |
| @@ -466,104 +423,55 @@ | |
| 423 | ** string into the supplied blob. |
| 424 | */ |
| 425 | |
| 426 | int mnemonic_decode(const char* zString, Blob* xBlob) |
| 427 | { |
| 428 | unsigned int buffer = 0; |
| 429 | int buflen = 0; |
| 430 | int first = 1; |
| 431 | int value; |
| 432 | |
| 433 | while (zString) |
| 434 | { |
| 435 | /* Consume a word, if necessary. */ |
| 436 | |
| 437 | while (buflen < 4) |
| 438 | { |
| 439 | /* Find the end of the current word. */ |
| 440 | |
| 441 | const char* end = strchr(zString, ' '); |
| 442 | if (!end) |
| 443 | end = strchr(zString, '-'); |
| 444 | if (!end) |
| 445 | end = strchr(zString, '_'); |
| 446 | |
| 447 | /* Parse. */ |
| 448 | |
| 449 | value = baseword_to_index(zString); |
| 450 | if (value == -1) /* Parse error? */ |
| 451 | return 0; |
| 452 | buffer <<= CHUNK_SIZE; |
| 453 | buffer |= value; |
| 454 | buflen += CHUNK_SIZE; |
| 455 | |
| 456 | /* Advance the pointer to the next word. */ |
| 457 | |
| 458 | zString = end; |
| 459 | if (zString) |
| 460 | zString++; |
| 461 | } |
| 462 | |
| 463 | /* Now consume nibbles and produce hex bytes. */ |
| 464 | |
| 465 | while (buflen >= 4) |
| 466 | { |
| 467 | value = (buffer >> (buflen-4)) & 0xf; |
| 468 | buflen -= 4; |
| 469 | |
| 470 | blob_appendf(xBlob, "%x", value); |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | return 1; |
| 475 | } |
| 476 | |
| 477 | /* |
| @@ -584,16 +492,15 @@ | |
| 492 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 493 | if( zName==0 ) return; |
| 494 | |
| 495 | blob_zero(&x); |
| 496 | rc = mnemonic_encode(zName, &x); |
| 497 | if ((rc==0) || (blob_size(&x) == 0)) |
| 498 | sqlite3_result_error(context, "could not mnemonic encode value", -1); |
| 499 | else |
| 500 | sqlite3_result_text(context, blob_buffer(&x), blob_size(&x), |
| 501 | SQLITE_TRANSIENT); |
| 502 | blob_reset(&x); |
| 503 | } |
| 504 | |
| 505 | /* |
| 506 | ** Implementation of the "mnemonic_decode(X)" SQL function. The argument X |
| @@ -616,16 +523,15 @@ | |
| 523 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 524 | if( zName==0 ) return; |
| 525 | |
| 526 | blob_zero(&x); |
| 527 | rc = mnemonic_decode(zName, &x); |
| 528 | if ((rc==0) || (blob_size(&x)==0)) |
| 529 | sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT); |
| 530 | else |
| 531 | sqlite3_result_text(context, blob_buffer(&x), blob_size(&x), |
| 532 | SQLITE_TRANSIENT); |
| 533 | blob_reset(&x); |
| 534 | } |
| 535 | |
| 536 | /* |
| 537 | ** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode(). |
| @@ -636,5 +542,8 @@ | |
| 542 | sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0, |
| 543 | sqlcmd_mnemonic_encode, 0, 0); |
| 544 | sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0, |
| 545 | sqlcmd_mnemonic_decode, 0, 0); |
| 546 | } |
| 547 | |
| 548 | /* vi: set ts=2:sw=2:expandtab: */ |
| 549 | |
| 550 |