Fossil SCM

Rework the algorithm to consume a certain number of bits from the string, rather than using modulus arithmetic --- this is much friendlier on partial hashes, at the expense of being a bit less efficient.

dg 2013-09-07 16:09 dg-codenames
Commit 0160d40b5d0c345b2efb27e5f6e1a0e4716efb88
1 file changed +113 -204
+113 -204
--- src/mnemonic.c
+++ src/mnemonic.c
@@ -2,11 +2,10 @@
22
** Copyright (c) 2008 D. Richard Hipp
33
**
44
** This program is free software; you can redistribute it and/or
55
** modify it under the terms of the Simplified BSD License (also
66
** known as the "2-Clause License" or "FreeBSD License".)
7
-
87
** This program is distributed in the hope that it will be useful,
98
** but without any warranty; without even the implied warranty of
109
** merchantability or fitness for a particular purpose.
1110
**
1211
** Author contact information:
@@ -14,23 +13,33 @@
1413
** http://www.hwaci.com/drh/
1514
**
1615
*******************************************************************************
1716
**
1817
** This file contains the code to do mnemonic encoding of commit IDs. The
19
-** wordlist and algorithm is taken from:
18
+** wordlist and algorithm were originally taken from:
2019
**
2120
** https://github.com/singpolyma/mnemonicode
2221
**
23
-** The original code is MIT licensed, but this code uses a reimplementation
24
-** from scratch anyway.
22
+** ...which is BSD licensed. However, the algorithm has since been heavily
23
+** modified to more fit Fossil's special needs.
24
+**
25
+** The new algorithm works by reading hex characters one at a time and
26
+** accumulating bits into a ring buffer. When available, 10 bits are consumed
27
+** and used as a word index. This only uses 1024 of the available 1626 words
28
+** but preserves the property that encoded strings may be truncated at any
29
+** point and the resulting hex string, when decoded, is a truncated form of
30
+** the original hex string. It is, unfortunately, slightly less efficient, and
31
+** three words can only encode seven hex digits.
2532
*/
2633
2734
#include "config.h"
2835
#include "mnemonic.h"
2936
3037
#define MN_BASE 1626
31
-#define MN_REMAINDER 7
38
+
39
+/* Number of bits to encode as a word. 10 is the maximum. */
40
+#define CHUNK_SIZE 10
3241
3342
static const char* basewordlist[MN_BASE] =
3443
{
3544
"ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL",
3645
"ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT",
@@ -303,37 +312,28 @@
303312
"WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL",
304313
"WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC",
305314
"RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN"
306315
};
307316
308
-static const char* extwordlist[MN_REMAINDER] =
309
-{
310
- "EGO", "FAX", "JET", "JOB", "RIO", "SKI",
311
- "YES"
312
-};
313
-
314317
/*
315
-** String comparator which supports both '\0' and ' ' as a terminator
316
-** character.
318
+** Special string comparator.
317319
*/
318320
319
-static int strspacecmp(const char* s1, const char* s2)
321
+static int strspecialcmp(const char* s1, const char* s2)
320322
{
321323
int c1, c2;
322324
323325
do
324326
{
325
- c1 = *s1++;
326
- c2 = *s2++;
327
- if ((c1 == '\0') || (c2 == ' '))
328
- {
329
- if (c1 == ' ')
330
- c1 = '\0';
331
- if (c2 == ' ')
332
- c2 = '\0';
333
- break;
334
- }
327
+ c1 = toupper(*s1++);
328
+ c2 = toupper(*s2++);
329
+ if ((c1 == ' ') || (c1 == '-') || (c1 == '_'))
330
+ c1 = 0;
331
+ if ((c2 == ' ') || (c2 == '-') || (c2 == '_'))
332
+ c2 = 0;
333
+ if (!c1 || !c2)
334
+ break;
335335
}
336336
while (c1 == c2);
337337
338338
return c1 - c2;
339339
}
@@ -346,11 +346,11 @@
346346
static int baseword_to_index(const char* word)
347347
{
348348
int i;
349349
350350
for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++)
351
- if (strspacecmp(basewordlist[i], word) == 0)
351
+ if (strspecialcmp(basewordlist[i], word) == 0)
352352
return i;
353353
354354
return -1;
355355
}
356356
@@ -361,104 +361,61 @@
361361
static const char* index_to_baseword(int index)
362362
{
363363
return basewordlist[index];
364364
}
365365
366
-/*
367
-** Look up a word in the extended dictionary and return its index; -1 if the
368
-** word is not in the dictionary.
369
-*/
370
-
371
-static int extword_to_index(const char* word)
372
-{
373
- int i;
374
-
375
- for (i=0; i<sizeof(extwordlist)/sizeof(*extwordlist); i++)
376
- if (strspacecmp(extwordlist[i], word) == 0)
377
- return i;
378
-
379
- return -1;
380
-}
381
-
382
-/*
383
-** Look up a word by index in the extended dictionary and return the string.
384
-*/
385
-
386
-static const char* index_to_extword(int index)
387
-{
388
- return extwordlist[index];
389
-}
390
-
391366
/*
392367
** Encode a hex-character string using mnemonic encoding, writing the
393368
** result into the supplied blob.
394369
*/
395370
396371
int mnemonic_encode(const char* zString, Blob* xBlob)
397372
{
398
- int stringlen = strlen(zString);
399
- unsigned int accumulator;
400
- int charsread;
373
+ unsigned int buffer = 0;
374
+ int buflen = 0;
401375
int first = 1;
402
-
403
- while (stringlen > 0)
404
- {
405
- /* Read up to four bytes worth of digest into the accumulator. */
406
-
407
- {
408
- int i = sscanf(zString, "%8x%n", &accumulator, &charsread);
409
- if (i == 0)
410
- return 0;
411
- if (i == EOF)
412
- break;
413
- if (charsread & 1) /* Ensure we read complete bytes (pairs of chars) */
414
- return 0;
415
-
416
- zString += charsread;
417
- }
376
+ int value;
377
+
378
+ for (;;)
379
+ {
380
+ /* Read hex characters into the buffer until we have at least CHUNK_SIZE
381
+ * bits. */
382
+
383
+ while (buflen < CHUNK_SIZE)
384
+ {
385
+ int i = sscanf(zString, "%1x", &value);
386
+ if (i == 0) /* Parse error? */
387
+ return 0;
388
+ if (i == EOF) /* End of line */
389
+ break;
390
+ buffer <<= 4;
391
+ buffer |= value;
392
+ buflen += 4;
393
+ zString++;
394
+ }
395
+
396
+ if (buflen < CHUNK_SIZE)
397
+ {
398
+ /* Run out of available characters --- give up. */
399
+ break;
400
+ }
401
+
402
+ /* Extract ten bits of data. */
403
+
404
+ value = (buffer >> (buflen-CHUNK_SIZE)) & ((1<<CHUNK_SIZE)-1);
405
+ buflen -= CHUNK_SIZE;
418406
419407
/* Insert a leading space, if necessary. */
420408
421409
if (!first)
422410
blob_appendf(xBlob, " ");
423411
first = 0;
424412
425
- /* Actually do the encoding. */
426
-
427
- {
428
- switch (charsread)
429
- {
430
- /* 24-bit encodings are special. */
431
-
432
- case 6:
433
- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
434
- accumulator /= MN_BASE;
435
-
436
- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
437
- accumulator /= MN_BASE;
438
-
439
- blob_appendf(xBlob, "%s", index_to_extword(accumulator));
440
- break;
441
-
442
- /* Otherwise, just follow the common pattern. */
443
-
444
- case 8:
445
- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
446
- accumulator /= MN_BASE;
447
- /* fall through */
448
-
449
- case 4:
450
- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
451
- accumulator /= MN_BASE;
452
- /* fall through */
453
-
454
- case 2:
455
- blob_appendf(xBlob, "%s", index_to_baseword(accumulator));
456
- break;
457
- }
458
- }
459
- }
413
+ /* Actually encode a word. */
414
+
415
+ blob_appendf(xBlob, "%s", index_to_baseword(value));
416
+ }
460417
461418
return 1;
462419
}
463420
464421
/*
@@ -466,104 +423,55 @@
466423
** string into the supplied blob.
467424
*/
468425
469426
int mnemonic_decode(const char* zString, Blob* xBlob)
470427
{
471
- const char* word1;
472
- const char* word2;
473
- const char* word3;
474
- const char* p;
475
- int i;
476
- unsigned int value;
477
-
478
- do
479
- {
480
- word1 = word2 = word3 = NULL;
481
-
482
- /* Read up to three words from the string. */
483
-
484
- word1 = zString;
485
- p = strchr(zString, ' ');
486
- if (!p)
487
- zString = NULL;
488
- else
489
- {
490
- zString = p+1;
491
- word2 = zString;
492
- p = strchr(zString, ' ');
493
- if (!p)
494
- zString = NULL;
495
- else
496
- {
497
- zString = p+1;
498
- word3 = zString;
499
- p = strchr(zString, ' ');
500
- if (!p)
501
- zString = NULL;
502
- else
503
- zString = p+1;
504
- }
505
- }
506
-
507
- if (word1 && !word2 && !word3)
508
- {
509
- /* One word means one byte. */
510
-
511
- i = baseword_to_index(word1);
512
- if (i == -1)
513
- return 0;
514
-
515
- blob_appendf(xBlob, "%02x", i);
516
- }
517
- else if (word1 && word2 && !word3)
518
- {
519
- /* Two words means two bytes. */
520
-
521
- i = baseword_to_index(word1);
522
- if (i == -1)
523
- return 0;
524
- value = i;
525
-
526
- i = baseword_to_index(word2);
527
- if (i == -1)
528
- return 0;
529
- value += i*MN_BASE;
530
-
531
- blob_appendf(xBlob, "%04x", value);
532
- }
533
- else
534
- {
535
- /* Three words means either three or four bytes, depending on what
536
- * the final word is. */
537
-
538
- i = baseword_to_index(word1);
539
- if (i == -1)
540
- return 0;
541
- value = i;
542
-
543
- i = baseword_to_index(word2);
544
- if (i == -1)
545
- return 0;
546
- value += i*MN_BASE;
547
-
548
- i = baseword_to_index(word3);
549
- if (i == -1)
550
- {
551
- i = extword_to_index(word3);
552
- if (i == -1)
553
- return 0;
554
- value += i*MN_BASE*MN_BASE;
555
- blob_appendf(xBlob, "%06x", value);
556
- }
557
- else
558
- {
559
- value += i*MN_BASE*MN_BASE;
560
- blob_appendf(xBlob, "%08x", value);
561
- }
562
- }
563
- }
564
- while (zString);
428
+ unsigned int buffer = 0;
429
+ int buflen = 0;
430
+ int first = 1;
431
+ int value;
432
+
433
+ while (zString)
434
+ {
435
+ /* Consume a word, if necessary. */
436
+
437
+ while (buflen < 4)
438
+ {
439
+ /* Find the end of the current word. */
440
+
441
+ const char* end = strchr(zString, ' ');
442
+ if (!end)
443
+ end = strchr(zString, '-');
444
+ if (!end)
445
+ end = strchr(zString, '_');
446
+
447
+ /* Parse. */
448
+
449
+ value = baseword_to_index(zString);
450
+ if (value == -1) /* Parse error? */
451
+ return 0;
452
+ buffer <<= CHUNK_SIZE;
453
+ buffer |= value;
454
+ buflen += CHUNK_SIZE;
455
+
456
+ /* Advance the pointer to the next word. */
457
+
458
+ zString = end;
459
+ if (zString)
460
+ zString++;
461
+ }
462
+
463
+ /* Now consume nibbles and produce hex bytes. */
464
+
465
+ while (buflen >= 4)
466
+ {
467
+ value = (buffer >> (buflen-4)) & 0xf;
468
+ buflen -= 4;
469
+
470
+ blob_appendf(xBlob, "%x", value);
471
+ }
472
+ }
565473
566474
return 1;
567475
}
568476
569477
/*
@@ -584,16 +492,15 @@
584492
zName = (const char*)sqlite3_value_text(argv[0]);
585493
if( zName==0 ) return;
586494
587495
blob_zero(&x);
588496
rc = mnemonic_encode(zName, &x);
589
- if ( rc==0 ){
497
+ if ((rc==0) || (blob_size(&x) == 0))
590498
sqlite3_result_error(context, "could not mnemonic encode value", -1);
591
- }else{
499
+ else
592500
sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
593501
SQLITE_TRANSIENT);
594
- }
595502
blob_reset(&x);
596503
}
597504
598505
/*
599506
** Implementation of the "mnemonic_decode(X)" SQL function. The argument X
@@ -616,16 +523,15 @@
616523
zName = (const char*)sqlite3_value_text(argv[0]);
617524
if( zName==0 ) return;
618525
619526
blob_zero(&x);
620527
rc = mnemonic_decode(zName, &x);
621
- if ( rc==0 ){
528
+ if ((rc==0) || (blob_size(&x)==0))
622529
sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT);
623
- }else{
530
+ else
624531
sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
625532
SQLITE_TRANSIENT);
626
- }
627533
blob_reset(&x);
628534
}
629535
630536
/*
631537
** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode().
@@ -636,5 +542,8 @@
636542
sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0,
637543
sqlcmd_mnemonic_encode, 0, 0);
638544
sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0,
639545
sqlcmd_mnemonic_decode, 0, 0);
640546
}
547
+
548
+/* vi: set ts=2:sw=2:expandtab: */
549
+
641550
--- src/mnemonic.c
+++ src/mnemonic.c
@@ -2,11 +2,10 @@
2 ** Copyright (c) 2008 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
@@ -14,23 +13,33 @@
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains the code to do mnemonic encoding of commit IDs. The
19 ** wordlist and algorithm is taken from:
20 **
21 ** https://github.com/singpolyma/mnemonicode
22 **
23 ** The original code is MIT licensed, but this code uses a reimplementation
24 ** from scratch anyway.
 
 
 
 
 
 
 
 
25 */
26
27 #include "config.h"
28 #include "mnemonic.h"
29
30 #define MN_BASE 1626
31 #define MN_REMAINDER 7
 
 
32
33 static const char* basewordlist[MN_BASE] =
34 {
35 "ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL",
36 "ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT",
@@ -303,37 +312,28 @@
303 "WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL",
304 "WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC",
305 "RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN"
306 };
307
308 static const char* extwordlist[MN_REMAINDER] =
309 {
310 "EGO", "FAX", "JET", "JOB", "RIO", "SKI",
311 "YES"
312 };
313
314 /*
315 ** String comparator which supports both '\0' and ' ' as a terminator
316 ** character.
317 */
318
319 static int strspacecmp(const char* s1, const char* s2)
320 {
321 int c1, c2;
322
323 do
324 {
325 c1 = *s1++;
326 c2 = *s2++;
327 if ((c1 == '\0') || (c2 == ' '))
328 {
329 if (c1 == ' ')
330 c1 = '\0';
331 if (c2 == ' ')
332 c2 = '\0';
333 break;
334 }
335 }
336 while (c1 == c2);
337
338 return c1 - c2;
339 }
@@ -346,11 +346,11 @@
346 static int baseword_to_index(const char* word)
347 {
348 int i;
349
350 for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++)
351 if (strspacecmp(basewordlist[i], word) == 0)
352 return i;
353
354 return -1;
355 }
356
@@ -361,104 +361,61 @@
361 static const char* index_to_baseword(int index)
362 {
363 return basewordlist[index];
364 }
365
366 /*
367 ** Look up a word in the extended dictionary and return its index; -1 if the
368 ** word is not in the dictionary.
369 */
370
371 static int extword_to_index(const char* word)
372 {
373 int i;
374
375 for (i=0; i<sizeof(extwordlist)/sizeof(*extwordlist); i++)
376 if (strspacecmp(extwordlist[i], word) == 0)
377 return i;
378
379 return -1;
380 }
381
382 /*
383 ** Look up a word by index in the extended dictionary and return the string.
384 */
385
386 static const char* index_to_extword(int index)
387 {
388 return extwordlist[index];
389 }
390
391 /*
392 ** Encode a hex-character string using mnemonic encoding, writing the
393 ** result into the supplied blob.
394 */
395
396 int mnemonic_encode(const char* zString, Blob* xBlob)
397 {
398 int stringlen = strlen(zString);
399 unsigned int accumulator;
400 int charsread;
401 int first = 1;
402
403 while (stringlen > 0)
404 {
405 /* Read up to four bytes worth of digest into the accumulator. */
406
407 {
408 int i = sscanf(zString, "%8x%n", &accumulator, &charsread);
409 if (i == 0)
410 return 0;
411 if (i == EOF)
412 break;
413 if (charsread & 1) /* Ensure we read complete bytes (pairs of chars) */
414 return 0;
415
416 zString += charsread;
417 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
419 /* Insert a leading space, if necessary. */
420
421 if (!first)
422 blob_appendf(xBlob, " ");
423 first = 0;
424
425 /* Actually do the encoding. */
426
427 {
428 switch (charsread)
429 {
430 /* 24-bit encodings are special. */
431
432 case 6:
433 blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
434 accumulator /= MN_BASE;
435
436 blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
437 accumulator /= MN_BASE;
438
439 blob_appendf(xBlob, "%s", index_to_extword(accumulator));
440 break;
441
442 /* Otherwise, just follow the common pattern. */
443
444 case 8:
445 blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
446 accumulator /= MN_BASE;
447 /* fall through */
448
449 case 4:
450 blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
451 accumulator /= MN_BASE;
452 /* fall through */
453
454 case 2:
455 blob_appendf(xBlob, "%s", index_to_baseword(accumulator));
456 break;
457 }
458 }
459 }
460
461 return 1;
462 }
463
464 /*
@@ -466,104 +423,55 @@
466 ** string into the supplied blob.
467 */
468
469 int mnemonic_decode(const char* zString, Blob* xBlob)
470 {
471 const char* word1;
472 const char* word2;
473 const char* word3;
474 const char* p;
475 int i;
476 unsigned int value;
477
478 do
479 {
480 word1 = word2 = word3 = NULL;
481
482 /* Read up to three words from the string. */
483
484 word1 = zString;
485 p = strchr(zString, ' ');
486 if (!p)
487 zString = NULL;
488 else
489 {
490 zString = p+1;
491 word2 = zString;
492 p = strchr(zString, ' ');
493 if (!p)
494 zString = NULL;
495 else
496 {
497 zString = p+1;
498 word3 = zString;
499 p = strchr(zString, ' ');
500 if (!p)
501 zString = NULL;
502 else
503 zString = p+1;
504 }
505 }
506
507 if (word1 && !word2 && !word3)
508 {
509 /* One word means one byte. */
510
511 i = baseword_to_index(word1);
512 if (i == -1)
513 return 0;
514
515 blob_appendf(xBlob, "%02x", i);
516 }
517 else if (word1 && word2 && !word3)
518 {
519 /* Two words means two bytes. */
520
521 i = baseword_to_index(word1);
522 if (i == -1)
523 return 0;
524 value = i;
525
526 i = baseword_to_index(word2);
527 if (i == -1)
528 return 0;
529 value += i*MN_BASE;
530
531 blob_appendf(xBlob, "%04x", value);
532 }
533 else
534 {
535 /* Three words means either three or four bytes, depending on what
536 * the final word is. */
537
538 i = baseword_to_index(word1);
539 if (i == -1)
540 return 0;
541 value = i;
542
543 i = baseword_to_index(word2);
544 if (i == -1)
545 return 0;
546 value += i*MN_BASE;
547
548 i = baseword_to_index(word3);
549 if (i == -1)
550 {
551 i = extword_to_index(word3);
552 if (i == -1)
553 return 0;
554 value += i*MN_BASE*MN_BASE;
555 blob_appendf(xBlob, "%06x", value);
556 }
557 else
558 {
559 value += i*MN_BASE*MN_BASE;
560 blob_appendf(xBlob, "%08x", value);
561 }
562 }
563 }
564 while (zString);
565
566 return 1;
567 }
568
569 /*
@@ -584,16 +492,15 @@
584 zName = (const char*)sqlite3_value_text(argv[0]);
585 if( zName==0 ) return;
586
587 blob_zero(&x);
588 rc = mnemonic_encode(zName, &x);
589 if ( rc==0 ){
590 sqlite3_result_error(context, "could not mnemonic encode value", -1);
591 }else{
592 sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
593 SQLITE_TRANSIENT);
594 }
595 blob_reset(&x);
596 }
597
598 /*
599 ** Implementation of the "mnemonic_decode(X)" SQL function. The argument X
@@ -616,16 +523,15 @@
616 zName = (const char*)sqlite3_value_text(argv[0]);
617 if( zName==0 ) return;
618
619 blob_zero(&x);
620 rc = mnemonic_decode(zName, &x);
621 if ( rc==0 ){
622 sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT);
623 }else{
624 sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
625 SQLITE_TRANSIENT);
626 }
627 blob_reset(&x);
628 }
629
630 /*
631 ** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode().
@@ -636,5 +542,8 @@
636 sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0,
637 sqlcmd_mnemonic_encode, 0, 0);
638 sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0,
639 sqlcmd_mnemonic_decode, 0, 0);
640 }
 
 
 
641
--- src/mnemonic.c
+++ src/mnemonic.c
@@ -2,11 +2,10 @@
2 ** Copyright (c) 2008 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
 
7 ** This program is distributed in the hope that it will be useful,
8 ** but without any warranty; without even the implied warranty of
9 ** merchantability or fitness for a particular purpose.
10 **
11 ** Author contact information:
@@ -14,23 +13,33 @@
13 ** http://www.hwaci.com/drh/
14 **
15 *******************************************************************************
16 **
17 ** This file contains the code to do mnemonic encoding of commit IDs. The
18 ** wordlist and algorithm were originally taken from:
19 **
20 ** https://github.com/singpolyma/mnemonicode
21 **
22 ** ...which is BSD licensed. However, the algorithm has since been heavily
23 ** modified to more fit Fossil's special needs.
24 **
25 ** The new algorithm works by reading hex characters one at a time and
26 ** accumulating bits into a ring buffer. When available, 10 bits are consumed
27 ** and used as a word index. This only uses 1024 of the available 1626 words
28 ** but preserves the property that encoded strings may be truncated at any
29 ** point and the resulting hex string, when decoded, is a truncated form of
30 ** the original hex string. It is, unfortunately, slightly less efficient, and
31 ** three words can only encode seven hex digits.
32 */
33
34 #include "config.h"
35 #include "mnemonic.h"
36
37 #define MN_BASE 1626
38
39 /* Number of bits to encode as a word. 10 is the maximum. */
40 #define CHUNK_SIZE 10
41
42 static const char* basewordlist[MN_BASE] =
43 {
44 "ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL",
45 "ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT",
@@ -303,37 +312,28 @@
312 "WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL",
313 "WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC",
314 "RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN"
315 };
316
 
 
 
 
 
 
317 /*
318 ** Special string comparator.
 
319 */
320
321 static int strspecialcmp(const char* s1, const char* s2)
322 {
323 int c1, c2;
324
325 do
326 {
327 c1 = toupper(*s1++);
328 c2 = toupper(*s2++);
329 if ((c1 == ' ') || (c1 == '-') || (c1 == '_'))
330 c1 = 0;
331 if ((c2 == ' ') || (c2 == '-') || (c2 == '_'))
332 c2 = 0;
333 if (!c1 || !c2)
334 break;
 
 
335 }
336 while (c1 == c2);
337
338 return c1 - c2;
339 }
@@ -346,11 +346,11 @@
346 static int baseword_to_index(const char* word)
347 {
348 int i;
349
350 for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++)
351 if (strspecialcmp(basewordlist[i], word) == 0)
352 return i;
353
354 return -1;
355 }
356
@@ -361,104 +361,61 @@
361 static const char* index_to_baseword(int index)
362 {
363 return basewordlist[index];
364 }
365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366 /*
367 ** Encode a hex-character string using mnemonic encoding, writing the
368 ** result into the supplied blob.
369 */
370
371 int mnemonic_encode(const char* zString, Blob* xBlob)
372 {
373 unsigned int buffer = 0;
374 int buflen = 0;
 
375 int first = 1;
376 int value;
377
378 for (;;)
379 {
380 /* Read hex characters into the buffer until we have at least CHUNK_SIZE
381 * bits. */
382
383 while (buflen < CHUNK_SIZE)
384 {
385 int i = sscanf(zString, "%1x", &value);
386 if (i == 0) /* Parse error? */
387 return 0;
388 if (i == EOF) /* End of line */
389 break;
390 buffer <<= 4;
391 buffer |= value;
392 buflen += 4;
393 zString++;
394 }
395
396 if (buflen < CHUNK_SIZE)
397 {
398 /* Run out of available characters --- give up. */
399 break;
400 }
401
402 /* Extract ten bits of data. */
403
404 value = (buffer >> (buflen-CHUNK_SIZE)) & ((1<<CHUNK_SIZE)-1);
405 buflen -= CHUNK_SIZE;
406
407 /* Insert a leading space, if necessary. */
408
409 if (!first)
410 blob_appendf(xBlob, " ");
411 first = 0;
412
413 /* Actually encode a word. */
414
415 blob_appendf(xBlob, "%s", index_to_baseword(value));
416 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
418 return 1;
419 }
420
421 /*
@@ -466,104 +423,55 @@
423 ** string into the supplied blob.
424 */
425
426 int mnemonic_decode(const char* zString, Blob* xBlob)
427 {
428 unsigned int buffer = 0;
429 int buflen = 0;
430 int first = 1;
431 int value;
432
433 while (zString)
434 {
435 /* Consume a word, if necessary. */
436
437 while (buflen < 4)
438 {
439 /* Find the end of the current word. */
440
441 const char* end = strchr(zString, ' ');
442 if (!end)
443 end = strchr(zString, '-');
444 if (!end)
445 end = strchr(zString, '_');
446
447 /* Parse. */
448
449 value = baseword_to_index(zString);
450 if (value == -1) /* Parse error? */
451 return 0;
452 buffer <<= CHUNK_SIZE;
453 buffer |= value;
454 buflen += CHUNK_SIZE;
455
456 /* Advance the pointer to the next word. */
457
458 zString = end;
459 if (zString)
460 zString++;
461 }
462
463 /* Now consume nibbles and produce hex bytes. */
464
465 while (buflen >= 4)
466 {
467 value = (buffer >> (buflen-4)) & 0xf;
468 buflen -= 4;
469
470 blob_appendf(xBlob, "%x", value);
471 }
472 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
474 return 1;
475 }
476
477 /*
@@ -584,16 +492,15 @@
492 zName = (const char*)sqlite3_value_text(argv[0]);
493 if( zName==0 ) return;
494
495 blob_zero(&x);
496 rc = mnemonic_encode(zName, &x);
497 if ((rc==0) || (blob_size(&x) == 0))
498 sqlite3_result_error(context, "could not mnemonic encode value", -1);
499 else
500 sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
501 SQLITE_TRANSIENT);
 
502 blob_reset(&x);
503 }
504
505 /*
506 ** Implementation of the "mnemonic_decode(X)" SQL function. The argument X
@@ -616,16 +523,15 @@
523 zName = (const char*)sqlite3_value_text(argv[0]);
524 if( zName==0 ) return;
525
526 blob_zero(&x);
527 rc = mnemonic_decode(zName, &x);
528 if ((rc==0) || (blob_size(&x)==0))
529 sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT);
530 else
531 sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
532 SQLITE_TRANSIENT);
 
533 blob_reset(&x);
534 }
535
536 /*
537 ** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode().
@@ -636,5 +542,8 @@
542 sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0,
543 sqlcmd_mnemonic_encode, 0, 0);
544 sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0,
545 sqlcmd_mnemonic_decode, 0, 0);
546 }
547
548 /* vi: set ts=2:sw=2:expandtab: */
549
550

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button