Fossil SCM

Rework the algorithm to consume a certain number of bits from the string, rather than using modulus arithmetic --- this is much friendlier on partial hashes, at the expense of being a bit less efficient.

dg 2013-09-07 16:09 dg-codenames

Commit 0160d40b5d0c345b2efb27e5f6e1a0e4716efb88

Parent 15090b9450aa34e…

1 file changed +113 -204

~ src/mnemonic.c

M src/mnemonic.c

+113 -204

		--- src/mnemonic.c
		+++ src/mnemonic.c
		@@ -2,11 +2,10 @@
2	2	** Copyright (c) 2008 D. Richard Hipp
3	3	**
4	4	** This program is free software; you can redistribute it and/or
5	5	** modify it under the terms of the Simplified BSD License (also
6	6	** known as the "2-Clause License" or "FreeBSD License".)
7		-
8	7	** This program is distributed in the hope that it will be useful,
9	8	** but without any warranty; without even the implied warranty of
10	9	** merchantability or fitness for a particular purpose.
11	10	**
12	11	** Author contact information:
		@@ -14,23 +13,33 @@
14	13	** http://www.hwaci.com/drh/
15	14	**
16	15	*******************************************************************************
17	16	**
18	17	** This file contains the code to do mnemonic encoding of commit IDs. The
19		-** wordlist and algorithm is taken from:
	18	+** wordlist and algorithm were originally taken from:
20	19	**
21	20	** https://github.com/singpolyma/mnemonicode
22	21	**
23		-** The original code is MIT licensed, but this code uses a reimplementation
24		-** from scratch anyway.
	22	+** ...which is BSD licensed. However, the algorithm has since been heavily
	23	+** modified to more fit Fossil's special needs.
	24	+**
	25	+** The new algorithm works by reading hex characters one at a time and
	26	+** accumulating bits into a ring buffer. When available, 10 bits are consumed
	27	+** and used as a word index. This only uses 1024 of the available 1626 words
	28	+** but preserves the property that encoded strings may be truncated at any
	29	+** point and the resulting hex string, when decoded, is a truncated form of
	30	+** the original hex string. It is, unfortunately, slightly less efficient, and
	31	+** three words can only encode seven hex digits.
25	32	*/
26	33
27	34	#include "config.h"
28	35	#include "mnemonic.h"
29	36
30	37	#define MN_BASE 1626
31		-#define MN_REMAINDER 7
	38	+
	39	+/* Number of bits to encode as a word. 10 is the maximum. */
	40	+#define CHUNK_SIZE 10
32	41
33	42	static const char* basewordlist[MN_BASE] =
34	43	{
35	44	"ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL",
36	45	"ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT",
		@@ -303,37 +312,28 @@
303	312	"WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL",
304	313	"WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC",
305	314	"RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN"
306	315	};
307	316
308		-static const char* extwordlist[MN_REMAINDER] =
309		-{
310		- "EGO", "FAX", "JET", "JOB", "RIO", "SKI",
311		- "YES"
312		-};
313		-
314	317	/*
315		-** String comparator which supports both '\0' and ' ' as a terminator
316		-** character.
	318	+** Special string comparator.
317	319	*/
318	320
319		-static int strspacecmp(const char* s1, const char* s2)
	321	+static int strspecialcmp(const char* s1, const char* s2)
320	322	{
321	323	int c1, c2;
322	324
323	325	do
324	326	{
325		- c1 = *s1++;
326		- c2 = *s2++;
327		- if ((c1 == '\0') \|\| (c2 == ' '))
328		- {
329		- if (c1 == ' ')
330		- c1 = '\0';
331		- if (c2 == ' ')
332		- c2 = '\0';
333		- break;
334		- }
	327	+ c1 = toupper(*s1++);
	328	+ c2 = toupper(*s2++);
	329	+ if ((c1 == ' ') \|\| (c1 == '-') \|\| (c1 == '_'))
	330	+ c1 = 0;
	331	+ if ((c2 == ' ') \|\| (c2 == '-') \|\| (c2 == '_'))
	332	+ c2 = 0;
	333	+ if (!c1 \|\| !c2)
	334	+ break;
335	335	}
336	336	while (c1 == c2);
337	337
338	338	return c1 - c2;
339	339	}
		@@ -346,11 +346,11 @@
346	346	static int baseword_to_index(const char* word)
347	347	{
348	348	int i;
349	349
350	350	for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++)
351		- if (strspacecmp(basewordlist[i], word) == 0)
	351	+ if (strspecialcmp(basewordlist[i], word) == 0)
352	352	return i;
353	353
354	354	return -1;
355	355	}
356	356
		@@ -361,104 +361,61 @@
361	361	static const char* index_to_baseword(int index)
362	362	{
363	363	return basewordlist[index];
364	364	}
365	365
366		-/*
367		-** Look up a word in the extended dictionary and return its index; -1 if the
368		-** word is not in the dictionary.
369		-*/
370		-
371		-static int extword_to_index(const char* word)
372		-{
373		- int i;
374		-
375		- for (i=0; i<sizeof(extwordlist)/sizeof(*extwordlist); i++)
376		- if (strspacecmp(extwordlist[i], word) == 0)
377		- return i;
378		-
379		- return -1;
380		-}
381		-
382		-/*
383		-** Look up a word by index in the extended dictionary and return the string.
384		-*/
385		-
386		-static const char* index_to_extword(int index)
387		-{
388		- return extwordlist[index];
389		-}
390		-
391	366	/*
392	367	** Encode a hex-character string using mnemonic encoding, writing the
393	368	** result into the supplied blob.
394	369	*/
395	370
396	371	int mnemonic_encode(const char* zString, Blob* xBlob)
397	372	{
398		- int stringlen = strlen(zString);
399		- unsigned int accumulator;
400		- int charsread;
	373	+ unsigned int buffer = 0;
	374	+ int buflen = 0;
401	375	int first = 1;
402		-
403		- while (stringlen > 0)
404		- {
405		- /* Read up to four bytes worth of digest into the accumulator. */
406		-
407		- {
408		- int i = sscanf(zString, "%8x%n", &accumulator, &charsread);
409		- if (i == 0)
410		- return 0;
411		- if (i == EOF)
412		- break;
413		- if (charsread & 1) /* Ensure we read complete bytes (pairs of chars) */
414		- return 0;
415		-
416		- zString += charsread;
417		- }
	376	+ int value;
	377	+
	378	+ for (;;)
	379	+ {
	380	+ /* Read hex characters into the buffer until we have at least CHUNK_SIZE
	381	+ * bits. */
	382	+
	383	+ while (buflen < CHUNK_SIZE)
	384	+ {
	385	+ int i = sscanf(zString, "%1x", &value);
	386	+ if (i == 0) /* Parse error? */
	387	+ return 0;
	388	+ if (i == EOF) /* End of line */
	389	+ break;
	390	+ buffer <<= 4;
	391	+ buffer \|= value;
	392	+ buflen += 4;
	393	+ zString++;
	394	+ }
	395	+
	396	+ if (buflen < CHUNK_SIZE)
	397	+ {
	398	+ /* Run out of available characters --- give up. */
	399	+ break;
	400	+ }
	401	+
	402	+ /* Extract ten bits of data. */
	403	+
	404	+ value = (buffer >> (buflen-CHUNK_SIZE)) & ((1<<CHUNK_SIZE)-1);
	405	+ buflen -= CHUNK_SIZE;
418	406
419	407	/* Insert a leading space, if necessary. */
420	408
421	409	if (!first)
422	410	blob_appendf(xBlob, " ");
423	411	first = 0;
424	412
425		- /* Actually do the encoding. */
426		-
427		- {
428		- switch (charsread)
429		- {
430		- /* 24-bit encodings are special. */
431		-
432		- case 6:
433		- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
434		- accumulator /= MN_BASE;
435		-
436		- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
437		- accumulator /= MN_BASE;
438		-
439		- blob_appendf(xBlob, "%s", index_to_extword(accumulator));
440		- break;
441		-
442		- /* Otherwise, just follow the common pattern. */
443		-
444		- case 8:
445		- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
446		- accumulator /= MN_BASE;
447		- /* fall through */
448		-
449		- case 4:
450		- blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
451		- accumulator /= MN_BASE;
452		- /* fall through */
453		-
454		- case 2:
455		- blob_appendf(xBlob, "%s", index_to_baseword(accumulator));
456		- break;
457		- }
458		- }
459		- }
	413	+ /* Actually encode a word. */
	414	+
	415	+ blob_appendf(xBlob, "%s", index_to_baseword(value));
	416	+ }
460	417
461	418	return 1;
462	419	}
463	420
464	421	/*
		@@ -466,104 +423,55 @@
466	423	** string into the supplied blob.
467	424	*/
468	425
469	426	int mnemonic_decode(const char* zString, Blob* xBlob)
470	427	{
471		- const char* word1;
472		- const char* word2;
473		- const char* word3;
474		- const char* p;
475		- int i;
476		- unsigned int value;
477		-
478		- do
479		- {
480		- word1 = word2 = word3 = NULL;
481		-
482		- /* Read up to three words from the string. */
483		-
484		- word1 = zString;
485		- p = strchr(zString, ' ');
486		- if (!p)
487		- zString = NULL;
488		- else
489		- {
490		- zString = p+1;
491		- word2 = zString;
492		- p = strchr(zString, ' ');
493		- if (!p)
494		- zString = NULL;
495		- else
496		- {
497		- zString = p+1;
498		- word3 = zString;
499		- p = strchr(zString, ' ');
500		- if (!p)
501		- zString = NULL;
502		- else
503		- zString = p+1;
504		- }
505		- }
506		-
507		- if (word1 && !word2 && !word3)
508		- {
509		- /* One word means one byte. */
510		-
511		- i = baseword_to_index(word1);
512		- if (i == -1)
513		- return 0;
514		-
515		- blob_appendf(xBlob, "%02x", i);
516		- }
517		- else if (word1 && word2 && !word3)
518		- {
519		- /* Two words means two bytes. */
520		-
521		- i = baseword_to_index(word1);
522		- if (i == -1)
523		- return 0;
524		- value = i;
525		-
526		- i = baseword_to_index(word2);
527		- if (i == -1)
528		- return 0;
529		- value += i*MN_BASE;
530		-
531		- blob_appendf(xBlob, "%04x", value);
532		- }
533		- else
534		- {
535		- /* Three words means either three or four bytes, depending on what
536		- * the final word is. */
537		-
538		- i = baseword_to_index(word1);
539		- if (i == -1)
540		- return 0;
541		- value = i;
542		-
543		- i = baseword_to_index(word2);
544		- if (i == -1)
545		- return 0;
546		- value += i*MN_BASE;
547		-
548		- i = baseword_to_index(word3);
549		- if (i == -1)
550		- {
551		- i = extword_to_index(word3);
552		- if (i == -1)
553		- return 0;
554		- value += iMN_BASEMN_BASE;
555		- blob_appendf(xBlob, "%06x", value);
556		- }
557		- else
558		- {
559		- value += iMN_BASEMN_BASE;
560		- blob_appendf(xBlob, "%08x", value);
561		- }
562		- }
563		- }
564		- while (zString);
	428	+ unsigned int buffer = 0;
	429	+ int buflen = 0;
	430	+ int first = 1;
	431	+ int value;
	432	+
	433	+ while (zString)
	434	+ {
	435	+ /* Consume a word, if necessary. */
	436	+
	437	+ while (buflen < 4)
	438	+ {
	439	+ /* Find the end of the current word. */
	440	+
	441	+ const char* end = strchr(zString, ' ');
	442	+ if (!end)
	443	+ end = strchr(zString, '-');
	444	+ if (!end)
	445	+ end = strchr(zString, '_');
	446	+
	447	+ /* Parse. */
	448	+
	449	+ value = baseword_to_index(zString);
	450	+ if (value == -1) /* Parse error? */
	451	+ return 0;
	452	+ buffer <<= CHUNK_SIZE;
	453	+ buffer \|= value;
	454	+ buflen += CHUNK_SIZE;
	455	+
	456	+ /* Advance the pointer to the next word. */
	457	+
	458	+ zString = end;
	459	+ if (zString)
	460	+ zString++;
	461	+ }
	462	+
	463	+ /* Now consume nibbles and produce hex bytes. */
	464	+
	465	+ while (buflen >= 4)
	466	+ {
	467	+ value = (buffer >> (buflen-4)) & 0xf;
	468	+ buflen -= 4;
	469	+
	470	+ blob_appendf(xBlob, "%x", value);
	471	+ }
	472	+ }
565	473
566	474	return 1;
567	475	}
568	476
569	477	/*
		@@ -584,16 +492,15 @@
584	492	zName = (const char*)sqlite3_value_text(argv[0]);
585	493	if( zName==0 ) return;
586	494
587	495	blob_zero(&x);
588	496	rc = mnemonic_encode(zName, &x);
589		- if ( rc==0 ){
	497	+ if ((rc==0) \|\| (blob_size(&x) == 0))
590	498	sqlite3_result_error(context, "could not mnemonic encode value", -1);
591		- }else{
	499	+ else
592	500	sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
593	501	SQLITE_TRANSIENT);
594		- }
595	502	blob_reset(&x);
596	503	}
597	504
598	505	/*
599	506	** Implementation of the "mnemonic_decode(X)" SQL function. The argument X
		@@ -616,16 +523,15 @@
616	523	zName = (const char*)sqlite3_value_text(argv[0]);
617	524	if( zName==0 ) return;
618	525
619	526	blob_zero(&x);
620	527	rc = mnemonic_decode(zName, &x);
621		- if ( rc==0 ){
	528	+ if ((rc==0) \|\| (blob_size(&x)==0))
622	529	sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT);
623		- }else{
	530	+ else
624	531	sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
625	532	SQLITE_TRANSIENT);
626		- }
627	533	blob_reset(&x);
628	534	}
629	535
630	536	/*
631	537	** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode().
		@@ -636,5 +542,8 @@
636	542	sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0,
637	543	sqlcmd_mnemonic_encode, 0, 0);
638	544	sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0,
639	545	sqlcmd_mnemonic_decode, 0, 0);
640	546	}
	547	+
	548	+/* vi: set ts=2:sw=2:expandtab: */
	549	+
641	550

	--- src/mnemonic.c
	+++ src/mnemonic.c
	@@ -2,11 +2,10 @@
2	** Copyright (c) 2008 D. Richard Hipp
3	**
4	** This program is free software; you can redistribute it and/or
5	** modify it under the terms of the Simplified BSD License (also
6	** known as the "2-Clause License" or "FreeBSD License".)
7
8	** This program is distributed in the hope that it will be useful,
9	** but without any warranty; without even the implied warranty of
10	** merchantability or fitness for a particular purpose.
11	**
12	** Author contact information:
	@@ -14,23 +13,33 @@
14	** http://www.hwaci.com/drh/
15	**
16	*******************************************************************************
17	**
18	** This file contains the code to do mnemonic encoding of commit IDs. The
19	** wordlist and algorithm is taken from:
20	**
21	** https://github.com/singpolyma/mnemonicode
22	**
23	** The original code is MIT licensed, but this code uses a reimplementation
24	** from scratch anyway.








25	*/
26
27	#include "config.h"
28	#include "mnemonic.h"
29
30	#define MN_BASE 1626
31	#define MN_REMAINDER 7


32
33	static const char* basewordlist[MN_BASE] =
34	{
35	"ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL",
36	"ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT",
	@@ -303,37 +312,28 @@
303	"WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL",
304	"WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC",
305	"RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN"
306	};
307
308	static const char* extwordlist[MN_REMAINDER] =
309	{
310	"EGO", "FAX", "JET", "JOB", "RIO", "SKI",
311	"YES"
312	};
313
314	/*
315	** String comparator which supports both '\0' and ' ' as a terminator
316	** character.
317	*/
318
319	static int strspacecmp(const char* s1, const char* s2)
320	{
321	int c1, c2;
322
323	do
324	{
325	c1 = *s1++;
326	c2 = *s2++;
327	if ((c1 == '\0') \|\| (c2 == ' '))
328	{
329	if (c1 == ' ')
330	c1 = '\0';
331	if (c2 == ' ')
332	c2 = '\0';
333	break;
334	}
335	}
336	while (c1 == c2);
337
338	return c1 - c2;
339	}
	@@ -346,11 +346,11 @@
346	static int baseword_to_index(const char* word)
347	{
348	int i;
349
350	for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++)
351	if (strspacecmp(basewordlist[i], word) == 0)
352	return i;
353
354	return -1;
355	}
356
	@@ -361,104 +361,61 @@
361	static const char* index_to_baseword(int index)
362	{
363	return basewordlist[index];
364	}
365
366	/*
367	** Look up a word in the extended dictionary and return its index; -1 if the
368	** word is not in the dictionary.
369	*/
370
371	static int extword_to_index(const char* word)
372	{
373	int i;
374
375	for (i=0; i<sizeof(extwordlist)/sizeof(*extwordlist); i++)
376	if (strspacecmp(extwordlist[i], word) == 0)
377	return i;
378
379	return -1;
380	}
381
382	/*
383	** Look up a word by index in the extended dictionary and return the string.
384	*/
385
386	static const char* index_to_extword(int index)
387	{
388	return extwordlist[index];
389	}
390
391	/*
392	** Encode a hex-character string using mnemonic encoding, writing the
393	** result into the supplied blob.
394	*/
395
396	int mnemonic_encode(const char* zString, Blob* xBlob)
397	{
398	int stringlen = strlen(zString);
399	unsigned int accumulator;
400	int charsread;
401	int first = 1;
402
403	while (stringlen > 0)
404	{
405	/* Read up to four bytes worth of digest into the accumulator. */
406
407	{
408	int i = sscanf(zString, "%8x%n", &accumulator, &charsread);
409	if (i == 0)
410	return 0;
411	if (i == EOF)
412	break;
413	if (charsread & 1) /* Ensure we read complete bytes (pairs of chars) */
414	return 0;
415
416	zString += charsread;
417	}














418
419	/* Insert a leading space, if necessary. */
420
421	if (!first)
422	blob_appendf(xBlob, " ");
423	first = 0;
424
425	/* Actually do the encoding. */
426
427	{
428	switch (charsread)
429	{
430	/* 24-bit encodings are special. */
431
432	case 6:
433	blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
434	accumulator /= MN_BASE;
435
436	blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
437	accumulator /= MN_BASE;
438
439	blob_appendf(xBlob, "%s", index_to_extword(accumulator));
440	break;
441
442	/* Otherwise, just follow the common pattern. */
443
444	case 8:
445	blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
446	accumulator /= MN_BASE;
447	/* fall through */
448
449	case 4:
450	blob_appendf(xBlob, "%s ", index_to_baseword(accumulator % MN_BASE));
451	accumulator /= MN_BASE;
452	/* fall through */
453
454	case 2:
455	blob_appendf(xBlob, "%s", index_to_baseword(accumulator));
456	break;
457	}
458	}
459	}
460
461	return 1;
462	}
463
464	/*
	@@ -466,104 +423,55 @@
466	** string into the supplied blob.
467	*/
468
469	int mnemonic_decode(const char* zString, Blob* xBlob)
470	{
471	const char* word1;
472	const char* word2;
473	const char* word3;
474	const char* p;
475	int i;
476	unsigned int value;
477
478	do
479	{
480	word1 = word2 = word3 = NULL;
481
482	/* Read up to three words from the string. */
483
484	word1 = zString;
485	p = strchr(zString, ' ');
486	if (!p)
487	zString = NULL;
488	else
489	{
490	zString = p+1;
491	word2 = zString;
492	p = strchr(zString, ' ');
493	if (!p)
494	zString = NULL;
495	else
496	{
497	zString = p+1;
498	word3 = zString;
499	p = strchr(zString, ' ');
500	if (!p)
501	zString = NULL;
502	else
503	zString = p+1;
504	}
505	}
506
507	if (word1 && !word2 && !word3)
508	{
509	/* One word means one byte. */
510
511	i = baseword_to_index(word1);
512	if (i == -1)
513	return 0;
514
515	blob_appendf(xBlob, "%02x", i);
516	}
517	else if (word1 && word2 && !word3)
518	{
519	/* Two words means two bytes. */
520
521	i = baseword_to_index(word1);
522	if (i == -1)
523	return 0;
524	value = i;
525
526	i = baseword_to_index(word2);
527	if (i == -1)
528	return 0;
529	value += i*MN_BASE;
530
531	blob_appendf(xBlob, "%04x", value);
532	}
533	else
534	{
535	/* Three words means either three or four bytes, depending on what
536	* the final word is. */
537
538	i = baseword_to_index(word1);
539	if (i == -1)
540	return 0;
541	value = i;
542
543	i = baseword_to_index(word2);
544	if (i == -1)
545	return 0;
546	value += i*MN_BASE;
547
548	i = baseword_to_index(word3);
549	if (i == -1)
550	{
551	i = extword_to_index(word3);
552	if (i == -1)
553	return 0;
554	value += iMN_BASEMN_BASE;
555	blob_appendf(xBlob, "%06x", value);
556	}
557	else
558	{
559	value += iMN_BASEMN_BASE;
560	blob_appendf(xBlob, "%08x", value);
561	}
562	}
563	}
564	while (zString);
565
566	return 1;
567	}
568
569	/*
	@@ -584,16 +492,15 @@
584	zName = (const char*)sqlite3_value_text(argv[0]);
585	if( zName==0 ) return;
586
587	blob_zero(&x);
588	rc = mnemonic_encode(zName, &x);
589	if ( rc==0 ){
590	sqlite3_result_error(context, "could not mnemonic encode value", -1);
591	}else{
592	sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
593	SQLITE_TRANSIENT);
594	}
595	blob_reset(&x);
596	}
597
598	/*
599	** Implementation of the "mnemonic_decode(X)" SQL function. The argument X
	@@ -616,16 +523,15 @@
616	zName = (const char*)sqlite3_value_text(argv[0]);
617	if( zName==0 ) return;
618
619	blob_zero(&x);
620	rc = mnemonic_decode(zName, &x);
621	if ( rc==0 ){
622	sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT);
623	}else{
624	sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
625	SQLITE_TRANSIENT);
626	}
627	blob_reset(&x);
628	}
629
630	/*
631	** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode().
	@@ -636,5 +542,8 @@
636	sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0,
637	sqlcmd_mnemonic_encode, 0, 0);
638	sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0,
639	sqlcmd_mnemonic_decode, 0, 0);
640	}



641

	--- src/mnemonic.c
	+++ src/mnemonic.c
	@@ -2,11 +2,10 @@
2	** Copyright (c) 2008 D. Richard Hipp
3	**
4	** This program is free software; you can redistribute it and/or
5	** modify it under the terms of the Simplified BSD License (also
6	** known as the "2-Clause License" or "FreeBSD License".)

7	** This program is distributed in the hope that it will be useful,
8	** but without any warranty; without even the implied warranty of
9	** merchantability or fitness for a particular purpose.
10	**
11	** Author contact information:
	@@ -14,23 +13,33 @@
13	** http://www.hwaci.com/drh/
14	**
15	*******************************************************************************
16	**
17	** This file contains the code to do mnemonic encoding of commit IDs. The
18	** wordlist and algorithm were originally taken from:
19	**
20	** https://github.com/singpolyma/mnemonicode
21	**
22	** ...which is BSD licensed. However, the algorithm has since been heavily
23	** modified to more fit Fossil's special needs.
24	**
25	** The new algorithm works by reading hex characters one at a time and
26	** accumulating bits into a ring buffer. When available, 10 bits are consumed
27	** and used as a word index. This only uses 1024 of the available 1626 words
28	** but preserves the property that encoded strings may be truncated at any
29	** point and the resulting hex string, when decoded, is a truncated form of
30	** the original hex string. It is, unfortunately, slightly less efficient, and
31	** three words can only encode seven hex digits.
32	*/
33
34	#include "config.h"
35	#include "mnemonic.h"
36
37	#define MN_BASE 1626
38
39	/* Number of bits to encode as a word. 10 is the maximum. */
40	#define CHUNK_SIZE 10
41
42	static const char* basewordlist[MN_BASE] =
43	{
44	"ACADEMY", "ACROBAT", "ACTIVE", "ACTOR", "ADAM", "ADMIRAL",
45	"ADRIAN", "AFRICA", "AGENDA", "AGENT", "AIRLINE", "AIRPORT",
	@@ -303,37 +312,28 @@
312	"WAITER", "WATCH", "WAVE", "WEATHER", "WEDDING", "WHEEL",
313	"WHISKEY", "WISDOM", "DEAL", "NULL", "NURSE", "QUEBEC",
314	"RESERVE", "REUNION", "ROOF", "SINGER", "VERBAL", "AMEN"
315	};
316






317	/*
318	** Special string comparator.

319	*/
320
321	static int strspecialcmp(const char* s1, const char* s2)
322	{
323	int c1, c2;
324
325	do
326	{
327	c1 = toupper(*s1++);
328	c2 = toupper(*s2++);
329	if ((c1 == ' ') \|\| (c1 == '-') \|\| (c1 == '_'))
330	c1 = 0;
331	if ((c2 == ' ') \|\| (c2 == '-') \|\| (c2 == '_'))
332	c2 = 0;
333	if (!c1 \|\| !c2)
334	break;


335	}
336	while (c1 == c2);
337
338	return c1 - c2;
339	}
	@@ -346,11 +346,11 @@
346	static int baseword_to_index(const char* word)
347	{
348	int i;
349
350	for (i=0; i<sizeof(basewordlist)/sizeof(*basewordlist); i++)
351	if (strspecialcmp(basewordlist[i], word) == 0)
352	return i;
353
354	return -1;
355	}
356
	@@ -361,104 +361,61 @@
361	static const char* index_to_baseword(int index)
362	{
363	return basewordlist[index];
364	}
365

























366	/*
367	** Encode a hex-character string using mnemonic encoding, writing the
368	** result into the supplied blob.
369	*/
370
371	int mnemonic_encode(const char* zString, Blob* xBlob)
372	{
373	unsigned int buffer = 0;
374	int buflen = 0;

375	int first = 1;
376	int value;
377
378	for (;;)
379	{
380	/* Read hex characters into the buffer until we have at least CHUNK_SIZE
381	* bits. */
382
383	while (buflen < CHUNK_SIZE)
384	{
385	int i = sscanf(zString, "%1x", &value);
386	if (i == 0) /* Parse error? */
387	return 0;
388	if (i == EOF) /* End of line */
389	break;
390	buffer <<= 4;
391	buffer \|= value;
392	buflen += 4;
393	zString++;
394	}
395
396	if (buflen < CHUNK_SIZE)
397	{
398	/* Run out of available characters --- give up. */
399	break;
400	}
401
402	/* Extract ten bits of data. */
403
404	value = (buffer >> (buflen-CHUNK_SIZE)) & ((1<<CHUNK_SIZE)-1);
405	buflen -= CHUNK_SIZE;
406
407	/* Insert a leading space, if necessary. */
408
409	if (!first)
410	blob_appendf(xBlob, " ");
411	first = 0;
412
413	/* Actually encode a word. */
414
415	blob_appendf(xBlob, "%s", index_to_baseword(value));
416	}































417
418	return 1;
419	}
420
421	/*
	@@ -466,104 +423,55 @@
423	** string into the supplied blob.
424	*/
425
426	int mnemonic_decode(const char* zString, Blob* xBlob)
427	{
428	unsigned int buffer = 0;
429	int buflen = 0;
430	int first = 1;
431	int value;
432
433	while (zString)
434	{
435	/* Consume a word, if necessary. */
436
437	while (buflen < 4)
438	{
439	/* Find the end of the current word. */
440
441	const char* end = strchr(zString, ' ');
442	if (!end)
443	end = strchr(zString, '-');
444	if (!end)
445	end = strchr(zString, '_');
446
447	/* Parse. */
448
449	value = baseword_to_index(zString);
450	if (value == -1) /* Parse error? */
451	return 0;
452	buffer <<= CHUNK_SIZE;
453	buffer \|= value;
454	buflen += CHUNK_SIZE;
455
456	/* Advance the pointer to the next word. */
457
458	zString = end;
459	if (zString)
460	zString++;
461	}
462
463	/* Now consume nibbles and produce hex bytes. */
464
465	while (buflen >= 4)
466	{
467	value = (buffer >> (buflen-4)) & 0xf;
468	buflen -= 4;
469
470	blob_appendf(xBlob, "%x", value);
471	}
472	}

















































473
474	return 1;
475	}
476
477	/*
	@@ -584,16 +492,15 @@
492	zName = (const char*)sqlite3_value_text(argv[0]);
493	if( zName==0 ) return;
494
495	blob_zero(&x);
496	rc = mnemonic_encode(zName, &x);
497	if ((rc==0) \|\| (blob_size(&x) == 0))
498	sqlite3_result_error(context, "could not mnemonic encode value", -1);
499	else
500	sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
501	SQLITE_TRANSIENT);

502	blob_reset(&x);
503	}
504
505	/*
506	** Implementation of the "mnemonic_decode(X)" SQL function. The argument X
	@@ -616,16 +523,15 @@
523	zName = (const char*)sqlite3_value_text(argv[0]);
524	if( zName==0 ) return;
525
526	blob_zero(&x);
527	rc = mnemonic_decode(zName, &x);
528	if ((rc==0) \|\| (blob_size(&x)==0))
529	sqlite3_result_text(context, zName, -1, SQLITE_TRANSIENT);
530	else
531	sqlite3_result_text(context, blob_buffer(&x), blob_size(&x),
532	SQLITE_TRANSIENT);

533	blob_reset(&x);
534	}
535
536	/*
537	** Add the SQL functions that wrap mnemonic_encode() and mnemonic_decode().
	@@ -636,5 +542,8 @@
542	sqlite3_create_function(db, "mnemonic_encode", 1, SQLITE_ANY, 0,
543	sqlcmd_mnemonic_encode, 0, 0);
544	sqlite3_create_function(db, "mnemonic_decode", 1, SQLITE_ANY, 0,
545	sqlcmd_mnemonic_decode, 0, 0);
546	}
547
548	/* vi: set ts=2:sw=2:expandtab: */
549
550

Fossil SCM

Keyboard Shortcuts