Fossil SCM

fossil-scm / src / regexp.c

Blame History Raw 1172 lines

1	`/*`
2	`** Copyright (c) 2013 D. Richard Hipp`
3	`**`
4	`** This program is free software; you can redistribute it and/or`
5	`** modify it under the terms of the Simplified BSD License (also`
6	`** known as the "2-Clause License" or "FreeBSD License".)`
7	`**`
8	`** This program is distributed in the hope that it will be useful,`
9	`** but without any warranty; without even the implied warranty of`
10	`** merchantability or fitness for a particular purpose.`
11	`**`
12	`** Author contact information:`
13	`** [email protected]`
14	`** http://www.hwaci.com/drh/`
15	`**`
16	`******************************************************************************`
17	`**`
18	`** This file was adapted from the ext/misc/regexp.c file in SQLite3. That`
19	`** file is in the public domain.`
20	`**`
21	`** See ../www/grep.md for details of the algorithm and RE dialect.`
22	`**`
23	`**`
24	`** The following regular expression syntax is supported:`
25	`**`
26	`** X* zero or more occurrences of X`
27	`** X+ one or more occurrences of X`
28	`** X? zero or one occurrences of X`
29	`** X{p,q} between p and q occurrences of X`
30	`** (X) match X`
31	`** X\|Y X or Y`
32	`** ^X X occurring at the beginning of the string`
33	`** X$ X occurring at the end of the string`
34	`** . Match any single character`
35	`** \c Character c where c is one of \{}()[]\|*+?.`
36	`** \c C-language escapes for c in afnrtv. ex: \t or \n`
37	`** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX`
38	`** \xXX Where XX is exactly 2 hex digits, unicode value XX`
39	`** [abc] Any single character from the set abc`
40	`** [^abc] Any single character not in the set abc`
41	`** [a-z] Any single character in the range a-z`
42	`** [^a-z] Any single character not in the range a-z`
43	`** \b Word boundary`
44	`** \w Word character. [A-Za-z0-9_]`
45	`** \W Non-word character`
46	`** \d Digit`
47	`** \D Non-digit`
48	`** \s Whitespace character`
49	`** \S Non-whitespace character`
50	`**`
51	`** A nondeterministic finite automaton (NFA) is used for matching, so the`
52	`** performance is bounded by O(N*M) where N is the size of the regular`
53	`** expression and M is the size of the input string. The matcher never`
54	`** exhibits exponential behavior. Note that the X{p,q} operator expands`
55	`** to p copies of X following by q-p copies of X? and that the size of the`
56	`** regular expression in the O(N*M) performance bound is computed after`
57	`** this expansion.`
58	`**`
59	`** To help prevent DoS attacks, the maximum size of the NFA is restricted.`
60	`*/`
61	`#include "config.h"`
62	`#include "regexp.h"`
63
64	`/* The end-of-input character */`
65	`#define RE_EOF 0 /* End of input */`
66	`#define RE_START 0xfffffff /* Start of input - larger than an UTF-8 */`
67
68	`/* The NFA is implemented as sequence of opcodes taken from the following`
69	`** set. Each opcode has a single integer argument.`
70	`*/`
71	`#define RE_OP_MATCH 1 /* Match the one character in the argument */`
72	`#define RE_OP_ANY 2 /* Match any one character. (Implements ".") */`
73	`#define RE_OP_ANYSTAR 3 /* Special optimized version of .* */`
74	`#define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */`
75	`#define RE_OP_GOTO 5 /* Jump to opcode at iArg */`
76	`#define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */`
77	`#define RE_OP_CC_INC 7 /* Beginning of a [...] character class */`
78	`#define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */`
79	`#define RE_OP_CC_VALUE 9 /* Single value in a character class */`
80	`#define RE_OP_CC_RANGE 10 /* Range of values in a character class */`
81	`#define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */`
82	`#define RE_OP_NOTWORD 12 /* Not a perl word character */`
83	`#define RE_OP_DIGIT 13 /* digit: [0-9] */`
84	`#define RE_OP_NOTDIGIT 14 /* Not a digit */`
85	`#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */`
86	`#define RE_OP_NOTSPACE 16 /* Not a digit */`
87	`#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */`
88	`#define RE_OP_ATSTART 18 /* Currently at the start of the string */`
89
90	`/* Each opcode is a "state" in the NFA */`
91	`typedef unsigned short ReStateNumber;`
92
93	`/* Because this is an NFA and not a DFA, multiple states can be active at`
94	`** once. An instance of the following object records all active states in`
95	`** the NFA. The implementation is optimized for the common case where the`
96	`** number of actives states is small.`
97	`*/`
98	`typedef struct ReStateSet {`
99	`unsigned nState; /* Number of current states */`
100	`ReStateNumber aState; / Current states */`
101	`} ReStateSet;`
102
103	`#if INTERFACE`
104	`/* An input string read one character at a time.`
105	`*/`
106	`struct ReInput {`
107	`const unsigned char z; / All text */`
108	`int i; /* Next byte to read */`
109	`int mx; /* EOF when i>=mx */`
110	`};`
111
112	`/* A compiled NFA (or an NFA that is in the process of being compiled) is`
113	`** an instance of the following object.`
114	`*/`
115	`struct ReCompiled {`
116	`ReInput sIn; /* Regular expression text */`
117	`const char zErr; / Error message to return */`
118	`char aOp; / Operators for the virtual machine */`
119	`int aArg; / Arguments to each operator */`
120	`unsigned (xNextChar)(ReInput); /* Next character function */`
121	`unsigned char zInit[12]; /* Initial text to match */`
122	`int nInit; /* Number of bytes in zInit */`
123	`unsigned nState; /* Number of entries in aOp[] and aArg[] */`
124	`unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */`
125	`unsigned mxAlloc; /* Complexity limit */`
126	`};`
127	`#endif`
128
129	`/* Add a state to the given state set if it is not already there */`
130	`static void re_add_state(ReStateSet *pSet, int newState){`
131	`unsigned i;`
132	`for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return;`
133	`pSet->aState[pSet->nState++] = (ReStateNumber)newState;`
134	`}`
135
136	`/* Extract the next unicode character from *pzIn and return it. Advance`
137	`** *pzIn to the first byte past the end of the character returned. To`
138	`** be clear: this routine converts utf8 to unicode. This routine is`
139	`** optimized for the common case where the next character is a single byte.`
140	`*/`
141	`static unsigned re_next_char(ReInput *p){`
142	`unsigned c;`
143	`if( p->i>=p->mx ) return 0;`
144	`c = p->z[p->i++];`
145	`if( c>=0x80 ){`
146	`if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){`
147	`c = (c&0x1f)<<6 \| (p->z[p->i++]&0x3f);`
148	`if( c<0x80 ) c = 0xfffd;`
149	`}else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80`
150	`&& (p->z[p->i+1]&0xc0)==0x80 ){`
151	`c = (c&0x0f)<<12 \| ((p->z[p->i]&0x3f)<<6) \| (p->z[p->i+1]&0x3f);`
152	`p->i += 2;`
153	`if( c<=0x7ff \|\| (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;`
154	`}else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80`
155	`&& (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){`
156	`c = (c&0x07)<<18 \| ((p->z[p->i]&0x3f)<<12) \| ((p->z[p->i+1]&0x3f)<<6)`
157	`\| (p->z[p->i+2]&0x3f);`
158	`p->i += 3;`
159	`if( c<=0xffff \|\| c>0x10ffff ) c = 0xfffd;`
160	`}else{`
161	`c = 0xfffd;`
162	`}`
163	`}`
164	`return c;`
165	`}`
166	`static unsigned re_next_char_nocase(ReInput *p){`
167	`unsigned c = re_next_char(p);`
168	`return unicode_fold(c,2);`
169	`}`
170
171	`/* Return true if c is a perl "word" character: [A-Za-z0-9_] */`
172	`static int re_word_char(int c){`
173	`return unicode_isalnum(c) \|\| c=='_';`
174	`}`
175
176	`/* Return true if c is a "digit" character: [0-9] */`
177	`static int re_digit_char(int c){`
178	`return (c>='0' && c<='9');`
179	`}`
180
181	`/* Return true if c is a perl "space" character: [ \t\r\n\v\f] */`
182	`static int re_space_char(int c){`
183	`return c==' ' \|\| c=='\t' \|\| c=='\n' \|\| c=='\r' \|\| c=='\v' \|\| c=='\f';`
184	`}`
185
186	`/* Run a compiled regular expression on the zero-terminated input`
187	`** string zIn[]. Return true on a match and false if there is no match.`
188	`*/`
189	`int re_match(ReCompiled pRe, const unsigned char zIn, int nIn){`
190	`ReStateSet aStateSet[2], pThis, pNext;`
191	`ReStateNumber aSpace[100];`
192	`ReStateNumber *pToFree;`
193	`unsigned int i = 0;`
194	`unsigned int iSwap = 0;`
195	`int c = RE_START;`
196	`int cPrev = 0;`
197	`int rc = 0;`
198	`ReInput in;`
199
200	`in.z = zIn;`
201	`in.i = 0;`
202	`in.mx = nIn>=0 ? nIn : (int)strlen((char const*)zIn);`
203
204	`/* Look for the initial prefix match, if there is one. */`
205	`if( pRe->nInit ){`
206	`unsigned char x = pRe->zInit[0];`
207	`while( in.i+pRe->nInit<=in.mx`
208	`&& (zIn[in.i]!=x \|\|`
209	`strncmp((const char)zIn+in.i, (const char)pRe->zInit, pRe->nInit)!=0)`
210	`){`
211	`in.i++;`
212	`}`
213	`if( in.i+pRe->nInit>in.mx ) return 0;`
214	`c = RE_START-1;`
215	`}`
216
217	`if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){`
218	`pToFree = 0;`
219	`aStateSet[0].aState = aSpace;`
220	`}else{`
221	`pToFree = fossil_malloc( sizeof(ReStateNumber)2pRe->nState );`
222	`if( pToFree==0 ) return -1;`
223	`aStateSet[0].aState = pToFree;`
224	`}`
225	`aStateSet[1].aState = &aStateSet[0].aState[pRe->nState];`
226	`pNext = &aStateSet[1];`
227	`pNext->nState = 0;`
228	`re_add_state(pNext, 0);`
229	`while( c!=RE_EOF && pNext->nState>0 ){`
230	`cPrev = c;`
231	`c = pRe->xNextChar(&in);`
232	`pThis = pNext;`
233	`pNext = &aStateSet[iSwap];`
234	`iSwap = 1 - iSwap;`
235	`pNext->nState = 0;`
236	`for(i=0; i<pThis->nState; i++){`
237	`int x = pThis->aState[i];`
238	`switch( pRe->aOp[x] ){`
239	`case RE_OP_MATCH: {`
240	`if( pRe->aArg[x]==c ) re_add_state(pNext, x+1);`
241	`break;`
242	`}`
243	`case RE_OP_ATSTART: {`
244	`if( cPrev==RE_START ) re_add_state(pThis, x+1);`
245	`break;`
246	`}`
247	`case RE_OP_ANY: {`
248	`if( c!=0 ) re_add_state(pNext, x+1);`
249	`break;`
250	`}`
251	`case RE_OP_WORD: {`
252	`if( re_word_char(c) ) re_add_state(pNext, x+1);`
253	`break;`
254	`}`
255	`case RE_OP_NOTWORD: {`
256	`if( !re_word_char(c) && c!=0 ) re_add_state(pNext, x+1);`
257	`break;`
258	`}`
259	`case RE_OP_DIGIT: {`
260	`if( re_digit_char(c) ) re_add_state(pNext, x+1);`
261	`break;`
262	`}`
263	`case RE_OP_NOTDIGIT: {`
264	`if( !re_digit_char(c) && c!=0 ) re_add_state(pNext, x+1);`
265	`break;`
266	`}`
267	`case RE_OP_SPACE: {`
268	`if( re_space_char(c) ) re_add_state(pNext, x+1);`
269	`break;`
270	`}`
271	`case RE_OP_NOTSPACE: {`
272	`if( !re_space_char(c) && c!=0 ) re_add_state(pNext, x+1);`
273	`break;`
274	`}`
275	`case RE_OP_BOUNDARY: {`
276	`if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1);`
277	`break;`
278	`}`
279	`case RE_OP_ANYSTAR: {`
280	`re_add_state(pNext, x);`
281	`re_add_state(pThis, x+1);`
282	`break;`
283	`}`
284	`case RE_OP_FORK: {`
285	`re_add_state(pThis, x+pRe->aArg[x]);`
286	`re_add_state(pThis, x+1);`
287	`break;`
288	`}`
289	`case RE_OP_GOTO: {`
290	`re_add_state(pThis, x+pRe->aArg[x]);`
291	`break;`
292	`}`
293	`case RE_OP_ACCEPT: {`
294	`rc = 1;`
295	`goto re_match_end;`
296	`}`
297	`case RE_OP_CC_EXC: {`
298	`if( c==0 ) break;`
299	`/* fall-through */ goto re_op_cc_inc;`
300	`}`
301	`case RE_OP_CC_INC: re_op_cc_inc: {`
302	`int j = 1;`
303	`int n = pRe->aArg[x];`
304	`int hit = 0;`
305	`for(j=1; j>0 && j<n; j++){`
306	`if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){`
307	`if( pRe->aArg[x+j]==c ){`
308	`hit = 1;`
309	`j = -1;`
310	`}`
311	`}else{`
312	`if( pRe->aArg[x+j]<=c && pRe->aArg[x+j+1]>=c ){`
313	`hit = 1;`
314	`j = -1;`
315	`}else{`
316	`j++;`
317	`}`
318	`}`
319	`}`
320	`if( pRe->aOp[x]==RE_OP_CC_EXC ) hit = !hit;`
321	`if( hit ) re_add_state(pNext, x+n);`
322	`break;`
323	`}`
324	`}`
325	`}`
326	`}`
327	`for(i=0; i<pNext->nState; i++){`
328	`int x = pNext->aState[i];`
329	`while( pRe->aOp[x]==RE_OP_GOTO ) x += pRe->aArg[x];`
330	`if( pRe->aOp[x]==RE_OP_ACCEPT ){ rc = 1; break; }`
331	`}`
332	`re_match_end:`
333	`fossil_free(pToFree);`
334	`return rc;`
335	`}`
336
337	`/* Resize the opcode and argument arrays for an RE under construction.`
338	`*/`
339	`static int re_resize(ReCompiled *p, int N){`
340	`char *aOp;`
341	`int *aArg;`
342	`if( N>p->mxAlloc ){ p->zErr = "REGEXP pattern too big"; return 1; }`
343	`aOp = fossil_realloc(p->aOp, N*sizeof(p->aOp[0]));`
344	`if( aOp==0 ){ p->zErr = "out of memory"; return 1; }`
345	`p->aOp = aOp;`
346	`aArg = fossil_realloc(p->aArg, N*sizeof(p->aArg[0]));`
347	`if( aArg==0 ){ p->zErr = "out of memory"; return 1; }`
348	`p->aArg = aArg;`
349	`p->nAlloc = N;`
350	`return 0;`
351	`}`
352
353	`/* Insert a new opcode and argument into an RE under construction. The`
354	`** insertion point is just prior to existing opcode iBefore.`
355	`*/`
356	`static int re_insert(ReCompiled *p, int iBefore, int op, int arg){`
357	`int i;`
358	`if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0;`
359	`for(i=p->nState; i>iBefore; i--){`
360	`p->aOp[i] = p->aOp[i-1];`
361	`p->aArg[i] = p->aArg[i-1];`
362	`}`
363	`p->nState++;`
364	`p->aOp[iBefore] = (char)op;`
365	`p->aArg[iBefore] = arg;`
366	`return iBefore;`
367	`}`
368
369	`/* Append a new opcode and argument to the end of the RE under construction.`
370	`*/`
371	`static int re_append(ReCompiled *p, int op, int arg){`
372	`return re_insert(p, p->nState, op, arg);`
373	`}`
374
375	`/* Make a copy of N opcodes starting at iStart onto the end of the RE`
376	`** under construction.`
377	`*/`
378	`static void re_copy(ReCompiled *p, int iStart, int N){`
379	`if( p->nState+N>=p->nAlloc && re_resize(p, p->nAlloc*2+N) ) return;`
380	`memcpy(&p->aOp[p->nState], &p->aOp[iStart], N*sizeof(p->aOp[0]));`
381	`memcpy(&p->aArg[p->nState], &p->aArg[iStart], N*sizeof(p->aArg[0]));`
382	`p->nState += N;`
383	`}`
384
385	`/* Return true if c is a hexadecimal digit character: [0-9a-fA-F]`
386	`** If c is a hex digit, also set pV = (pV)*16 + valueof(c). If`
387	`** c is not a hex digit *pV is unchanged.`
388	`*/`
389	`static int re_hex(int c, int *pV){`
390	`if( c>='0' && c<='9' ){`
391	`c -= '0';`
392	`}else if( c>='a' && c<='f' ){`
393	`c -= 'a' - 10;`
394	`}else if( c>='A' && c<='F' ){`
395	`c -= 'A' - 10;`
396	`}else{`
397	`return 0;`
398	`}`
399	`pV = (pV)*16 + (c & 0xff);`
400	`return 1;`
401	`}`
402
403	`/* A backslash character has been seen, read the next character and`
404	`** return its interpretation.`
405	`*/`
406	`static unsigned re_esc_char(ReCompiled *p){`
407	`static const char zEsc[] = "afnrtv\\()*.+?[$^{\|}]";`
408	`static const char zTrans[] = "\a\f\n\r\t\v";`
409	`int i, v = 0;`
410	`char c;`
411	`if( p->sIn.i>=p->sIn.mx ) return 0;`
412	`c = p->sIn.z[p->sIn.i];`
413	`if( c=='u' && p->sIn.i+4<p->sIn.mx ){`
414	`const unsigned char *zIn = p->sIn.z + p->sIn.i;`
415	`if( re_hex(zIn[1],&v)`
416	`&& re_hex(zIn[2],&v)`
417	`&& re_hex(zIn[3],&v)`
418	`&& re_hex(zIn[4],&v)`
419	`){`
420	`p->sIn.i += 5;`
421	`return v;`
422	`}`
423	`}`
424	`if( c=='x' && p->sIn.i+2<p->sIn.mx ){`
425	`const unsigned char *zIn = p->sIn.z + p->sIn.i;`
426	`if( re_hex(zIn[1],&v)`
427	`&& re_hex(zIn[2],&v)`
428	`){`
429	`p->sIn.i += 3;`
430	`return v;`
431	`}`
432	`}`
433	`for(i=0; zEsc[i] && zEsc[i]!=c; i++){}`
434	`if( zEsc[i] ){`
435	`if( i<6 ) c = zTrans[i];`
436	`p->sIn.i++;`
437	`}else{`
438	`p->zErr = "unknown \\ escape";`
439	`}`
440	`return c;`
441	`}`
442
443	`/* Forward declaration */`
444	`static const char re_subcompile_string(ReCompiled);`
445
446	`/* Peek at the next byte of input */`
447	`static unsigned char rePeek(ReCompiled *p){`
448	`return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0;`
449	`}`
450
451	`/* Compile RE text into a sequence of opcodes. Continue up to the`
452	`** first unmatched ")" character, then return. If an error is found,`
453	`** return a pointer to the error message string.`
454	`*/`
455	`static const char re_subcompile_re(ReCompiled p){`
456	`const char *zErr;`
457	`int iStart, iEnd, iGoto;`
458	`iStart = p->nState;`
459	`zErr = re_subcompile_string(p);`
460	`if( zErr ) return zErr;`
461	`while( rePeek(p)=='\|' ){`
462	`iEnd = p->nState;`
463	`re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart);`
464	`iGoto = re_append(p, RE_OP_GOTO, 0);`
465	`p->sIn.i++;`
466	`zErr = re_subcompile_string(p);`
467	`if( zErr ) return zErr;`
468	`p->aArg[iGoto] = p->nState - iGoto;`
469	`}`
470	`return 0;`
471	`}`
472
473	`/* Compile an element of regular expression text (anything that can be`
474	`** an operand to the "\|" operator). Return NULL on success or a pointer`
475	`** to the error message if there is a problem.`
476	`*/`
477	`static const char re_subcompile_string(ReCompiled p){`
478	`int iPrev = -1;`
479	`int iStart;`
480	`unsigned c;`
481	`const char *zErr;`
482	`while( (c = p->xNextChar(&p->sIn))!=0 ){`
483	`iStart = p->nState;`
484	`switch( c ){`
485	`case '\|':`
486	`case ')': {`
487	`p->sIn.i--;`
488	`return 0;`
489	`}`
490	`case '(': {`
491	`zErr = re_subcompile_re(p);`
492	`if( zErr ) return zErr;`
493	`if( rePeek(p)!=')' ) return "unmatched '('";`
494	`p->sIn.i++;`
495	`break;`
496	`}`
497	`case '.': {`
498	`if( rePeek(p)=='*' ){`
499	`re_append(p, RE_OP_ANYSTAR, 0);`
500	`p->sIn.i++;`
501	`}else{`
502	`re_append(p, RE_OP_ANY, 0);`
503	`}`
504	`break;`
505	`}`
506	`case '*': {`
507	`if( iPrev<0 ) return "'*' without operand";`
508	`re_insert(p, iPrev, RE_OP_GOTO, p->nState - iPrev + 1);`
509	`re_append(p, RE_OP_FORK, iPrev - p->nState + 1);`
510	`break;`
511	`}`
512	`case '+': {`
513	`if( iPrev<0 ) return "'+' without operand";`
514	`re_append(p, RE_OP_FORK, iPrev - p->nState);`
515	`break;`
516	`}`
517	`case '?': {`
518	`if( iPrev<0 ) return "'?' without operand";`
519	`re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1);`
520	`break;`
521	`}`
522	`case '$': {`
523	`re_append(p, RE_OP_MATCH, RE_EOF);`
524	`break;`
525	`}`
526	`case '^': {`
527	`re_append(p, RE_OP_ATSTART, 0);`
528	`break;`
529	`}`
530	`case '{': {`
531	`unsigned int m = 0, n = 0;`
532	`unsigned int sz, j;`
533	`if( iPrev<0 ) return "'{m,n}' without operand";`
534	`while( (c=rePeek(p))>='0' && c<='9' ){`
535	`m = m*10 + c - '0';`
536	`if( m*2>p->mxAlloc ) return "REGEXP pattern too big";`
537	`p->sIn.i++;`
538	`}`
539	`n = m;`
540	`if( c==',' ){`
541	`p->sIn.i++;`
542	`n = 0;`
543	`while( (c=rePeek(p))>='0' && c<='9' ){`
544	`n = n*10 + c-'0';`
545	`if( n*2>p->mxAlloc ) return "REGEXP pattern too big";`
546	`p->sIn.i++;`
547	`}`
548	`}`
549	`if( c!='}' ) return "unmatched '{'";`
550	`if( n<m ) return "n less than m in '{m,n}'";`
551	`p->sIn.i++;`
552	`sz = p->nState - iPrev;`
553	`if( m==0 ){`
554	`if( n==0 ) return "both m and n are zero in '{m,n}'";`
555	`re_insert(p, iPrev, RE_OP_FORK, sz+1);`
556	`iPrev++;`
557	`n--;`
558	`}else{`
559	`for(j=1; j<m; j++) re_copy(p, iPrev, sz);`
560	`}`
561	`for(j=m; j<n; j++){`
562	`re_append(p, RE_OP_FORK, sz+1);`
563	`re_copy(p, iPrev, sz);`
564	`}`
565	`if( n==0 && m>0 ){`
566	`re_append(p, RE_OP_FORK, -(int)sz);`
567	`}`
568	`break;`
569	`}`
570	`case '[': {`
571	`unsigned int iFirst = p->nState;`
572	`if( rePeek(p)=='^' ){`
573	`re_append(p, RE_OP_CC_EXC, 0);`
574	`p->sIn.i++;`
575	`}else{`
576	`re_append(p, RE_OP_CC_INC, 0);`
577	`}`
578	`while( (c = p->xNextChar(&p->sIn))!=0 ){`
579	`if( c=='[' && rePeek(p)==':' ){`
580	`return "POSIX character classes not supported";`
581	`}`
582	`if( c=='\\' ) c = re_esc_char(p);`
583	`if( rePeek(p)=='-' ){`
584	`re_append(p, RE_OP_CC_RANGE, c);`
585	`p->sIn.i++;`
586	`c = p->xNextChar(&p->sIn);`
587	`if( c=='\\' ) c = re_esc_char(p);`
588	`re_append(p, RE_OP_CC_RANGE, c);`
589	`}else{`
590	`re_append(p, RE_OP_CC_VALUE, c);`
591	`}`
592	`if( rePeek(p)==']' ){ p->sIn.i++; break; }`
593	`}`
594	`if( c==0 ) return "unclosed '['";`
595	`if( p->nState>iFirst ) p->aArg[iFirst] = p->nState - iFirst;`
596	`break;`
597	`}`
598	`case '\\': {`
599	`int specialOp = 0;`
600	`switch( rePeek(p) ){`
601	`case 'b': specialOp = RE_OP_BOUNDARY; break;`
602	`case 'd': specialOp = RE_OP_DIGIT; break;`
603	`case 'D': specialOp = RE_OP_NOTDIGIT; break;`
604	`case 's': specialOp = RE_OP_SPACE; break;`
605	`case 'S': specialOp = RE_OP_NOTSPACE; break;`
606	`case 'w': specialOp = RE_OP_WORD; break;`
607	`case 'W': specialOp = RE_OP_NOTWORD; break;`
608	`}`
609	`if( specialOp ){`
610	`p->sIn.i++;`
611	`re_append(p, specialOp, 0);`
612	`}else{`
613	`c = re_esc_char(p);`
614	`re_append(p, RE_OP_MATCH, c);`
615	`}`
616	`break;`
617	`}`
618	`default: {`
619	`re_append(p, RE_OP_MATCH, c);`
620	`break;`
621	`}`
622	`}`
623	`iPrev = iStart;`
624	`}`
625	`return 0;`
626	`}`
627
628	`/* Free and reclaim all the memory used by a previously compiled`
629	`** regular expression. Applications should invoke this routine once`
630	`** for every call to re_compile() to avoid memory leaks.`
631	`*/`
632	`void re_free(ReCompiled *pRe){`
633	`if( pRe ){`
634	`fossil_free(pRe->aOp);`
635	`fossil_free(pRe->aArg);`
636	`fossil_free(pRe);`
637	`}`
638	`}`
639
640	`/*`
641	`** Compile a textual regular expression in zIn[] into a compiled regular`
642	`** expression suitable for us by re_match() and return a pointer to the`
643	`** compiled regular expression in *ppRe. Return NULL on success or an`
644	`** error message if something goes wrong.`
645	`*/`
646	`static const char *re_compile(`
647	`ReCompiled *ppRe, / OUT: write compiled NFA here */`
648	`const char zIn, / Input regular expression */`
649	`int mxRe, /* Complexity limit */`
650	`int noCase /* True for caseless comparisons */`
651	`){`
652	`ReCompiled *pRe;`
653	`const char *zErr;`
654	`int i, j;`
655
656	`*ppRe = 0;`
657	`pRe = fossil_malloc( sizeof(*pRe) );`
658	`if( pRe==0 ){`
659	`return "out of memory";`
660	`}`
661	`memset(pRe, 0, sizeof(*pRe));`
662	`pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char;`
663	`pRe->mxAlloc = mxRe;`
664	`if( re_resize(pRe, 30) ){`
665	`zErr = pRe->zErr;`
666	`re_free(pRe);`
667	`return zErr;`
668	`}`
669	`if( zIn[0]=='^' ){`
670	`zIn++;`
671	`}else{`
672	`re_append(pRe, RE_OP_ANYSTAR, 0);`
673	`}`
674	`pRe->sIn.z = (unsigned char*)zIn;`
675	`pRe->sIn.i = 0;`
676	`pRe->sIn.mx = (int)strlen(zIn);`
677	`zErr = re_subcompile_re(pRe);`
678	`if( zErr ){`
679	`re_free(pRe);`
680	`return zErr;`
681	`}`
682	`if( pRe->sIn.i>=pRe->sIn.mx ){`
683	`re_append(pRe, RE_OP_ACCEPT, 0);`
684	`*ppRe = pRe;`
685	`}else{`
686	`re_free(pRe);`
687	`return "unrecognized character";`
688	`}`
689
690	`/* The following is a performance optimization. If the regex begins with`
691	`** ".*" (if the input regex lacks an initial "^") and afterwards there are`
692	`** one or more matching characters, enter those matching characters into`
693	`** zInit[]. The re_match() routine can then search ahead in the input`
694	`** string looking for the initial match without having to run the whole`
695	`** regex engine over the string. Do not worry about trying to match`
696	`** unicode characters beyond plane 0 - those are very rare and this is`
697	`** just an optimization. */`
698	`if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){`
699	`for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){`
700	`unsigned x = pRe->aArg[i];`
701	`if( x<=0x7f ){`
702	`pRe->zInit[j++] = (unsigned char)x;`
703	`}else if( x<=0x7ff ){`
704	`pRe->zInit[j++] = (unsigned char)(0xc0 \| (x>>6));`
705	`pRe->zInit[j++] = 0x80 \| (x&0x3f);`
706	`}else if( x<=0xffff ){`
707	`pRe->zInit[j++] = (unsigned char)(0xe0 \| (x>>12));`
708	`pRe->zInit[j++] = 0x80 \| ((x>>6)&0x3f);`
709	`pRe->zInit[j++] = 0x80 \| (x&0x3f);`
710	`}else{`
711	`break;`
712	`}`
713	`}`
714	`if( j>0 && pRe->zInit[j-1]==0 ) j--;`
715	`pRe->nInit = j;`
716	`}`
717	`return pRe->zErr;`
718	`}`
719
720	`/*`
721	`** Implementation of the regexp() SQL function. This function implements`
722	`** the build-in REGEXP operator. The first argument to the function is the`
723	`** pattern and the second argument is the string. So, the SQL statements:`
724	`**`
725	`** A REGEXP B`
726	`**`
727	`** is implemented as regexp(B,A).`
728	`*/`
729	`static void re_sql_func(`
730	`sqlite3_context *context,`
731	`int argc,`
732	`sqlite3_value **argv`
733	`){`
734	`ReCompiled pRe; / Compiled regular expression */`
735	`const char zPattern; / The regular expression */`
736	`const unsigned char zStr;/ String being searched */`
737	`const char zErr; / Compile error message */`
738	`int setAux = 0; /* True to invoke sqlite3_set_auxdata() */`
739
740	`(void)argc; /* Unused */`
741	`pRe = sqlite3_get_auxdata(context, 0);`
742	`if( pRe==0 ){`
743	`zPattern = (const char*)sqlite3_value_text(argv[0]);`
744	`if( zPattern==0 ) return;`
745	`zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);`
746	`if( zErr ){`
747	`re_free(pRe);`
748	`/* The original SQLite function from which this code was copied raises`
749	`** an error if the REGEXP contained a syntax error. This variant`
750	`** silently fails to match, as that works better for Fossil.`
751	`** sqlite3_result_error(context, zErr, -1); */`
752	`sqlite3_result_int(context, 0);`
753	`return;`
754	`}`
755	`if( pRe==0 ){`
756	`sqlite3_result_error_nomem(context);`
757	`return;`
758	`}`
759	`setAux = 1;`
760	`}`
761	`zStr = (const unsigned char*)sqlite3_value_text(argv[1]);`
762	`if( zStr!=0 ){`
763	`sqlite3_result_int(context, re_match(pRe, zStr, -1));`
764	`}`
765	`if( setAux ){`
766	`sqlite3_set_auxdata(context, 0, pRe, (void()(void))re_free);`
767	`}`
768	`}`
769
770	`/*`
771	`** Invoke this routine to register the regexp() function with the`
772	`** SQLite database connection.`
773	`*/`
774	`int re_add_sql_func(sqlite3 *db){`
775	`int rc;`
776	`rc = sqlite3_create_function(db, "regexp", 2,`
777	`SQLITE_UTF8\|SQLITE_INNOCUOUS\|SQLITE_DETERMINISTIC,`
778	`0, re_sql_func, 0, 0);`
779	`if( rc==SQLITE_OK ){`
780	`/* The regexpi(PATTERN,STRING) function is a case-insensitive version`
781	`** of regexp(PATTERN,STRING). */`
782	`rc = sqlite3_create_function(db, "regexpi", 2,`
783	`SQLITE_UTF8\|SQLITE_INNOCUOUS\|SQLITE_DETERMINISTIC,`
784	`(void*)db, re_sql_func, 0, 0);`
785	`}`
786	`return rc;`
787	`}`
788
789	`/*`
790	`** The input zIn is a string that we want to match exactly as part of`
791	`** a regular expression. Return a new string (in space obtained from`
792	`** fossil_malloc() or the equivalent) that escapes all regexp syntax`
793	`** characters in zIn.`
794	`*/`
795	`char re_quote(const char zIn){`
796	`Blob out;`
797	`blob_init(&out, 0, 0);`
798	`while( zIn[0] ){`
799	`switch( zIn[0] ){`
800	`case '.':`
801	`case '?':`
802	`case '*':`
803	`case '+':`
804	`case '\\':`
805	`case '(':`
806	`case ')':`
807	`case '[':`
808	`case ']':`
809	`case '\|':`
810	`case '^':`
811	`case '$':`
812	`case '{':`
813	`case '}': {`
814	`blob_appendf(&out,"\\x%02x", (unsigned char)zIn[0]);`
815	`break;`
816	`}`
817	`default: {`
818	`blob_append_char(&out, zIn[0]);`
819	`break;`
820	`}`
821	`}`
822	`zIn++;`
823	`}`
824	`blob_materialize(&out);`
825	`return out.aData;`
826	`}`
827
828	`/*`
829	`** SETTING: regexp-limit width=8 default=1000`
830	`**`
831	`** Limit the size of the bytecode used to implement a regular expression`
832	`** to this many steps. It is important to limit this to avoid possible`
833	`** DoS attacks.`
834	`*/`
835
836	`/*`
837	`** Compile an RE using re_maxlen().`
838	`*/`
839	`const char *fossil_re_compile(`
840	`ReCompiled *ppRe, / OUT: write compiled NFA here */`
841	`const char zIn, / Input regular expression */`
842	`int noCase /* True for caseless comparisons */`
843	`){`
844	`int mxLen = g.db ? db_get_int("regexp-limit",1000) : 1000;`
845	`return re_compile(ppRe, zIn, mxLen, noCase);`
846	`}`
847
848	`/*`
849	`** Run a "grep" over a single file read from disk.`
850	`*/`
851	`static void grep_file(ReCompiled pRe, const char zFile, FILE *in){`
852	`int ln = 0;`
853	`int n;`
854	`char zLine[2000];`
855	`while( fgets(zLine, sizeof(zLine), in) ){`
856	`ln++;`
857	`n = (int)strlen(zLine);`
858	`while( n && (zLine[n-1]=='\n' \|\| zLine[n-1]=='\r') ) n--;`
859	`if( re_match(pRe, (const unsigned char*)zLine, n) ){`
860	`fossil_print("%s:%d:%.*s\n", zFile, ln, n, zLine);`
861	`}`
862	`}`
863	`}`
864
865	`/*`
866	`** Flags for grep_buffer()`
867	`*/`
868	`#define GREP_EXISTS 0x001 /* If any match, print only the name and stop */`
869	`#define GREP_QUIET 0x002 /* Return code only */`
870
871	`/*`
872	`** Run a "grep" over a text file`
873	`*/`
874	`static int grep_buffer(`
875	`ReCompiled *pRe,`
876	`const char *zName,`
877	`const char *z,`
878	`u32 flags`
879	`){`
880	`int i, j, n, ln, cnt;`
881	`for(i=j=ln=cnt=0; z[i]; i=j+1){`
882	`for(j=i; z[j] && z[j]!='\n'; j++){}`
883	`n = j - i;`
884	`ln++;`
885	`if( re_match(pRe, (const unsigned char*)(z+i), j-i) ){`
886	`cnt++;`
887	`if( flags & GREP_EXISTS ){`
888	`if( (flags & GREP_QUIET)==0 && zName ) fossil_print("%s\n", zName);`
889	`break;`
890	`}`
891	`if( (flags & GREP_QUIET)==0 ){`
892	`if( cnt==1 && zName ){`
893	`fossil_print("== %s\n", zName);`
894	`}`
895	`fossil_print("%d:%.*s\n", ln, n, z+i);`
896	`}`
897	`}`
898	`}`
899	`return cnt;`
900	`}`
901
902	`/*`
903	`** COMMAND: test-grep`
904	`**`
905	`** Usage: %fossil test-grep REGEXP [FILE...]`
906	`**`
907	`** Run a regular expression match over the named disk files, or against`
908	`** standard input if no disk files are named on the command-line.`
909	`**`
910	`** Options:`
911	`** -i\|--ignore-case Ignore case`
912	`** --robot-exception Use the robot-exception setting as the REGEXP`
913	`*/`
914	`void re_test_grep(void){`
915	`ReCompiled *pRe;`
916	`const char *zErr;`
917	`int iFileList = 3;`
918	`int ignoreCase = find_option("ignore-case","i",0)!=0;`
919	`int bRobot = find_option("robot-exception",0,0)!=0;`
920	`if( bRobot ){`
921	`const char *zRe;`
922	`db_find_and_open_repository(0,0);`
923	`verify_all_options();`
924	`zRe = db_get("robot-exception","^$");`
925	`zErr = fossil_re_compile(&pRe, zRe, ignoreCase);`
926	`iFileList = 2;`
927	`}else{`
928	`verify_all_options();`
929	`if( g.argc<3 ){`
930	`usage("REGEXP [FILE...]");`
931	`}`
932	`zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase);`
933	`}`
934	`if( zErr ) fossil_fatal("%s", zErr);`
935	`if( g.argc==iFileList ){`
936	`grep_file(pRe, "-", stdin);`
937	`}else{`
938	`int i;`
939	`for(i=iFileList; i<g.argc; i++){`
940	`FILE *in = fossil_fopen(g.argv[i], "rb");`
941	`if( in==0 ){`
942	`fossil_warning("cannot open \"%s\"", g.argv[i]);`
943	`}else{`
944	`grep_file(pRe, g.argv[i], in);`
945	`fclose(in);`
946	`}`
947	`}`
948	`}`
949	`re_free(pRe);`
950	`}`
951
952	`/*`
953	`** COMMAND: grep`
954	`**`
955	`** Usage: %fossil grep [OPTIONS] PATTERN FILENAME ...`
956	`**`
957	`** Attempt to match the given POSIX extended regular expression PATTERN over`
958	`** all historic versions of FILENAME. The search begins with the most recent`
959	`** version of the file and moves backwards in time. Multiple FILENAMEs can`
960	`** be specified, in which case all named files are searched in reverse`
961	`** chronological order.`
962	`**`
963	`** For details of the supported regular expression dialect, see`
964	`** https://fossil-scm.org/fossil/doc/trunk/www/grep.md`
965	`**`
966	`** Options:`
967	`** -c\|--count Suppress normal output; instead print a count`
968	`** of the number of matching files`
969	`** -i\|--ignore-case Ignore case`
970	`** -l\|--files-with-matches List only hash for each match`
971	`** --once Stop searching after the first match`
972	`** -s\|--no-messages Suppress error messages about nonexistent`
973	`** or unreadable files`
974	`** -v\|--invert-match Invert the sense of matching. Show only`
975	`** files that have no matches. Implies -l`
976	`** --verbose Show each file as it is analyzed`
977	`*/`
978	`void re_grep_cmd(void){`
979	`u32 flags = 0;`
980	`int bVerbose = 0;`
981	`ReCompiled *pRe;`
982	`const char *zErr;`
983	`int ignoreCase = 0;`
984	`Blob fullName;`
985	`int ii;`
986	`int nMatch = 0;`
987	`int bNoMsg;`
988	`int cntFlag;`
989	`int bOnce;`
990	`int bInvert;`
991	`int nSearch = 0;`
992	`Stmt q;`
993
994
995	`if( find_option("ignore-case","i",0)!=0 ) ignoreCase = 1;`
996	`if( find_option("files-with-matches","l",0)!=0 ) flags \|= GREP_EXISTS;`
997	`if( find_option("verbose",0,0)!=0 ) bVerbose = 1;`
998	`if( g.fQuiet ) flags \|= GREP_QUIET\|GREP_EXISTS;`
999	`bNoMsg = find_option("no-messages","s",0)!=0;`
1000	`bOnce = find_option("once",0,0)!=0;`
1001	`bInvert = find_option("invert-match","v",0)!=0;`
1002	`if( bInvert ){`
1003	`flags \|= GREP_QUIET\|GREP_EXISTS;`
1004	`}`
1005	`cntFlag = find_option("count","c",0)!=0;`
1006	`if( cntFlag ){`
1007	`flags \|= GREP_QUIET\|GREP_EXISTS;`
1008	`}`
1009	`db_find_and_open_repository(0, 0);`
1010	`verify_all_options();`
1011	`if( g.argc<4 ){`
1012	`usage("REGEXP FILENAME ...");`
1013	`}`
1014	`zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase);`
1015	`if( zErr ) fossil_fatal("%s", zErr);`
1016
1017	`add_content_sql_commands(g.db);`
1018	`db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);");`
1019	`for(ii=3; ii<g.argc; ii++){`
1020	`const char *zTarget = g.argv[ii];`
1021	`if( file_tree_name(zTarget, &fullName, 0, 1) ){`
1022	`int fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q",`
1023	`blob_str(&fullName));`
1024	`if( !fnid ){`
1025	`if( bNoMsg ) continue;`
1026	`if( file_size(zTarget, ExtFILE)<0 ){`
1027	`fossil_fatal("no such file: %s", zTarget);`
1028	`}`
1029	`fossil_fatal("not a managed file: %s", zTarget);`
1030	`}else{`
1031	`db_multi_exec(`
1032	`"INSERT INTO arglist(iname,fname,fnid) VALUES(%Q,%Q,%d)",`
1033	`zTarget, blob_str(&fullName), fnid);`
1034	`}`
1035	`}`
1036	`blob_reset(&fullName);`
1037	`}`
1038	`db_prepare(&q,`
1039	`" SELECT"`
1040	`" A.uuid," /* file hash */`
1041	`" A.rid," /* file rid */`
1042	`" B.uuid," /* check-in hash */`
1043	`" datetime(min(event.mtime))," /* check-in time */`
1044	`" arglist.iname" /* file name */`
1045	`" FROM arglist, mlink, blob A, blob B, event"`
1046	`" WHERE mlink.mid=event.objid"`
1047	`" AND mlink.fid=A.rid"`
1048	`" AND mlink.mid=B.rid"`
1049	`" AND mlink.fnid=arglist.fnid"`
1050	`" GROUP BY A.uuid"`
1051	`" ORDER BY min(event.mtime) DESC;"`
1052	`);`
1053	`while( db_step(&q)==SQLITE_ROW ){`
1054	`const char *zFileHash = db_column_text(&q,0);`
1055	`int rid = db_column_int(&q,1);`
1056	`const char *zCkinHash = db_column_text(&q,2);`
1057	`const char *zDate = db_column_text(&q,3);`
1058	`const char *zFN = db_column_text(&q,4);`
1059	`char *zLabel;`
1060	`Blob cx;`
1061	`content_get(rid, &cx);`
1062	`zLabel = mprintf("%.16s %s %S checkin %S", zDate, zFN,zFileHash,zCkinHash);`
1063	`if( bVerbose ) fossil_print("Scanning: %s\n", zLabel);`
1064	`nSearch++;`
1065	`nMatch += grep_buffer(pRe, zLabel, blob_str(&cx), flags);`
1066	`blob_reset(&cx);`
1067	`if( bInvert && cntFlag==0 ){`
1068	`if( nMatch==0 ){`
1069	`fossil_print("== %s\n", zLabel);`
1070	`if( bOnce ) nMatch = 1;`
1071	`}else{`
1072	`nMatch = 0;`
1073	`}`
1074	`}`
1075	`fossil_free(zLabel);`
1076	`if( nMatch ){`
1077	`if( (flags & GREP_QUIET)!=0 ) break;`
1078	`if( bOnce ) break;`
1079	`}`
1080	`}`
1081	`db_finalize(&q);`
1082	`re_free(pRe);`
1083	`if( cntFlag ){`
1084	`if( bInvert ){`
1085	`fossil_print("%d\n", nSearch-nMatch);`
1086	`}else{`
1087	`fossil_print("%d\n", nMatch);`
1088	`}`
1089	`}`
1090	`}`
1091
1092	`/*`
1093	`** WEBPAGE: re_rules`
1094	`**`
1095	`** Show a summary of the regular expression matching rules for Fossil.`
1096	`*/`
1097	`void re_rules_page(void){`
1098	`style_set_current_feature("wiki");`
1099	`style_header("Regular Expression Syntax");`
1100	`@ <p>Syntax rules for regular expression matching in Fossil:</p>`
1101	`@`
1102	`@ <table border="0" cellpadding="0" cellspacing="0">`
1103	`@ <tr><th>&emsp;&emsp;&emsp;<th>Pattern`
1104	`@ <th>&emsp;&emsp;&emsp;<th align="left">Match`
1105	`@ <tr><td><td><i>X</i><b>*</b>`
1106	`@ <td><td>Zero or more occurrences of <i>X</i>`
1107	`@ <tr><td><td><i>X</i><b>+</b>`
1108	`@ <td><td>One or more occurrences of <i>X</i>`
1109	`@ <tr><td><td><i>X</i><b>?</b>`
1110	`@ <td><td>Zero or one occurrences of <i>X</i>`
1111	`@ <tr><td><td><i>X</i><b>{</b><i>P</i><b>,</b><i>Q</i><b>}</b>`
1112	`@ <td><td>Between P and Q occurrences of <i>X</i>`
1113	`@ <tr><td><td><b>(</b><i>X</i><b>)</b>`
1114	`@ <td><td><i>X</i>`
1115	`@ <tr><td><td><i>X</i><b>\|</b><i>Y</i>`
1116	`@ <td><td><i>X</i> or <i>Y</i>`
1117	`@ <tr><td><td><b>^</b><i>X</i>`
1118	`@ <td><td><i>X</i> at the beginning of the string`
1119	`@ <tr><td><td><i>X</i><b>$</b>`
1120	`@ <td><td><i>X</i> at the end of the string`
1121	`@ <tr><td><td><b>.</b>`
1122	`@ <td><td>Any single character`
1123	`@ <tr><td><td><b>\</b><i>C</i>`
1124	`@ <td><td>Character <i>C</i> if <i>C</i> is one of: <b>\{}()[]\|*+?</b>`
1125	`@ <tr><td><td><b>\</b><i>C</i>`
1126	`@ <td><td>C-language escapes if <i>C</i> is one of: <b>afnrtv</b>`
1127	`@ <tr><td><td><b>\u</b><i>HHHH</i>`
1128	`@ <td><td>Unicode character U+HHHH where <i>HHHH</i> is four hex digits`
1129	`@ <tr><td><td><b>\</b><i>HH</i>`
1130	`@ <td><td>Unicode character U+00HH where <i>HH</i> is two hex digits`
1131	`@ <tr><td><td><b>[</b><i>abc</i><b>]</b>`
1132	`@ <td><td>Any single character from <i>abc</i>`
1133	`@ <tr><td><td><b>[^</b><i>abc</i><b>]</b>`
1134	`@ <td><td>Any single character not in <i>abc</i>`
1135	`@ <tr><td><td><b>[</b><i>a-z</i><b>]</b>`
1136	`@ <td><td>Any single character between <i>a</i> and <i>z</i>, inclusive`
1137	`@ <tr><td><td><b>[^</b><i>a-z</i><b>]</b>`
1138	`@ <td><td>Any single character not between <i>a</i> and <i>z</i>`
1139	`@ <tr><td><td><b>\b</b>`
1140	`@ <td><td>Word boundary`
1141	`@ <tr><td><td><b>\w</b>`
1142	`@ <td><td>A word character: a-zA-Z0-9 or _`
1143	`@ <tr><td><td><b>\W</b>`
1144	`@ <td><td>A non-word character`
1145	`@ <tr><td><td><b>\d</b>`
1146	`@ <td><td>A digit. 0-9`
1147	`@ <tr><td><td><b>\D</b>`
1148	`@ <td><td>A non-digit character`
1149	`@ <tr><td><td><b>\s</b>`
1150	`@ <td><td>A whitespace character`
1151	`@ <tr><td><td><b>\S</b>`
1152	`@ <td><td>A non-whitespace character`
1153	`@ </table>`
1154	`@`
1155	`@ <p>In the "Pattern" column of the table above:</p>`
1156	`@ <ul>`
1157	`@ <li> "<i>X</i>" and "<i>Y</i>" mean any subpattern`
1158	`@ <li> "<i>P</i>" and "<i>Q</i>" mean integers`
1159	`@ <li> "<i>C</i>" means a single character`
1160	`@ <li> "<i>H</i>" means a hexadecimal digit`
1161	`@ <li> "<i>abc</i>" means any sequences of one or more characters`
1162	`@ <li> "<i>a-z</i>" means any single character, a single "<b>-</b>"`
1163	`@ character, and then one additional character.`
1164	`@ <li> All other symbols in the patterns are literal text`
1165	`@ </ul>`
1166	`@`
1167	`@ <p>The "<i>X</i><b>\|</b><i>Y</i>" pattern has lower precedence`
1168	`@ than the others. Use "<b>(</b>...<b>)</b>" for grouping, as`
1169	`@ necessary.`
1170	`style_finish_page();`
1171	`}`
1172

Fossil SCM

Keyboard Shortcuts