|
e2552de…
|
drh
|
1 |
/* |
|
e2552de…
|
drh
|
2 |
** Copyright (c) 2013 D. Richard Hipp |
|
e2552de…
|
drh
|
3 |
** |
|
e2552de…
|
drh
|
4 |
** This program is free software; you can redistribute it and/or |
|
e2552de…
|
drh
|
5 |
** modify it under the terms of the Simplified BSD License (also |
|
e2552de…
|
drh
|
6 |
** known as the "2-Clause License" or "FreeBSD License".) |
|
cb952c4…
|
drh
|
7 |
** |
|
e2552de…
|
drh
|
8 |
** This program is distributed in the hope that it will be useful, |
|
e2552de…
|
drh
|
9 |
** but without any warranty; without even the implied warranty of |
|
e2552de…
|
drh
|
10 |
** merchantability or fitness for a particular purpose. |
|
e2552de…
|
drh
|
11 |
** |
|
e2552de…
|
drh
|
12 |
** Author contact information: |
|
e2552de…
|
drh
|
13 |
** [email protected] |
|
e2552de…
|
drh
|
14 |
** http://www.hwaci.com/drh/ |
|
e2552de…
|
drh
|
15 |
** |
|
dfaa221…
|
drh
|
16 |
****************************************************************************** |
|
e2552de…
|
drh
|
17 |
** |
|
b2c424a…
|
jan.nijtmans
|
18 |
** This file was adapted from the ext/misc/regexp.c file in SQLite3. That |
|
e2552de…
|
drh
|
19 |
** file is in the public domain. |
|
e2552de…
|
drh
|
20 |
** |
|
2e1775e…
|
wyoung
|
21 |
** See ../www/grep.md for details of the algorithm and RE dialect. |
|
dfaa221…
|
drh
|
22 |
** |
|
dfaa221…
|
drh
|
23 |
** |
|
dfaa221…
|
drh
|
24 |
** The following regular expression syntax is supported: |
|
dfaa221…
|
drh
|
25 |
** |
|
dfaa221…
|
drh
|
26 |
** X* zero or more occurrences of X |
|
dfaa221…
|
drh
|
27 |
** X+ one or more occurrences of X |
|
dfaa221…
|
drh
|
28 |
** X? zero or one occurrences of X |
|
82888a0…
|
drh
|
29 |
** X{p,q} between p and q occurrences of X |
|
dfaa221…
|
drh
|
30 |
** (X) match X |
|
dfaa221…
|
drh
|
31 |
** X|Y X or Y |
|
dfaa221…
|
drh
|
32 |
** ^X X occurring at the beginning of the string |
|
dfaa221…
|
drh
|
33 |
** X$ X occurring at the end of the string |
|
dfaa221…
|
drh
|
34 |
** . Match any single character |
|
dfaa221…
|
drh
|
35 |
** \c Character c where c is one of \{}()[]|*+?. |
|
dfaa221…
|
drh
|
36 |
** \c C-language escapes for c in afnrtv. ex: \t or \n |
|
dfaa221…
|
drh
|
37 |
** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX |
|
dfaa221…
|
drh
|
38 |
** \xXX Where XX is exactly 2 hex digits, unicode value XX |
|
dfaa221…
|
drh
|
39 |
** [abc] Any single character from the set abc |
|
dfaa221…
|
drh
|
40 |
** [^abc] Any single character not in the set abc |
|
dfaa221…
|
drh
|
41 |
** [a-z] Any single character in the range a-z |
|
dfaa221…
|
drh
|
42 |
** [^a-z] Any single character not in the range a-z |
|
dfaa221…
|
drh
|
43 |
** \b Word boundary |
|
dfaa221…
|
drh
|
44 |
** \w Word character. [A-Za-z0-9_] |
|
dfaa221…
|
drh
|
45 |
** \W Non-word character |
|
dfaa221…
|
drh
|
46 |
** \d Digit |
|
dfaa221…
|
drh
|
47 |
** \D Non-digit |
|
dfaa221…
|
drh
|
48 |
** \s Whitespace character |
|
dfaa221…
|
drh
|
49 |
** \S Non-whitespace character |
|
dfaa221…
|
drh
|
50 |
** |
|
dfaa221…
|
drh
|
51 |
** A nondeterministic finite automaton (NFA) is used for matching, so the |
|
dfaa221…
|
drh
|
52 |
** performance is bounded by O(N*M) where N is the size of the regular |
|
dfaa221…
|
drh
|
53 |
** expression and M is the size of the input string. The matcher never |
|
dfaa221…
|
drh
|
54 |
** exhibits exponential behavior. Note that the X{p,q} operator expands |
|
dfaa221…
|
drh
|
55 |
** to p copies of X following by q-p copies of X? and that the size of the |
|
dfaa221…
|
drh
|
56 |
** regular expression in the O(N*M) performance bound is computed after |
|
dfaa221…
|
drh
|
57 |
** this expansion. |
|
a18dab4…
|
drh
|
58 |
** |
|
a18dab4…
|
drh
|
59 |
** To help prevent DoS attacks, the maximum size of the NFA is restricted. |
|
e2552de…
|
drh
|
60 |
*/ |
|
e2552de…
|
drh
|
61 |
#include "config.h" |
|
e2552de…
|
drh
|
62 |
#include "regexp.h" |
|
e2552de…
|
drh
|
63 |
|
|
e2552de…
|
drh
|
64 |
/* The end-of-input character */ |
|
e2552de…
|
drh
|
65 |
#define RE_EOF 0 /* End of input */ |
|
9642cde…
|
drh
|
66 |
#define RE_START 0xfffffff /* Start of input - larger than an UTF-8 */ |
|
e2552de…
|
drh
|
67 |
|
|
e2552de…
|
drh
|
68 |
/* The NFA is implemented as sequence of opcodes taken from the following |
|
e2552de…
|
drh
|
69 |
** set. Each opcode has a single integer argument. |
|
e2552de…
|
drh
|
70 |
*/ |
|
e2552de…
|
drh
|
71 |
#define RE_OP_MATCH 1 /* Match the one character in the argument */ |
|
e2552de…
|
drh
|
72 |
#define RE_OP_ANY 2 /* Match any one character. (Implements ".") */ |
|
e2552de…
|
drh
|
73 |
#define RE_OP_ANYSTAR 3 /* Special optimized version of .* */ |
|
e2552de…
|
drh
|
74 |
#define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */ |
|
e2552de…
|
drh
|
75 |
#define RE_OP_GOTO 5 /* Jump to opcode at iArg */ |
|
e2552de…
|
drh
|
76 |
#define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */ |
|
e2552de…
|
drh
|
77 |
#define RE_OP_CC_INC 7 /* Beginning of a [...] character class */ |
|
e2552de…
|
drh
|
78 |
#define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */ |
|
e2552de…
|
drh
|
79 |
#define RE_OP_CC_VALUE 9 /* Single value in a character class */ |
|
e2552de…
|
drh
|
80 |
#define RE_OP_CC_RANGE 10 /* Range of values in a character class */ |
|
e2552de…
|
drh
|
81 |
#define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */ |
|
e2552de…
|
drh
|
82 |
#define RE_OP_NOTWORD 12 /* Not a perl word character */ |
|
e2552de…
|
drh
|
83 |
#define RE_OP_DIGIT 13 /* digit: [0-9] */ |
|
e2552de…
|
drh
|
84 |
#define RE_OP_NOTDIGIT 14 /* Not a digit */ |
|
e2552de…
|
drh
|
85 |
#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */ |
|
e2552de…
|
drh
|
86 |
#define RE_OP_NOTSPACE 16 /* Not a digit */ |
|
e2552de…
|
drh
|
87 |
#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */ |
|
9642cde…
|
drh
|
88 |
#define RE_OP_ATSTART 18 /* Currently at the start of the string */ |
|
e2552de…
|
drh
|
89 |
|
|
e2552de…
|
drh
|
90 |
/* Each opcode is a "state" in the NFA */ |
|
e2552de…
|
drh
|
91 |
typedef unsigned short ReStateNumber; |
|
e2552de…
|
drh
|
92 |
|
|
e2552de…
|
drh
|
93 |
/* Because this is an NFA and not a DFA, multiple states can be active at |
|
e2552de…
|
drh
|
94 |
** once. An instance of the following object records all active states in |
|
e2552de…
|
drh
|
95 |
** the NFA. The implementation is optimized for the common case where the |
|
e2552de…
|
drh
|
96 |
** number of actives states is small. |
|
e2552de…
|
drh
|
97 |
*/ |
|
e2552de…
|
drh
|
98 |
typedef struct ReStateSet { |
|
e2552de…
|
drh
|
99 |
unsigned nState; /* Number of current states */ |
|
e2552de…
|
drh
|
100 |
ReStateNumber *aState; /* Current states */ |
|
e2552de…
|
drh
|
101 |
} ReStateSet; |
|
e2552de…
|
drh
|
102 |
|
|
e2552de…
|
drh
|
103 |
#if INTERFACE |
|
97a7b92…
|
drh
|
104 |
/* An input string read one character at a time. |
|
97a7b92…
|
drh
|
105 |
*/ |
|
97a7b92…
|
drh
|
106 |
struct ReInput { |
|
97a7b92…
|
drh
|
107 |
const unsigned char *z; /* All text */ |
|
97a7b92…
|
drh
|
108 |
int i; /* Next byte to read */ |
|
97a7b92…
|
drh
|
109 |
int mx; /* EOF when i>=mx */ |
|
97a7b92…
|
drh
|
110 |
}; |
|
97a7b92…
|
drh
|
111 |
|
|
e2552de…
|
drh
|
112 |
/* A compiled NFA (or an NFA that is in the process of being compiled) is |
|
e2552de…
|
drh
|
113 |
** an instance of the following object. |
|
e2552de…
|
drh
|
114 |
*/ |
|
e2552de…
|
drh
|
115 |
struct ReCompiled { |
|
97a7b92…
|
drh
|
116 |
ReInput sIn; /* Regular expression text */ |
|
e2552de…
|
drh
|
117 |
const char *zErr; /* Error message to return */ |
|
e2552de…
|
drh
|
118 |
char *aOp; /* Operators for the virtual machine */ |
|
e2552de…
|
drh
|
119 |
int *aArg; /* Arguments to each operator */ |
|
97a7b92…
|
drh
|
120 |
unsigned (*xNextChar)(ReInput*); /* Next character function */ |
|
520703d…
|
drh
|
121 |
unsigned char zInit[12]; /* Initial text to match */ |
|
a18dab4…
|
drh
|
122 |
int nInit; /* Number of bytes in zInit */ |
|
e2552de…
|
drh
|
123 |
unsigned nState; /* Number of entries in aOp[] and aArg[] */ |
|
e2552de…
|
drh
|
124 |
unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ |
|
82888a0…
|
drh
|
125 |
unsigned mxAlloc; /* Complexity limit */ |
|
e2552de…
|
drh
|
126 |
}; |
|
e2552de…
|
drh
|
127 |
#endif |
|
e2552de…
|
drh
|
128 |
|
|
e2552de…
|
drh
|
129 |
/* Add a state to the given state set if it is not already there */ |
|
e2552de…
|
drh
|
130 |
static void re_add_state(ReStateSet *pSet, int newState){ |
|
e2552de…
|
drh
|
131 |
unsigned i; |
|
e2552de…
|
drh
|
132 |
for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return; |
|
b2c424a…
|
jan.nijtmans
|
133 |
pSet->aState[pSet->nState++] = (ReStateNumber)newState; |
|
e2552de…
|
drh
|
134 |
} |
|
e2552de…
|
drh
|
135 |
|
|
e2552de…
|
drh
|
136 |
/* Extract the next unicode character from *pzIn and return it. Advance |
|
e2552de…
|
drh
|
137 |
** *pzIn to the first byte past the end of the character returned. To |
|
5330d10…
|
jan.nijtmans
|
138 |
** be clear: this routine converts utf8 to unicode. This routine is |
|
e2552de…
|
drh
|
139 |
** optimized for the common case where the next character is a single byte. |
|
e2552de…
|
drh
|
140 |
*/ |
|
97a7b92…
|
drh
|
141 |
static unsigned re_next_char(ReInput *p){ |
|
97a7b92…
|
drh
|
142 |
unsigned c; |
|
97a7b92…
|
drh
|
143 |
if( p->i>=p->mx ) return 0; |
|
97a7b92…
|
drh
|
144 |
c = p->z[p->i++]; |
|
a13e0a2…
|
jan.nijtmans
|
145 |
if( c>=0x80 ){ |
|
97a7b92…
|
drh
|
146 |
if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){ |
|
97a7b92…
|
drh
|
147 |
c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f); |
|
e2552de…
|
drh
|
148 |
if( c<0x80 ) c = 0xfffd; |
|
97a7b92…
|
drh
|
149 |
}else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 |
|
97a7b92…
|
drh
|
150 |
&& (p->z[p->i+1]&0xc0)==0x80 ){ |
|
97a7b92…
|
drh
|
151 |
c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); |
|
97a7b92…
|
drh
|
152 |
p->i += 2; |
|
6b32e21…
|
drh
|
153 |
if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; |
|
a18dab4…
|
drh
|
154 |
}else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80 |
|
97a7b92…
|
drh
|
155 |
&& (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ |
|
97a7b92…
|
drh
|
156 |
c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) |
|
97a7b92…
|
drh
|
157 |
| (p->z[p->i+2]&0x3f); |
|
97a7b92…
|
drh
|
158 |
p->i += 3; |
|
a13e0a2…
|
jan.nijtmans
|
159 |
if( c<=0xffff || c>0x10ffff ) c = 0xfffd; |
|
e2552de…
|
drh
|
160 |
}else{ |
|
e2552de…
|
drh
|
161 |
c = 0xfffd; |
|
e2552de…
|
drh
|
162 |
} |
|
e2552de…
|
drh
|
163 |
} |
|
e2552de…
|
drh
|
164 |
return c; |
|
62cd2e2…
|
drh
|
165 |
} |
|
97a7b92…
|
drh
|
166 |
static unsigned re_next_char_nocase(ReInput *p){ |
|
97a7b92…
|
drh
|
167 |
unsigned c = re_next_char(p); |
|
b2c424a…
|
jan.nijtmans
|
168 |
return unicode_fold(c,2); |
|
cb952c4…
|
drh
|
169 |
} |
|
e2552de…
|
drh
|
170 |
|
|
e2552de…
|
drh
|
171 |
/* Return true if c is a perl "word" character: [A-Za-z0-9_] */ |
|
e2552de…
|
drh
|
172 |
static int re_word_char(int c){ |
|
cb952c4…
|
drh
|
173 |
return unicode_isalnum(c) || c=='_'; |
|
e2552de…
|
drh
|
174 |
} |
|
e2552de…
|
drh
|
175 |
|
|
e2552de…
|
drh
|
176 |
/* Return true if c is a "digit" character: [0-9] */ |
|
e2552de…
|
drh
|
177 |
static int re_digit_char(int c){ |
|
e2552de…
|
drh
|
178 |
return (c>='0' && c<='9'); |
|
e2552de…
|
drh
|
179 |
} |
|
e2552de…
|
drh
|
180 |
|
|
e2552de…
|
drh
|
181 |
/* Return true if c is a perl "space" character: [ \t\r\n\v\f] */ |
|
e2552de…
|
drh
|
182 |
static int re_space_char(int c){ |
|
2f10a05…
|
drh
|
183 |
return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; |
|
e2552de…
|
drh
|
184 |
} |
|
e2552de…
|
drh
|
185 |
|
|
e2552de…
|
drh
|
186 |
/* Run a compiled regular expression on the zero-terminated input |
|
e2552de…
|
drh
|
187 |
** string zIn[]. Return true on a match and false if there is no match. |
|
e2552de…
|
drh
|
188 |
*/ |
|
c84051f…
|
drh
|
189 |
int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ |
|
e2552de…
|
drh
|
190 |
ReStateSet aStateSet[2], *pThis, *pNext; |
|
e2552de…
|
drh
|
191 |
ReStateNumber aSpace[100]; |
|
e2552de…
|
drh
|
192 |
ReStateNumber *pToFree; |
|
e2552de…
|
drh
|
193 |
unsigned int i = 0; |
|
e2552de…
|
drh
|
194 |
unsigned int iSwap = 0; |
|
9642cde…
|
drh
|
195 |
int c = RE_START; |
|
e2552de…
|
drh
|
196 |
int cPrev = 0; |
|
e2552de…
|
drh
|
197 |
int rc = 0; |
|
97a7b92…
|
drh
|
198 |
ReInput in; |
|
97a7b92…
|
drh
|
199 |
|
|
97a7b92…
|
drh
|
200 |
in.z = zIn; |
|
97a7b92…
|
drh
|
201 |
in.i = 0; |
|
b2c424a…
|
jan.nijtmans
|
202 |
in.mx = nIn>=0 ? nIn : (int)strlen((char const*)zIn); |
|
c23e644…
|
drh
|
203 |
|
|
c23e644…
|
drh
|
204 |
/* Look for the initial prefix match, if there is one. */ |
|
e2552de…
|
drh
|
205 |
if( pRe->nInit ){ |
|
e2552de…
|
drh
|
206 |
unsigned char x = pRe->zInit[0]; |
|
5330d10…
|
jan.nijtmans
|
207 |
while( in.i+pRe->nInit<=in.mx |
|
520703d…
|
drh
|
208 |
&& (zIn[in.i]!=x || |
|
520703d…
|
drh
|
209 |
strncmp((const char*)zIn+in.i, (const char*)pRe->zInit, pRe->nInit)!=0) |
|
97a7b92…
|
drh
|
210 |
){ |
|
97a7b92…
|
drh
|
211 |
in.i++; |
|
e2552de…
|
drh
|
212 |
} |
|
c84051f…
|
drh
|
213 |
if( in.i+pRe->nInit>in.mx ) return 0; |
|
9642cde…
|
drh
|
214 |
c = RE_START-1; |
|
e2552de…
|
drh
|
215 |
} |
|
c23e644…
|
drh
|
216 |
|
|
b2c424a…
|
jan.nijtmans
|
217 |
if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ |
|
e2552de…
|
drh
|
218 |
pToFree = 0; |
|
e2552de…
|
drh
|
219 |
aStateSet[0].aState = aSpace; |
|
e2552de…
|
drh
|
220 |
}else{ |
|
e2552de…
|
drh
|
221 |
pToFree = fossil_malloc( sizeof(ReStateNumber)*2*pRe->nState ); |
|
e2552de…
|
drh
|
222 |
if( pToFree==0 ) return -1; |
|
e2552de…
|
drh
|
223 |
aStateSet[0].aState = pToFree; |
|
e2552de…
|
drh
|
224 |
} |
|
e2552de…
|
drh
|
225 |
aStateSet[1].aState = &aStateSet[0].aState[pRe->nState]; |
|
e2552de…
|
drh
|
226 |
pNext = &aStateSet[1]; |
|
e2552de…
|
drh
|
227 |
pNext->nState = 0; |
|
e2552de…
|
drh
|
228 |
re_add_state(pNext, 0); |
|
e2552de…
|
drh
|
229 |
while( c!=RE_EOF && pNext->nState>0 ){ |
|
e2552de…
|
drh
|
230 |
cPrev = c; |
|
97a7b92…
|
drh
|
231 |
c = pRe->xNextChar(&in); |
|
e2552de…
|
drh
|
232 |
pThis = pNext; |
|
e2552de…
|
drh
|
233 |
pNext = &aStateSet[iSwap]; |
|
e2552de…
|
drh
|
234 |
iSwap = 1 - iSwap; |
|
e2552de…
|
drh
|
235 |
pNext->nState = 0; |
|
e2552de…
|
drh
|
236 |
for(i=0; i<pThis->nState; i++){ |
|
e2552de…
|
drh
|
237 |
int x = pThis->aState[i]; |
|
e2552de…
|
drh
|
238 |
switch( pRe->aOp[x] ){ |
|
e2552de…
|
drh
|
239 |
case RE_OP_MATCH: { |
|
e2552de…
|
drh
|
240 |
if( pRe->aArg[x]==c ) re_add_state(pNext, x+1); |
|
9642cde…
|
drh
|
241 |
break; |
|
9642cde…
|
drh
|
242 |
} |
|
9642cde…
|
drh
|
243 |
case RE_OP_ATSTART: { |
|
9642cde…
|
drh
|
244 |
if( cPrev==RE_START ) re_add_state(pThis, x+1); |
|
dfaa221…
|
drh
|
245 |
break; |
|
dfaa221…
|
drh
|
246 |
} |
|
e2552de…
|
drh
|
247 |
case RE_OP_ANY: { |
|
1f3a053…
|
drh
|
248 |
if( c!=0 ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
249 |
break; |
|
e2552de…
|
drh
|
250 |
} |
|
e2552de…
|
drh
|
251 |
case RE_OP_WORD: { |
|
e2552de…
|
drh
|
252 |
if( re_word_char(c) ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
253 |
break; |
|
e2552de…
|
drh
|
254 |
} |
|
e2552de…
|
drh
|
255 |
case RE_OP_NOTWORD: { |
|
dfaa221…
|
drh
|
256 |
if( !re_word_char(c) && c!=0 ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
257 |
break; |
|
e2552de…
|
drh
|
258 |
} |
|
e2552de…
|
drh
|
259 |
case RE_OP_DIGIT: { |
|
e2552de…
|
drh
|
260 |
if( re_digit_char(c) ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
261 |
break; |
|
e2552de…
|
drh
|
262 |
} |
|
e2552de…
|
drh
|
263 |
case RE_OP_NOTDIGIT: { |
|
dfaa221…
|
drh
|
264 |
if( !re_digit_char(c) && c!=0 ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
265 |
break; |
|
e2552de…
|
drh
|
266 |
} |
|
e2552de…
|
drh
|
267 |
case RE_OP_SPACE: { |
|
e2552de…
|
drh
|
268 |
if( re_space_char(c) ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
269 |
break; |
|
e2552de…
|
drh
|
270 |
} |
|
e2552de…
|
drh
|
271 |
case RE_OP_NOTSPACE: { |
|
dfaa221…
|
drh
|
272 |
if( !re_space_char(c) && c!=0 ) re_add_state(pNext, x+1); |
|
e2552de…
|
drh
|
273 |
break; |
|
e2552de…
|
drh
|
274 |
} |
|
e2552de…
|
drh
|
275 |
case RE_OP_BOUNDARY: { |
|
e2552de…
|
drh
|
276 |
if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1); |
|
e2552de…
|
drh
|
277 |
break; |
|
e2552de…
|
drh
|
278 |
} |
|
e2552de…
|
drh
|
279 |
case RE_OP_ANYSTAR: { |
|
e2552de…
|
drh
|
280 |
re_add_state(pNext, x); |
|
e2552de…
|
drh
|
281 |
re_add_state(pThis, x+1); |
|
e2552de…
|
drh
|
282 |
break; |
|
e2552de…
|
drh
|
283 |
} |
|
e2552de…
|
drh
|
284 |
case RE_OP_FORK: { |
|
e2552de…
|
drh
|
285 |
re_add_state(pThis, x+pRe->aArg[x]); |
|
e2552de…
|
drh
|
286 |
re_add_state(pThis, x+1); |
|
e2552de…
|
drh
|
287 |
break; |
|
e2552de…
|
drh
|
288 |
} |
|
e2552de…
|
drh
|
289 |
case RE_OP_GOTO: { |
|
e2552de…
|
drh
|
290 |
re_add_state(pThis, x+pRe->aArg[x]); |
|
e2552de…
|
drh
|
291 |
break; |
|
e2552de…
|
drh
|
292 |
} |
|
e2552de…
|
drh
|
293 |
case RE_OP_ACCEPT: { |
|
e2552de…
|
drh
|
294 |
rc = 1; |
|
c84051f…
|
drh
|
295 |
goto re_match_end; |
|
e2552de…
|
drh
|
296 |
} |
|
e2552de…
|
drh
|
297 |
case RE_OP_CC_EXC: { |
|
dfaa221…
|
drh
|
298 |
if( c==0 ) break; |
|
a18dab4…
|
drh
|
299 |
/* fall-through */ goto re_op_cc_inc; |
|
dfaa221…
|
drh
|
300 |
} |
|
a18dab4…
|
drh
|
301 |
case RE_OP_CC_INC: re_op_cc_inc: { |
|
e2552de…
|
drh
|
302 |
int j = 1; |
|
e2552de…
|
drh
|
303 |
int n = pRe->aArg[x]; |
|
e2552de…
|
drh
|
304 |
int hit = 0; |
|
e2552de…
|
drh
|
305 |
for(j=1; j>0 && j<n; j++){ |
|
e2552de…
|
drh
|
306 |
if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){ |
|
e2552de…
|
drh
|
307 |
if( pRe->aArg[x+j]==c ){ |
|
e2552de…
|
drh
|
308 |
hit = 1; |
|
e2552de…
|
drh
|
309 |
j = -1; |
|
e2552de…
|
drh
|
310 |
} |
|
e2552de…
|
drh
|
311 |
}else{ |
|
e2552de…
|
drh
|
312 |
if( pRe->aArg[x+j]<=c && pRe->aArg[x+j+1]>=c ){ |
|
e2552de…
|
drh
|
313 |
hit = 1; |
|
e2552de…
|
drh
|
314 |
j = -1; |
|
e2552de…
|
drh
|
315 |
}else{ |
|
e2552de…
|
drh
|
316 |
j++; |
|
e2552de…
|
drh
|
317 |
} |
|
e2552de…
|
drh
|
318 |
} |
|
e2552de…
|
drh
|
319 |
} |
|
e2552de…
|
drh
|
320 |
if( pRe->aOp[x]==RE_OP_CC_EXC ) hit = !hit; |
|
e2552de…
|
drh
|
321 |
if( hit ) re_add_state(pNext, x+n); |
|
5330d10…
|
jan.nijtmans
|
322 |
break; |
|
e2552de…
|
drh
|
323 |
} |
|
e2552de…
|
drh
|
324 |
} |
|
e2552de…
|
drh
|
325 |
} |
|
e2552de…
|
drh
|
326 |
} |
|
e2552de…
|
drh
|
327 |
for(i=0; i<pNext->nState; i++){ |
|
9642cde…
|
drh
|
328 |
int x = pNext->aState[i]; |
|
9642cde…
|
drh
|
329 |
while( pRe->aOp[x]==RE_OP_GOTO ) x += pRe->aArg[x]; |
|
9642cde…
|
drh
|
330 |
if( pRe->aOp[x]==RE_OP_ACCEPT ){ rc = 1; break; } |
|
e2552de…
|
drh
|
331 |
} |
|
c84051f…
|
drh
|
332 |
re_match_end: |
|
e2552de…
|
drh
|
333 |
fossil_free(pToFree); |
|
e2552de…
|
drh
|
334 |
return rc; |
|
e2552de…
|
drh
|
335 |
} |
|
e2552de…
|
drh
|
336 |
|
|
e2552de…
|
drh
|
337 |
/* Resize the opcode and argument arrays for an RE under construction. |
|
e2552de…
|
drh
|
338 |
*/ |
|
e2552de…
|
drh
|
339 |
static int re_resize(ReCompiled *p, int N){ |
|
e2552de…
|
drh
|
340 |
char *aOp; |
|
e2552de…
|
drh
|
341 |
int *aArg; |
|
82888a0…
|
drh
|
342 |
if( N>p->mxAlloc ){ p->zErr = "REGEXP pattern too big"; return 1; } |
|
e2552de…
|
drh
|
343 |
aOp = fossil_realloc(p->aOp, N*sizeof(p->aOp[0])); |
|
82888a0…
|
drh
|
344 |
if( aOp==0 ){ p->zErr = "out of memory"; return 1; } |
|
e2552de…
|
drh
|
345 |
p->aOp = aOp; |
|
e2552de…
|
drh
|
346 |
aArg = fossil_realloc(p->aArg, N*sizeof(p->aArg[0])); |
|
82888a0…
|
drh
|
347 |
if( aArg==0 ){ p->zErr = "out of memory"; return 1; } |
|
e2552de…
|
drh
|
348 |
p->aArg = aArg; |
|
e2552de…
|
drh
|
349 |
p->nAlloc = N; |
|
e2552de…
|
drh
|
350 |
return 0; |
|
e2552de…
|
drh
|
351 |
} |
|
e2552de…
|
drh
|
352 |
|
|
e2552de…
|
drh
|
353 |
/* Insert a new opcode and argument into an RE under construction. The |
|
e2552de…
|
drh
|
354 |
** insertion point is just prior to existing opcode iBefore. |
|
e2552de…
|
drh
|
355 |
*/ |
|
e2552de…
|
drh
|
356 |
static int re_insert(ReCompiled *p, int iBefore, int op, int arg){ |
|
e2552de…
|
drh
|
357 |
int i; |
|
e2552de…
|
drh
|
358 |
if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0; |
|
e2552de…
|
drh
|
359 |
for(i=p->nState; i>iBefore; i--){ |
|
e2552de…
|
drh
|
360 |
p->aOp[i] = p->aOp[i-1]; |
|
e2552de…
|
drh
|
361 |
p->aArg[i] = p->aArg[i-1]; |
|
e2552de…
|
drh
|
362 |
} |
|
e2552de…
|
drh
|
363 |
p->nState++; |
|
b2c424a…
|
jan.nijtmans
|
364 |
p->aOp[iBefore] = (char)op; |
|
e2552de…
|
drh
|
365 |
p->aArg[iBefore] = arg; |
|
e2552de…
|
drh
|
366 |
return iBefore; |
|
e2552de…
|
drh
|
367 |
} |
|
e2552de…
|
drh
|
368 |
|
|
e2552de…
|
drh
|
369 |
/* Append a new opcode and argument to the end of the RE under construction. |
|
e2552de…
|
drh
|
370 |
*/ |
|
e2552de…
|
drh
|
371 |
static int re_append(ReCompiled *p, int op, int arg){ |
|
e2552de…
|
drh
|
372 |
return re_insert(p, p->nState, op, arg); |
|
e2552de…
|
drh
|
373 |
} |
|
e2552de…
|
drh
|
374 |
|
|
e2552de…
|
drh
|
375 |
/* Make a copy of N opcodes starting at iStart onto the end of the RE |
|
e2552de…
|
drh
|
376 |
** under construction. |
|
e2552de…
|
drh
|
377 |
*/ |
|
e2552de…
|
drh
|
378 |
static void re_copy(ReCompiled *p, int iStart, int N){ |
|
e2552de…
|
drh
|
379 |
if( p->nState+N>=p->nAlloc && re_resize(p, p->nAlloc*2+N) ) return; |
|
e2552de…
|
drh
|
380 |
memcpy(&p->aOp[p->nState], &p->aOp[iStart], N*sizeof(p->aOp[0])); |
|
e2552de…
|
drh
|
381 |
memcpy(&p->aArg[p->nState], &p->aArg[iStart], N*sizeof(p->aArg[0])); |
|
e2552de…
|
drh
|
382 |
p->nState += N; |
|
e2552de…
|
drh
|
383 |
} |
|
e2552de…
|
drh
|
384 |
|
|
e2552de…
|
drh
|
385 |
/* Return true if c is a hexadecimal digit character: [0-9a-fA-F] |
|
e2552de…
|
drh
|
386 |
** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c). If |
|
e2552de…
|
drh
|
387 |
** c is not a hex digit *pV is unchanged. |
|
e2552de…
|
drh
|
388 |
*/ |
|
e2552de…
|
drh
|
389 |
static int re_hex(int c, int *pV){ |
|
e2552de…
|
drh
|
390 |
if( c>='0' && c<='9' ){ |
|
e2552de…
|
drh
|
391 |
c -= '0'; |
|
e2552de…
|
drh
|
392 |
}else if( c>='a' && c<='f' ){ |
|
e2552de…
|
drh
|
393 |
c -= 'a' - 10; |
|
e2552de…
|
drh
|
394 |
}else if( c>='A' && c<='F' ){ |
|
e2552de…
|
drh
|
395 |
c -= 'A' - 10; |
|
e2552de…
|
drh
|
396 |
}else{ |
|
e2552de…
|
drh
|
397 |
return 0; |
|
e2552de…
|
drh
|
398 |
} |
|
e2552de…
|
drh
|
399 |
*pV = (*pV)*16 + (c & 0xff); |
|
e2552de…
|
drh
|
400 |
return 1; |
|
e2552de…
|
drh
|
401 |
} |
|
e2552de…
|
drh
|
402 |
|
|
e2552de…
|
drh
|
403 |
/* A backslash character has been seen, read the next character and |
|
e4ca677…
|
drh
|
404 |
** return its interpretation. |
|
e2552de…
|
drh
|
405 |
*/ |
|
e2552de…
|
drh
|
406 |
static unsigned re_esc_char(ReCompiled *p){ |
|
e2552de…
|
drh
|
407 |
static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; |
|
e2552de…
|
drh
|
408 |
static const char zTrans[] = "\a\f\n\r\t\v"; |
|
e2552de…
|
drh
|
409 |
int i, v = 0; |
|
97a7b92…
|
drh
|
410 |
char c; |
|
97a7b92…
|
drh
|
411 |
if( p->sIn.i>=p->sIn.mx ) return 0; |
|
c84051f…
|
drh
|
412 |
c = p->sIn.z[p->sIn.i]; |
|
e4ca677…
|
drh
|
413 |
if( c=='u' && p->sIn.i+4<p->sIn.mx ){ |
|
97a7b92…
|
drh
|
414 |
const unsigned char *zIn = p->sIn.z + p->sIn.i; |
|
97a7b92…
|
drh
|
415 |
if( re_hex(zIn[1],&v) |
|
97a7b92…
|
drh
|
416 |
&& re_hex(zIn[2],&v) |
|
97a7b92…
|
drh
|
417 |
&& re_hex(zIn[3],&v) |
|
97a7b92…
|
drh
|
418 |
&& re_hex(zIn[4],&v) |
|
e2552de…
|
drh
|
419 |
){ |
|
97a7b92…
|
drh
|
420 |
p->sIn.i += 5; |
|
e2552de…
|
drh
|
421 |
return v; |
|
e2552de…
|
drh
|
422 |
} |
|
e2552de…
|
drh
|
423 |
} |
|
2f10a05…
|
drh
|
424 |
if( c=='x' && p->sIn.i+2<p->sIn.mx ){ |
|
e4ca677…
|
drh
|
425 |
const unsigned char *zIn = p->sIn.z + p->sIn.i; |
|
2f10a05…
|
drh
|
426 |
if( re_hex(zIn[1],&v) |
|
2f10a05…
|
drh
|
427 |
&& re_hex(zIn[2],&v) |
|
2f10a05…
|
drh
|
428 |
){ |
|
2f10a05…
|
drh
|
429 |
p->sIn.i += 3; |
|
2f10a05…
|
drh
|
430 |
return v; |
|
e2552de…
|
drh
|
431 |
} |
|
e2552de…
|
drh
|
432 |
} |
|
e2552de…
|
drh
|
433 |
for(i=0; zEsc[i] && zEsc[i]!=c; i++){} |
|
e2552de…
|
drh
|
434 |
if( zEsc[i] ){ |
|
e2552de…
|
drh
|
435 |
if( i<6 ) c = zTrans[i]; |
|
97a7b92…
|
drh
|
436 |
p->sIn.i++; |
|
e2552de…
|
drh
|
437 |
}else{ |
|
e2552de…
|
drh
|
438 |
p->zErr = "unknown \\ escape"; |
|
e2552de…
|
drh
|
439 |
} |
|
e2552de…
|
drh
|
440 |
return c; |
|
e2552de…
|
drh
|
441 |
} |
|
e2552de…
|
drh
|
442 |
|
|
e2552de…
|
drh
|
443 |
/* Forward declaration */ |
|
e2552de…
|
drh
|
444 |
static const char *re_subcompile_string(ReCompiled*); |
|
97a7b92…
|
drh
|
445 |
|
|
97a7b92…
|
drh
|
446 |
/* Peek at the next byte of input */ |
|
97a7b92…
|
drh
|
447 |
static unsigned char rePeek(ReCompiled *p){ |
|
97a7b92…
|
drh
|
448 |
return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0; |
|
97a7b92…
|
drh
|
449 |
} |
|
e2552de…
|
drh
|
450 |
|
|
e2552de…
|
drh
|
451 |
/* Compile RE text into a sequence of opcodes. Continue up to the |
|
e2552de…
|
drh
|
452 |
** first unmatched ")" character, then return. If an error is found, |
|
e2552de…
|
drh
|
453 |
** return a pointer to the error message string. |
|
e2552de…
|
drh
|
454 |
*/ |
|
e2552de…
|
drh
|
455 |
static const char *re_subcompile_re(ReCompiled *p){ |
|
e2552de…
|
drh
|
456 |
const char *zErr; |
|
e2552de…
|
drh
|
457 |
int iStart, iEnd, iGoto; |
|
e2552de…
|
drh
|
458 |
iStart = p->nState; |
|
e2552de…
|
drh
|
459 |
zErr = re_subcompile_string(p); |
|
e2552de…
|
drh
|
460 |
if( zErr ) return zErr; |
|
97a7b92…
|
drh
|
461 |
while( rePeek(p)=='|' ){ |
|
e2552de…
|
drh
|
462 |
iEnd = p->nState; |
|
e2552de…
|
drh
|
463 |
re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart); |
|
e2552de…
|
drh
|
464 |
iGoto = re_append(p, RE_OP_GOTO, 0); |
|
97a7b92…
|
drh
|
465 |
p->sIn.i++; |
|
e2552de…
|
drh
|
466 |
zErr = re_subcompile_string(p); |
|
e2552de…
|
drh
|
467 |
if( zErr ) return zErr; |
|
e2552de…
|
drh
|
468 |
p->aArg[iGoto] = p->nState - iGoto; |
|
e2552de…
|
drh
|
469 |
} |
|
e2552de…
|
drh
|
470 |
return 0; |
|
e2552de…
|
drh
|
471 |
} |
|
e2552de…
|
drh
|
472 |
|
|
e2552de…
|
drh
|
473 |
/* Compile an element of regular expression text (anything that can be |
|
e2552de…
|
drh
|
474 |
** an operand to the "|" operator). Return NULL on success or a pointer |
|
e2552de…
|
drh
|
475 |
** to the error message if there is a problem. |
|
e2552de…
|
drh
|
476 |
*/ |
|
e2552de…
|
drh
|
477 |
static const char *re_subcompile_string(ReCompiled *p){ |
|
e2552de…
|
drh
|
478 |
int iPrev = -1; |
|
e2552de…
|
drh
|
479 |
int iStart; |
|
e2552de…
|
drh
|
480 |
unsigned c; |
|
e2552de…
|
drh
|
481 |
const char *zErr; |
|
97a7b92…
|
drh
|
482 |
while( (c = p->xNextChar(&p->sIn))!=0 ){ |
|
e2552de…
|
drh
|
483 |
iStart = p->nState; |
|
e2552de…
|
drh
|
484 |
switch( c ){ |
|
e2552de…
|
drh
|
485 |
case '|': |
|
e2552de…
|
drh
|
486 |
case ')': { |
|
97a7b92…
|
drh
|
487 |
p->sIn.i--; |
|
e2552de…
|
drh
|
488 |
return 0; |
|
e2552de…
|
drh
|
489 |
} |
|
e2552de…
|
drh
|
490 |
case '(': { |
|
e2552de…
|
drh
|
491 |
zErr = re_subcompile_re(p); |
|
e2552de…
|
drh
|
492 |
if( zErr ) return zErr; |
|
97a7b92…
|
drh
|
493 |
if( rePeek(p)!=')' ) return "unmatched '('"; |
|
97a7b92…
|
drh
|
494 |
p->sIn.i++; |
|
e2552de…
|
drh
|
495 |
break; |
|
e2552de…
|
drh
|
496 |
} |
|
e2552de…
|
drh
|
497 |
case '.': { |
|
97a7b92…
|
drh
|
498 |
if( rePeek(p)=='*' ){ |
|
e2552de…
|
drh
|
499 |
re_append(p, RE_OP_ANYSTAR, 0); |
|
97a7b92…
|
drh
|
500 |
p->sIn.i++; |
|
5330d10…
|
jan.nijtmans
|
501 |
}else{ |
|
e2552de…
|
drh
|
502 |
re_append(p, RE_OP_ANY, 0); |
|
e2552de…
|
drh
|
503 |
} |
|
e2552de…
|
drh
|
504 |
break; |
|
e2552de…
|
drh
|
505 |
} |
|
e2552de…
|
drh
|
506 |
case '*': { |
|
e2552de…
|
drh
|
507 |
if( iPrev<0 ) return "'*' without operand"; |
|
e2552de…
|
drh
|
508 |
re_insert(p, iPrev, RE_OP_GOTO, p->nState - iPrev + 1); |
|
e2552de…
|
drh
|
509 |
re_append(p, RE_OP_FORK, iPrev - p->nState + 1); |
|
e2552de…
|
drh
|
510 |
break; |
|
e2552de…
|
drh
|
511 |
} |
|
e2552de…
|
drh
|
512 |
case '+': { |
|
e2552de…
|
drh
|
513 |
if( iPrev<0 ) return "'+' without operand"; |
|
e2552de…
|
drh
|
514 |
re_append(p, RE_OP_FORK, iPrev - p->nState); |
|
e2552de…
|
drh
|
515 |
break; |
|
e2552de…
|
drh
|
516 |
} |
|
e2552de…
|
drh
|
517 |
case '?': { |
|
e2552de…
|
drh
|
518 |
if( iPrev<0 ) return "'?' without operand"; |
|
e2552de…
|
drh
|
519 |
re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1); |
|
e2552de…
|
drh
|
520 |
break; |
|
e2552de…
|
drh
|
521 |
} |
|
9642cde…
|
drh
|
522 |
case '$': { |
|
9642cde…
|
drh
|
523 |
re_append(p, RE_OP_MATCH, RE_EOF); |
|
9642cde…
|
drh
|
524 |
break; |
|
9642cde…
|
drh
|
525 |
} |
|
9642cde…
|
drh
|
526 |
case '^': { |
|
9642cde…
|
drh
|
527 |
re_append(p, RE_OP_ATSTART, 0); |
|
9642cde…
|
drh
|
528 |
break; |
|
9642cde…
|
drh
|
529 |
} |
|
e2552de…
|
drh
|
530 |
case '{': { |
|
9642cde…
|
drh
|
531 |
unsigned int m = 0, n = 0; |
|
9642cde…
|
drh
|
532 |
unsigned int sz, j; |
|
e2552de…
|
drh
|
533 |
if( iPrev<0 ) return "'{m,n}' without operand"; |
|
9642cde…
|
drh
|
534 |
while( (c=rePeek(p))>='0' && c<='9' ){ |
|
9642cde…
|
drh
|
535 |
m = m*10 + c - '0'; |
|
82888a0…
|
drh
|
536 |
if( m*2>p->mxAlloc ) return "REGEXP pattern too big"; |
|
9642cde…
|
drh
|
537 |
p->sIn.i++; |
|
9642cde…
|
drh
|
538 |
} |
|
e2552de…
|
drh
|
539 |
n = m; |
|
e2552de…
|
drh
|
540 |
if( c==',' ){ |
|
97a7b92…
|
drh
|
541 |
p->sIn.i++; |
|
e2552de…
|
drh
|
542 |
n = 0; |
|
9642cde…
|
drh
|
543 |
while( (c=rePeek(p))>='0' && c<='9' ){ |
|
9642cde…
|
drh
|
544 |
n = n*10 + c-'0'; |
|
82888a0…
|
drh
|
545 |
if( n*2>p->mxAlloc ) return "REGEXP pattern too big"; |
|
9642cde…
|
drh
|
546 |
p->sIn.i++; |
|
9642cde…
|
drh
|
547 |
} |
|
e2552de…
|
drh
|
548 |
} |
|
e2552de…
|
drh
|
549 |
if( c!='}' ) return "unmatched '{'"; |
|
a18dab4…
|
drh
|
550 |
if( n<m ) return "n less than m in '{m,n}'"; |
|
97a7b92…
|
drh
|
551 |
p->sIn.i++; |
|
e2552de…
|
drh
|
552 |
sz = p->nState - iPrev; |
|
e2552de…
|
drh
|
553 |
if( m==0 ){ |
|
e2552de…
|
drh
|
554 |
if( n==0 ) return "both m and n are zero in '{m,n}'"; |
|
e2552de…
|
drh
|
555 |
re_insert(p, iPrev, RE_OP_FORK, sz+1); |
|
9642cde…
|
drh
|
556 |
iPrev++; |
|
e2552de…
|
drh
|
557 |
n--; |
|
e2552de…
|
drh
|
558 |
}else{ |
|
e2552de…
|
drh
|
559 |
for(j=1; j<m; j++) re_copy(p, iPrev, sz); |
|
e2552de…
|
drh
|
560 |
} |
|
e2552de…
|
drh
|
561 |
for(j=m; j<n; j++){ |
|
e2552de…
|
drh
|
562 |
re_append(p, RE_OP_FORK, sz+1); |
|
e2552de…
|
drh
|
563 |
re_copy(p, iPrev, sz); |
|
e2552de…
|
drh
|
564 |
} |
|
e2552de…
|
drh
|
565 |
if( n==0 && m>0 ){ |
|
82888a0…
|
drh
|
566 |
re_append(p, RE_OP_FORK, -(int)sz); |
|
e2552de…
|
drh
|
567 |
} |
|
e2552de…
|
drh
|
568 |
break; |
|
e2552de…
|
drh
|
569 |
} |
|
e2552de…
|
drh
|
570 |
case '[': { |
|
9642cde…
|
drh
|
571 |
unsigned int iFirst = p->nState; |
|
97a7b92…
|
drh
|
572 |
if( rePeek(p)=='^' ){ |
|
e2552de…
|
drh
|
573 |
re_append(p, RE_OP_CC_EXC, 0); |
|
97a7b92…
|
drh
|
574 |
p->sIn.i++; |
|
e2552de…
|
drh
|
575 |
}else{ |
|
e2552de…
|
drh
|
576 |
re_append(p, RE_OP_CC_INC, 0); |
|
e2552de…
|
drh
|
577 |
} |
|
97a7b92…
|
drh
|
578 |
while( (c = p->xNextChar(&p->sIn))!=0 ){ |
|
97a7b92…
|
drh
|
579 |
if( c=='[' && rePeek(p)==':' ){ |
|
e2552de…
|
drh
|
580 |
return "POSIX character classes not supported"; |
|
e2552de…
|
drh
|
581 |
} |
|
e2552de…
|
drh
|
582 |
if( c=='\\' ) c = re_esc_char(p); |
|
97a7b92…
|
drh
|
583 |
if( rePeek(p)=='-' ){ |
|
e2552de…
|
drh
|
584 |
re_append(p, RE_OP_CC_RANGE, c); |
|
97a7b92…
|
drh
|
585 |
p->sIn.i++; |
|
97a7b92…
|
drh
|
586 |
c = p->xNextChar(&p->sIn); |
|
e2552de…
|
drh
|
587 |
if( c=='\\' ) c = re_esc_char(p); |
|
e2552de…
|
drh
|
588 |
re_append(p, RE_OP_CC_RANGE, c); |
|
e2552de…
|
drh
|
589 |
}else{ |
|
e2552de…
|
drh
|
590 |
re_append(p, RE_OP_CC_VALUE, c); |
|
e2552de…
|
drh
|
591 |
} |
|
97a7b92…
|
drh
|
592 |
if( rePeek(p)==']' ){ p->sIn.i++; break; } |
|
e2552de…
|
drh
|
593 |
} |
|
e2552de…
|
drh
|
594 |
if( c==0 ) return "unclosed '['"; |
|
9642cde…
|
drh
|
595 |
if( p->nState>iFirst ) p->aArg[iFirst] = p->nState - iFirst; |
|
e2552de…
|
drh
|
596 |
break; |
|
e2552de…
|
drh
|
597 |
} |
|
e2552de…
|
drh
|
598 |
case '\\': { |
|
e2552de…
|
drh
|
599 |
int specialOp = 0; |
|
97a7b92…
|
drh
|
600 |
switch( rePeek(p) ){ |
|
e2552de…
|
drh
|
601 |
case 'b': specialOp = RE_OP_BOUNDARY; break; |
|
e2552de…
|
drh
|
602 |
case 'd': specialOp = RE_OP_DIGIT; break; |
|
e2552de…
|
drh
|
603 |
case 'D': specialOp = RE_OP_NOTDIGIT; break; |
|
e2552de…
|
drh
|
604 |
case 's': specialOp = RE_OP_SPACE; break; |
|
e2552de…
|
drh
|
605 |
case 'S': specialOp = RE_OP_NOTSPACE; break; |
|
e2552de…
|
drh
|
606 |
case 'w': specialOp = RE_OP_WORD; break; |
|
e2552de…
|
drh
|
607 |
case 'W': specialOp = RE_OP_NOTWORD; break; |
|
e2552de…
|
drh
|
608 |
} |
|
e2552de…
|
drh
|
609 |
if( specialOp ){ |
|
97a7b92…
|
drh
|
610 |
p->sIn.i++; |
|
e2552de…
|
drh
|
611 |
re_append(p, specialOp, 0); |
|
e2552de…
|
drh
|
612 |
}else{ |
|
e2552de…
|
drh
|
613 |
c = re_esc_char(p); |
|
e2552de…
|
drh
|
614 |
re_append(p, RE_OP_MATCH, c); |
|
e2552de…
|
drh
|
615 |
} |
|
e2552de…
|
drh
|
616 |
break; |
|
e2552de…
|
drh
|
617 |
} |
|
e2552de…
|
drh
|
618 |
default: { |
|
e2552de…
|
drh
|
619 |
re_append(p, RE_OP_MATCH, c); |
|
e2552de…
|
drh
|
620 |
break; |
|
e2552de…
|
drh
|
621 |
} |
|
e2552de…
|
drh
|
622 |
} |
|
e2552de…
|
drh
|
623 |
iPrev = iStart; |
|
e2552de…
|
drh
|
624 |
} |
|
e2552de…
|
drh
|
625 |
return 0; |
|
e2552de…
|
drh
|
626 |
} |
|
e2552de…
|
drh
|
627 |
|
|
e2552de…
|
drh
|
628 |
/* Free and reclaim all the memory used by a previously compiled |
|
e2552de…
|
drh
|
629 |
** regular expression. Applications should invoke this routine once |
|
e2552de…
|
drh
|
630 |
** for every call to re_compile() to avoid memory leaks. |
|
e2552de…
|
drh
|
631 |
*/ |
|
e2552de…
|
drh
|
632 |
void re_free(ReCompiled *pRe){ |
|
e2552de…
|
drh
|
633 |
if( pRe ){ |
|
e2552de…
|
drh
|
634 |
fossil_free(pRe->aOp); |
|
e2552de…
|
drh
|
635 |
fossil_free(pRe->aArg); |
|
c84051f…
|
drh
|
636 |
fossil_free(pRe); |
|
e2552de…
|
drh
|
637 |
} |
|
e2552de…
|
drh
|
638 |
} |
|
e2552de…
|
drh
|
639 |
|
|
e2552de…
|
drh
|
640 |
/* |
|
e2552de…
|
drh
|
641 |
** Compile a textual regular expression in zIn[] into a compiled regular |
|
c84051f…
|
drh
|
642 |
** expression suitable for us by re_match() and return a pointer to the |
|
e2552de…
|
drh
|
643 |
** compiled regular expression in *ppRe. Return NULL on success or an |
|
e2552de…
|
drh
|
644 |
** error message if something goes wrong. |
|
e2552de…
|
drh
|
645 |
*/ |
|
a18dab4…
|
drh
|
646 |
static const char *re_compile( |
|
82888a0…
|
drh
|
647 |
ReCompiled **ppRe, /* OUT: write compiled NFA here */ |
|
82888a0…
|
drh
|
648 |
const char *zIn, /* Input regular expression */ |
|
82888a0…
|
drh
|
649 |
int mxRe, /* Complexity limit */ |
|
82888a0…
|
drh
|
650 |
int noCase /* True for caseless comparisons */ |
|
82888a0…
|
drh
|
651 |
){ |
|
e2552de…
|
drh
|
652 |
ReCompiled *pRe; |
|
e2552de…
|
drh
|
653 |
const char *zErr; |
|
e2552de…
|
drh
|
654 |
int i, j; |
|
e2552de…
|
drh
|
655 |
|
|
e2552de…
|
drh
|
656 |
*ppRe = 0; |
|
e2552de…
|
drh
|
657 |
pRe = fossil_malloc( sizeof(*pRe) ); |
|
e2552de…
|
drh
|
658 |
if( pRe==0 ){ |
|
e2552de…
|
drh
|
659 |
return "out of memory"; |
|
e2552de…
|
drh
|
660 |
} |
|
e2552de…
|
drh
|
661 |
memset(pRe, 0, sizeof(*pRe)); |
|
62cd2e2…
|
drh
|
662 |
pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char; |
|
82888a0…
|
drh
|
663 |
pRe->mxAlloc = mxRe; |
|
e2552de…
|
drh
|
664 |
if( re_resize(pRe, 30) ){ |
|
82888a0…
|
drh
|
665 |
zErr = pRe->zErr; |
|
e2552de…
|
drh
|
666 |
re_free(pRe); |
|
82888a0…
|
drh
|
667 |
return zErr; |
|
e2552de…
|
drh
|
668 |
} |
|
e2552de…
|
drh
|
669 |
if( zIn[0]=='^' ){ |
|
e2552de…
|
drh
|
670 |
zIn++; |
|
e2552de…
|
drh
|
671 |
}else{ |
|
e2552de…
|
drh
|
672 |
re_append(pRe, RE_OP_ANYSTAR, 0); |
|
e2552de…
|
drh
|
673 |
} |
|
97a7b92…
|
drh
|
674 |
pRe->sIn.z = (unsigned char*)zIn; |
|
97a7b92…
|
drh
|
675 |
pRe->sIn.i = 0; |
|
b2c424a…
|
jan.nijtmans
|
676 |
pRe->sIn.mx = (int)strlen(zIn); |
|
e2552de…
|
drh
|
677 |
zErr = re_subcompile_re(pRe); |
|
e2552de…
|
drh
|
678 |
if( zErr ){ |
|
e2552de…
|
drh
|
679 |
re_free(pRe); |
|
e2552de…
|
drh
|
680 |
return zErr; |
|
e2552de…
|
drh
|
681 |
} |
|
9642cde…
|
drh
|
682 |
if( pRe->sIn.i>=pRe->sIn.mx ){ |
|
e2552de…
|
drh
|
683 |
re_append(pRe, RE_OP_ACCEPT, 0); |
|
e2552de…
|
drh
|
684 |
*ppRe = pRe; |
|
e2552de…
|
drh
|
685 |
}else{ |
|
e2552de…
|
drh
|
686 |
re_free(pRe); |
|
e2552de…
|
drh
|
687 |
return "unrecognized character"; |
|
e2552de…
|
drh
|
688 |
} |
|
c23e644…
|
drh
|
689 |
|
|
c23e644…
|
drh
|
690 |
/* The following is a performance optimization. If the regex begins with |
|
c23e644…
|
drh
|
691 |
** ".*" (if the input regex lacks an initial "^") and afterwards there are |
|
c23e644…
|
drh
|
692 |
** one or more matching characters, enter those matching characters into |
|
5330d10…
|
jan.nijtmans
|
693 |
** zInit[]. The re_match() routine can then search ahead in the input |
|
c23e644…
|
drh
|
694 |
** string looking for the initial match without having to run the whole |
|
9642cde…
|
drh
|
695 |
** regex engine over the string. Do not worry about trying to match |
|
c23e644…
|
drh
|
696 |
** unicode characters beyond plane 0 - those are very rare and this is |
|
c23e644…
|
drh
|
697 |
** just an optimization. */ |
|
1f3a053…
|
drh
|
698 |
if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){ |
|
dfaa221…
|
drh
|
699 |
for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){ |
|
e2552de…
|
drh
|
700 |
unsigned x = pRe->aArg[i]; |
|
9642cde…
|
drh
|
701 |
if( x<=0x7f ){ |
|
b2c424a…
|
jan.nijtmans
|
702 |
pRe->zInit[j++] = (unsigned char)x; |
|
9642cde…
|
drh
|
703 |
}else if( x<=0x7ff ){ |
|
b2c424a…
|
jan.nijtmans
|
704 |
pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6)); |
|
e2552de…
|
drh
|
705 |
pRe->zInit[j++] = 0x80 | (x&0x3f); |
|
e2552de…
|
drh
|
706 |
}else if( x<=0xffff ){ |
|
9642cde…
|
drh
|
707 |
pRe->zInit[j++] = (unsigned char)(0xe0 | (x>>12)); |
|
e2552de…
|
drh
|
708 |
pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); |
|
f86304f…
|
jan.nijtmans
|
709 |
pRe->zInit[j++] = 0x80 | (x&0x3f); |
|
e2552de…
|
drh
|
710 |
}else{ |
|
e2552de…
|
drh
|
711 |
break; |
|
e2552de…
|
drh
|
712 |
} |
|
e2552de…
|
drh
|
713 |
} |
|
c84051f…
|
drh
|
714 |
if( j>0 && pRe->zInit[j-1]==0 ) j--; |
|
e2552de…
|
drh
|
715 |
pRe->nInit = j; |
|
e2552de…
|
drh
|
716 |
} |
|
e2552de…
|
drh
|
717 |
return pRe->zErr; |
|
9642cde…
|
drh
|
718 |
} |
|
9642cde…
|
drh
|
719 |
|
|
9642cde…
|
drh
|
720 |
/* |
|
a18dab4…
|
drh
|
721 |
** Implementation of the regexp() SQL function. This function implements |
|
a18dab4…
|
drh
|
722 |
** the build-in REGEXP operator. The first argument to the function is the |
|
a18dab4…
|
drh
|
723 |
** pattern and the second argument is the string. So, the SQL statements: |
|
a18dab4…
|
drh
|
724 |
** |
|
a18dab4…
|
drh
|
725 |
** A REGEXP B |
|
a18dab4…
|
drh
|
726 |
** |
|
a18dab4…
|
drh
|
727 |
** is implemented as regexp(B,A). |
|
a18dab4…
|
drh
|
728 |
*/ |
|
a18dab4…
|
drh
|
729 |
static void re_sql_func( |
|
a18dab4…
|
drh
|
730 |
sqlite3_context *context, |
|
a18dab4…
|
drh
|
731 |
int argc, |
|
a18dab4…
|
drh
|
732 |
sqlite3_value **argv |
|
a18dab4…
|
drh
|
733 |
){ |
|
a18dab4…
|
drh
|
734 |
ReCompiled *pRe; /* Compiled regular expression */ |
|
a18dab4…
|
drh
|
735 |
const char *zPattern; /* The regular expression */ |
|
a18dab4…
|
drh
|
736 |
const unsigned char *zStr;/* String being searched */ |
|
a18dab4…
|
drh
|
737 |
const char *zErr; /* Compile error message */ |
|
a18dab4…
|
drh
|
738 |
int setAux = 0; /* True to invoke sqlite3_set_auxdata() */ |
|
a18dab4…
|
drh
|
739 |
|
|
a18dab4…
|
drh
|
740 |
(void)argc; /* Unused */ |
|
a18dab4…
|
drh
|
741 |
pRe = sqlite3_get_auxdata(context, 0); |
|
a18dab4…
|
drh
|
742 |
if( pRe==0 ){ |
|
a18dab4…
|
drh
|
743 |
zPattern = (const char*)sqlite3_value_text(argv[0]); |
|
a18dab4…
|
drh
|
744 |
if( zPattern==0 ) return; |
|
a18dab4…
|
drh
|
745 |
zErr = fossil_re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0); |
|
a18dab4…
|
drh
|
746 |
if( zErr ){ |
|
a18dab4…
|
drh
|
747 |
re_free(pRe); |
|
a18dab4…
|
drh
|
748 |
/* The original SQLite function from which this code was copied raises |
|
a18dab4…
|
drh
|
749 |
** an error if the REGEXP contained a syntax error. This variant |
|
a18dab4…
|
drh
|
750 |
** silently fails to match, as that works better for Fossil. |
|
a18dab4…
|
drh
|
751 |
** sqlite3_result_error(context, zErr, -1); */ |
|
a18dab4…
|
drh
|
752 |
sqlite3_result_int(context, 0); |
|
a18dab4…
|
drh
|
753 |
return; |
|
a18dab4…
|
drh
|
754 |
} |
|
a18dab4…
|
drh
|
755 |
if( pRe==0 ){ |
|
a18dab4…
|
drh
|
756 |
sqlite3_result_error_nomem(context); |
|
a18dab4…
|
drh
|
757 |
return; |
|
a18dab4…
|
drh
|
758 |
} |
|
a18dab4…
|
drh
|
759 |
setAux = 1; |
|
a18dab4…
|
drh
|
760 |
} |
|
a18dab4…
|
drh
|
761 |
zStr = (const unsigned char*)sqlite3_value_text(argv[1]); |
|
a18dab4…
|
drh
|
762 |
if( zStr!=0 ){ |
|
a18dab4…
|
drh
|
763 |
sqlite3_result_int(context, re_match(pRe, zStr, -1)); |
|
a18dab4…
|
drh
|
764 |
} |
|
a18dab4…
|
drh
|
765 |
if( setAux ){ |
|
a18dab4…
|
drh
|
766 |
sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); |
|
a18dab4…
|
drh
|
767 |
} |
|
a18dab4…
|
drh
|
768 |
} |
|
a18dab4…
|
drh
|
769 |
|
|
a18dab4…
|
drh
|
770 |
/* |
|
a18dab4…
|
drh
|
771 |
** Invoke this routine to register the regexp() function with the |
|
a18dab4…
|
drh
|
772 |
** SQLite database connection. |
|
a18dab4…
|
drh
|
773 |
*/ |
|
a18dab4…
|
drh
|
774 |
int re_add_sql_func(sqlite3 *db){ |
|
a18dab4…
|
drh
|
775 |
int rc; |
|
a18dab4…
|
drh
|
776 |
rc = sqlite3_create_function(db, "regexp", 2, |
|
a18dab4…
|
drh
|
777 |
SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
|
a18dab4…
|
drh
|
778 |
0, re_sql_func, 0, 0); |
|
a18dab4…
|
drh
|
779 |
if( rc==SQLITE_OK ){ |
|
a18dab4…
|
drh
|
780 |
/* The regexpi(PATTERN,STRING) function is a case-insensitive version |
|
a18dab4…
|
drh
|
781 |
** of regexp(PATTERN,STRING). */ |
|
a18dab4…
|
drh
|
782 |
rc = sqlite3_create_function(db, "regexpi", 2, |
|
a18dab4…
|
drh
|
783 |
SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC, |
|
a18dab4…
|
drh
|
784 |
(void*)db, re_sql_func, 0, 0); |
|
a18dab4…
|
drh
|
785 |
} |
|
a18dab4…
|
drh
|
786 |
return rc; |
|
a18dab4…
|
drh
|
787 |
} |
|
a18dab4…
|
drh
|
788 |
|
|
a18dab4…
|
drh
|
789 |
/* |
|
90e564f…
|
drh
|
790 |
** The input zIn is a string that we want to match exactly as part of |
|
90e564f…
|
drh
|
791 |
** a regular expression. Return a new string (in space obtained from |
|
90e564f…
|
drh
|
792 |
** fossil_malloc() or the equivalent) that escapes all regexp syntax |
|
90e564f…
|
drh
|
793 |
** characters in zIn. |
|
90e564f…
|
drh
|
794 |
*/ |
|
90e564f…
|
drh
|
795 |
char *re_quote(const char *zIn){ |
|
90e564f…
|
drh
|
796 |
Blob out; |
|
90e564f…
|
drh
|
797 |
blob_init(&out, 0, 0); |
|
90e564f…
|
drh
|
798 |
while( zIn[0] ){ |
|
90e564f…
|
drh
|
799 |
switch( zIn[0] ){ |
|
90e564f…
|
drh
|
800 |
case '.': |
|
90e564f…
|
drh
|
801 |
case '?': |
|
90e564f…
|
drh
|
802 |
case '*': |
|
90e564f…
|
drh
|
803 |
case '+': |
|
90e564f…
|
drh
|
804 |
case '\\': |
|
90e564f…
|
drh
|
805 |
case '(': |
|
90e564f…
|
drh
|
806 |
case ')': |
|
90e564f…
|
drh
|
807 |
case '[': |
|
90e564f…
|
drh
|
808 |
case ']': |
|
90e564f…
|
drh
|
809 |
case '|': |
|
90e564f…
|
drh
|
810 |
case '^': |
|
90e564f…
|
drh
|
811 |
case '$': |
|
90e564f…
|
drh
|
812 |
case '{': |
|
90e564f…
|
drh
|
813 |
case '}': { |
|
90e564f…
|
drh
|
814 |
blob_appendf(&out,"\\x%02x", (unsigned char)zIn[0]); |
|
90e564f…
|
drh
|
815 |
break; |
|
90e564f…
|
drh
|
816 |
} |
|
90e564f…
|
drh
|
817 |
default: { |
|
90e564f…
|
drh
|
818 |
blob_append_char(&out, zIn[0]); |
|
90e564f…
|
drh
|
819 |
break; |
|
90e564f…
|
drh
|
820 |
} |
|
90e564f…
|
drh
|
821 |
} |
|
90e564f…
|
drh
|
822 |
zIn++; |
|
90e564f…
|
drh
|
823 |
} |
|
90e564f…
|
drh
|
824 |
blob_materialize(&out); |
|
90e564f…
|
drh
|
825 |
return out.aData; |
|
90e564f…
|
drh
|
826 |
} |
|
90e564f…
|
drh
|
827 |
|
|
90e564f…
|
drh
|
828 |
/* |
|
82888a0…
|
drh
|
829 |
** SETTING: regexp-limit width=8 default=1000 |
|
90e564f…
|
drh
|
830 |
** |
|
82888a0…
|
drh
|
831 |
** Limit the size of the bytecode used to implement a regular expression |
|
82888a0…
|
drh
|
832 |
** to this many steps. It is important to limit this to avoid possible |
|
82888a0…
|
drh
|
833 |
** DoS attacks. |
|
82888a0…
|
drh
|
834 |
*/ |
|
82888a0…
|
drh
|
835 |
|
|
82888a0…
|
drh
|
836 |
/* |
|
82888a0…
|
drh
|
837 |
** Compile an RE using re_maxlen(). |
|
90e564f…
|
drh
|
838 |
*/ |
|
82888a0…
|
drh
|
839 |
const char *fossil_re_compile( |
|
82888a0…
|
drh
|
840 |
ReCompiled **ppRe, /* OUT: write compiled NFA here */ |
|
82888a0…
|
drh
|
841 |
const char *zIn, /* Input regular expression */ |
|
82888a0…
|
drh
|
842 |
int noCase /* True for caseless comparisons */ |
|
90e564f…
|
drh
|
843 |
){ |
|
a18dab4…
|
drh
|
844 |
int mxLen = g.db ? db_get_int("regexp-limit",1000) : 1000; |
|
a18dab4…
|
drh
|
845 |
return re_compile(ppRe, zIn, mxLen, noCase); |
|
c5a98aa…
|
drh
|
846 |
} |
|
c5a98aa…
|
drh
|
847 |
|
|
c5a98aa…
|
drh
|
848 |
/* |
|
c5a98aa…
|
drh
|
849 |
** Run a "grep" over a single file read from disk. |
|
c5a98aa…
|
drh
|
850 |
*/ |
|
c5a98aa…
|
drh
|
851 |
static void grep_file(ReCompiled *pRe, const char *zFile, FILE *in){ |
|
e2552de…
|
drh
|
852 |
int ln = 0; |
|
e2552de…
|
drh
|
853 |
int n; |
|
e2552de…
|
drh
|
854 |
char zLine[2000]; |
|
e2552de…
|
drh
|
855 |
while( fgets(zLine, sizeof(zLine), in) ){ |
|
e2552de…
|
drh
|
856 |
ln++; |
|
e2552de…
|
drh
|
857 |
n = (int)strlen(zLine); |
|
e2552de…
|
drh
|
858 |
while( n && (zLine[n-1]=='\n' || zLine[n-1]=='\r') ) n--; |
|
c84051f…
|
drh
|
859 |
if( re_match(pRe, (const unsigned char*)zLine, n) ){ |
|
c5a98aa…
|
drh
|
860 |
fossil_print("%s:%d:%.*s\n", zFile, ln, n, zLine); |
|
c5a98aa…
|
drh
|
861 |
} |
|
c5a98aa…
|
drh
|
862 |
} |
|
c5a98aa…
|
drh
|
863 |
} |
|
c5a98aa…
|
drh
|
864 |
|
|
c5a98aa…
|
drh
|
865 |
/* |
|
c5a98aa…
|
drh
|
866 |
** Flags for grep_buffer() |
|
c5a98aa…
|
drh
|
867 |
*/ |
|
c5a98aa…
|
drh
|
868 |
#define GREP_EXISTS 0x001 /* If any match, print only the name and stop */ |
|
f5f4471…
|
drh
|
869 |
#define GREP_QUIET 0x002 /* Return code only */ |
|
c5a98aa…
|
drh
|
870 |
|
|
c5a98aa…
|
drh
|
871 |
/* |
|
c5a98aa…
|
drh
|
872 |
** Run a "grep" over a text file |
|
c5a98aa…
|
drh
|
873 |
*/ |
|
c5a98aa…
|
drh
|
874 |
static int grep_buffer( |
|
c5a98aa…
|
drh
|
875 |
ReCompiled *pRe, |
|
c5a98aa…
|
drh
|
876 |
const char *zName, |
|
c5a98aa…
|
drh
|
877 |
const char *z, |
|
c5a98aa…
|
drh
|
878 |
u32 flags |
|
c5a98aa…
|
drh
|
879 |
){ |
|
c5a98aa…
|
drh
|
880 |
int i, j, n, ln, cnt; |
|
c5a98aa…
|
drh
|
881 |
for(i=j=ln=cnt=0; z[i]; i=j+1){ |
|
c5a98aa…
|
drh
|
882 |
for(j=i; z[j] && z[j]!='\n'; j++){} |
|
c5a98aa…
|
drh
|
883 |
n = j - i; |
|
c5a98aa…
|
drh
|
884 |
ln++; |
|
c5a98aa…
|
drh
|
885 |
if( re_match(pRe, (const unsigned char*)(z+i), j-i) ){ |
|
c5a98aa…
|
drh
|
886 |
cnt++; |
|
c5a98aa…
|
drh
|
887 |
if( flags & GREP_EXISTS ){ |
|
f5f4471…
|
drh
|
888 |
if( (flags & GREP_QUIET)==0 && zName ) fossil_print("%s\n", zName); |
|
c5a98aa…
|
drh
|
889 |
break; |
|
c5a98aa…
|
drh
|
890 |
} |
|
f5f4471…
|
drh
|
891 |
if( (flags & GREP_QUIET)==0 ){ |
|
f5f4471…
|
drh
|
892 |
if( cnt==1 && zName ){ |
|
f5f4471…
|
drh
|
893 |
fossil_print("== %s\n", zName); |
|
f5f4471…
|
drh
|
894 |
} |
|
f5f4471…
|
drh
|
895 |
fossil_print("%d:%.*s\n", ln, n, z+i); |
|
f5f4471…
|
drh
|
896 |
} |
|
e2552de…
|
drh
|
897 |
} |
|
e2552de…
|
drh
|
898 |
} |
|
c5a98aa…
|
drh
|
899 |
return cnt; |
|
e2552de…
|
drh
|
900 |
} |
|
e2552de…
|
drh
|
901 |
|
|
e2552de…
|
drh
|
902 |
/* |
|
e2552de…
|
drh
|
903 |
** COMMAND: test-grep |
|
e2552de…
|
drh
|
904 |
** |
|
e2552de…
|
drh
|
905 |
** Usage: %fossil test-grep REGEXP [FILE...] |
|
e2552de…
|
drh
|
906 |
** |
|
e2552de…
|
drh
|
907 |
** Run a regular expression match over the named disk files, or against |
|
e2552de…
|
drh
|
908 |
** standard input if no disk files are named on the command-line. |
|
62cd2e2…
|
drh
|
909 |
** |
|
62cd2e2…
|
drh
|
910 |
** Options: |
|
62cd2e2…
|
drh
|
911 |
** -i|--ignore-case Ignore case |
|
86b6ef7…
|
drh
|
912 |
** --robot-exception Use the robot-exception setting as the REGEXP |
|
e2552de…
|
drh
|
913 |
*/ |
|
e2552de…
|
drh
|
914 |
void re_test_grep(void){ |
|
e2552de…
|
drh
|
915 |
ReCompiled *pRe; |
|
e2552de…
|
drh
|
916 |
const char *zErr; |
|
86b6ef7…
|
drh
|
917 |
int iFileList = 3; |
|
62cd2e2…
|
drh
|
918 |
int ignoreCase = find_option("ignore-case","i",0)!=0; |
|
86b6ef7…
|
drh
|
919 |
int bRobot = find_option("robot-exception",0,0)!=0; |
|
86b6ef7…
|
drh
|
920 |
if( bRobot ){ |
|
86b6ef7…
|
drh
|
921 |
const char *zRe; |
|
86b6ef7…
|
drh
|
922 |
db_find_and_open_repository(0,0); |
|
86b6ef7…
|
drh
|
923 |
verify_all_options(); |
|
86b6ef7…
|
drh
|
924 |
zRe = db_get("robot-exception","^$"); |
|
82888a0…
|
drh
|
925 |
zErr = fossil_re_compile(&pRe, zRe, ignoreCase); |
|
86b6ef7…
|
drh
|
926 |
iFileList = 2; |
|
86b6ef7…
|
drh
|
927 |
}else{ |
|
86b6ef7…
|
drh
|
928 |
verify_all_options(); |
|
86b6ef7…
|
drh
|
929 |
if( g.argc<3 ){ |
|
86b6ef7…
|
drh
|
930 |
usage("REGEXP [FILE...]"); |
|
86b6ef7…
|
drh
|
931 |
} |
|
82888a0…
|
drh
|
932 |
zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase); |
|
e2552de…
|
drh
|
933 |
} |
|
e2552de…
|
drh
|
934 |
if( zErr ) fossil_fatal("%s", zErr); |
|
86b6ef7…
|
drh
|
935 |
if( g.argc==iFileList ){ |
|
c5a98aa…
|
drh
|
936 |
grep_file(pRe, "-", stdin); |
|
e2552de…
|
drh
|
937 |
}else{ |
|
e2552de…
|
drh
|
938 |
int i; |
|
86b6ef7…
|
drh
|
939 |
for(i=iFileList; i<g.argc; i++){ |
|
e2552de…
|
drh
|
940 |
FILE *in = fossil_fopen(g.argv[i], "rb"); |
|
e2552de…
|
drh
|
941 |
if( in==0 ){ |
|
e2552de…
|
drh
|
942 |
fossil_warning("cannot open \"%s\"", g.argv[i]); |
|
e2552de…
|
drh
|
943 |
}else{ |
|
c5a98aa…
|
drh
|
944 |
grep_file(pRe, g.argv[i], in); |
|
e2552de…
|
drh
|
945 |
fclose(in); |
|
e2552de…
|
drh
|
946 |
} |
|
e2552de…
|
drh
|
947 |
} |
|
e2552de…
|
drh
|
948 |
} |
|
e2552de…
|
drh
|
949 |
re_free(pRe); |
|
c5a98aa…
|
drh
|
950 |
} |
|
c5a98aa…
|
drh
|
951 |
|
|
c5a98aa…
|
drh
|
952 |
/* |
|
c5a98aa…
|
drh
|
953 |
** COMMAND: grep |
|
c5a98aa…
|
drh
|
954 |
** |
|
f5f4471…
|
drh
|
955 |
** Usage: %fossil grep [OPTIONS] PATTERN FILENAME ... |
|
f5f4471…
|
drh
|
956 |
** |
|
60a28a8…
|
danield
|
957 |
** Attempt to match the given POSIX extended regular expression PATTERN over |
|
60a28a8…
|
danield
|
958 |
** all historic versions of FILENAME. The search begins with the most recent |
|
f5f4471…
|
drh
|
959 |
** version of the file and moves backwards in time. Multiple FILENAMEs can |
|
f5f4471…
|
drh
|
960 |
** be specified, in which case all named files are searched in reverse |
|
f5f4471…
|
drh
|
961 |
** chronological order. |
|
c5a98aa…
|
drh
|
962 |
** |
|
f5f4471…
|
drh
|
963 |
** For details of the supported regular expression dialect, see |
|
f5f4471…
|
drh
|
964 |
** https://fossil-scm.org/fossil/doc/trunk/www/grep.md |
|
c5a98aa…
|
drh
|
965 |
** |
|
c5a98aa…
|
drh
|
966 |
** Options: |
|
f5f4471…
|
drh
|
967 |
** -c|--count Suppress normal output; instead print a count |
|
f5f4471…
|
drh
|
968 |
** of the number of matching files |
|
f5f4471…
|
drh
|
969 |
** -i|--ignore-case Ignore case |
|
f5f4471…
|
drh
|
970 |
** -l|--files-with-matches List only hash for each match |
|
f5f4471…
|
drh
|
971 |
** --once Stop searching after the first match |
|
2f78b2c…
|
danield
|
972 |
** -s|--no-messages Suppress error messages about nonexistent |
|
f5f4471…
|
drh
|
973 |
** or unreadable files |
|
f5f4471…
|
drh
|
974 |
** -v|--invert-match Invert the sense of matching. Show only |
|
f5f4471…
|
drh
|
975 |
** files that have no matches. Implies -l |
|
f5f4471…
|
drh
|
976 |
** --verbose Show each file as it is analyzed |
|
c5a98aa…
|
drh
|
977 |
*/ |
|
c5a98aa…
|
drh
|
978 |
void re_grep_cmd(void){ |
|
c5a98aa…
|
drh
|
979 |
u32 flags = 0; |
|
c5a98aa…
|
drh
|
980 |
int bVerbose = 0; |
|
c5a98aa…
|
drh
|
981 |
ReCompiled *pRe; |
|
c5a98aa…
|
drh
|
982 |
const char *zErr; |
|
c5a98aa…
|
drh
|
983 |
int ignoreCase = 0; |
|
c5a98aa…
|
drh
|
984 |
Blob fullName; |
|
f5f4471…
|
drh
|
985 |
int ii; |
|
f5f4471…
|
drh
|
986 |
int nMatch = 0; |
|
f5f4471…
|
drh
|
987 |
int bNoMsg; |
|
f5f4471…
|
drh
|
988 |
int cntFlag; |
|
f5f4471…
|
drh
|
989 |
int bOnce; |
|
f5f4471…
|
drh
|
990 |
int bInvert; |
|
f5f4471…
|
drh
|
991 |
int nSearch = 0; |
|
f5f4471…
|
drh
|
992 |
Stmt q; |
|
f5f4471…
|
drh
|
993 |
|
|
c5a98aa…
|
drh
|
994 |
|
|
c5a98aa…
|
drh
|
995 |
if( find_option("ignore-case","i",0)!=0 ) ignoreCase = 1; |
|
c5a98aa…
|
drh
|
996 |
if( find_option("files-with-matches","l",0)!=0 ) flags |= GREP_EXISTS; |
|
f5f4471…
|
drh
|
997 |
if( find_option("verbose",0,0)!=0 ) bVerbose = 1; |
|
74d5ce3…
|
florian
|
998 |
if( g.fQuiet ) flags |= GREP_QUIET|GREP_EXISTS; |
|
f5f4471…
|
drh
|
999 |
bNoMsg = find_option("no-messages","s",0)!=0; |
|
f5f4471…
|
drh
|
1000 |
bOnce = find_option("once",0,0)!=0; |
|
f5f4471…
|
drh
|
1001 |
bInvert = find_option("invert-match","v",0)!=0; |
|
f5f4471…
|
drh
|
1002 |
if( bInvert ){ |
|
f5f4471…
|
drh
|
1003 |
flags |= GREP_QUIET|GREP_EXISTS; |
|
f5f4471…
|
drh
|
1004 |
} |
|
f5f4471…
|
drh
|
1005 |
cntFlag = find_option("count","c",0)!=0; |
|
f5f4471…
|
drh
|
1006 |
if( cntFlag ){ |
|
f5f4471…
|
drh
|
1007 |
flags |= GREP_QUIET|GREP_EXISTS; |
|
f5f4471…
|
drh
|
1008 |
} |
|
c5a98aa…
|
drh
|
1009 |
db_find_and_open_repository(0, 0); |
|
c5a98aa…
|
drh
|
1010 |
verify_all_options(); |
|
f6502e8…
|
wyoung
|
1011 |
if( g.argc<4 ){ |
|
f5f4471…
|
drh
|
1012 |
usage("REGEXP FILENAME ..."); |
|
c5a98aa…
|
drh
|
1013 |
} |
|
82888a0…
|
drh
|
1014 |
zErr = fossil_re_compile(&pRe, g.argv[2], ignoreCase); |
|
c5a98aa…
|
drh
|
1015 |
if( zErr ) fossil_fatal("%s", zErr); |
|
c5a98aa…
|
drh
|
1016 |
|
|
f5f4471…
|
drh
|
1017 |
add_content_sql_commands(g.db); |
|
f5f4471…
|
drh
|
1018 |
db_multi_exec("CREATE TEMP TABLE arglist(iname,fname,fnid);"); |
|
f5f4471…
|
drh
|
1019 |
for(ii=3; ii<g.argc; ii++){ |
|
f5f4471…
|
drh
|
1020 |
const char *zTarget = g.argv[ii]; |
|
f5f4471…
|
drh
|
1021 |
if( file_tree_name(zTarget, &fullName, 0, 1) ){ |
|
f5f4471…
|
drh
|
1022 |
int fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", |
|
f5f4471…
|
drh
|
1023 |
blob_str(&fullName)); |
|
f5f4471…
|
drh
|
1024 |
if( !fnid ){ |
|
f5f4471…
|
drh
|
1025 |
if( bNoMsg ) continue; |
|
f5f4471…
|
drh
|
1026 |
if( file_size(zTarget, ExtFILE)<0 ){ |
|
f5f4471…
|
drh
|
1027 |
fossil_fatal("no such file: %s", zTarget); |
|
f5f4471…
|
drh
|
1028 |
} |
|
f5f4471…
|
drh
|
1029 |
fossil_fatal("not a managed file: %s", zTarget); |
|
f5f4471…
|
drh
|
1030 |
}else{ |
|
f5f4471…
|
drh
|
1031 |
db_multi_exec( |
|
f5f4471…
|
drh
|
1032 |
"INSERT INTO arglist(iname,fname,fnid) VALUES(%Q,%Q,%d)", |
|
f5f4471…
|
drh
|
1033 |
zTarget, blob_str(&fullName), fnid); |
|
f5f4471…
|
drh
|
1034 |
} |
|
f5f4471…
|
drh
|
1035 |
} |
|
f5f4471…
|
drh
|
1036 |
blob_reset(&fullName); |
|
f5f4471…
|
drh
|
1037 |
} |
|
f5f4471…
|
drh
|
1038 |
db_prepare(&q, |
|
f5f4471…
|
drh
|
1039 |
" SELECT" |
|
f5f4471…
|
drh
|
1040 |
" A.uuid," /* file hash */ |
|
f5f4471…
|
drh
|
1041 |
" A.rid," /* file rid */ |
|
f5f4471…
|
drh
|
1042 |
" B.uuid," /* check-in hash */ |
|
f5f4471…
|
drh
|
1043 |
" datetime(min(event.mtime))," /* check-in time */ |
|
f5f4471…
|
drh
|
1044 |
" arglist.iname" /* file name */ |
|
f5f4471…
|
drh
|
1045 |
" FROM arglist, mlink, blob A, blob B, event" |
|
f5f4471…
|
drh
|
1046 |
" WHERE mlink.mid=event.objid" |
|
f5f4471…
|
drh
|
1047 |
" AND mlink.fid=A.rid" |
|
f5f4471…
|
drh
|
1048 |
" AND mlink.mid=B.rid" |
|
f5f4471…
|
drh
|
1049 |
" AND mlink.fnid=arglist.fnid" |
|
f5f4471…
|
drh
|
1050 |
" GROUP BY A.uuid" |
|
f5f4471…
|
drh
|
1051 |
" ORDER BY min(event.mtime) DESC;" |
|
f5f4471…
|
drh
|
1052 |
); |
|
f5f4471…
|
drh
|
1053 |
while( db_step(&q)==SQLITE_ROW ){ |
|
f5f4471…
|
drh
|
1054 |
const char *zFileHash = db_column_text(&q,0); |
|
f5f4471…
|
drh
|
1055 |
int rid = db_column_int(&q,1); |
|
f5f4471…
|
drh
|
1056 |
const char *zCkinHash = db_column_text(&q,2); |
|
f5f4471…
|
drh
|
1057 |
const char *zDate = db_column_text(&q,3); |
|
f5f4471…
|
drh
|
1058 |
const char *zFN = db_column_text(&q,4); |
|
f5f4471…
|
drh
|
1059 |
char *zLabel; |
|
f5f4471…
|
drh
|
1060 |
Blob cx; |
|
f5f4471…
|
drh
|
1061 |
content_get(rid, &cx); |
|
f5f4471…
|
drh
|
1062 |
zLabel = mprintf("%.16s %s %S checkin %S", zDate, zFN,zFileHash,zCkinHash); |
|
f5f4471…
|
drh
|
1063 |
if( bVerbose ) fossil_print("Scanning: %s\n", zLabel); |
|
f5f4471…
|
drh
|
1064 |
nSearch++; |
|
f5f4471…
|
drh
|
1065 |
nMatch += grep_buffer(pRe, zLabel, blob_str(&cx), flags); |
|
f5f4471…
|
drh
|
1066 |
blob_reset(&cx); |
|
f5f4471…
|
drh
|
1067 |
if( bInvert && cntFlag==0 ){ |
|
f5f4471…
|
drh
|
1068 |
if( nMatch==0 ){ |
|
f5f4471…
|
drh
|
1069 |
fossil_print("== %s\n", zLabel); |
|
f5f4471…
|
drh
|
1070 |
if( bOnce ) nMatch = 1; |
|
f5f4471…
|
drh
|
1071 |
}else{ |
|
f5f4471…
|
drh
|
1072 |
nMatch = 0; |
|
f5f4471…
|
drh
|
1073 |
} |
|
f5f4471…
|
drh
|
1074 |
} |
|
f5f4471…
|
drh
|
1075 |
fossil_free(zLabel); |
|
f5f4471…
|
drh
|
1076 |
if( nMatch ){ |
|
f5f4471…
|
drh
|
1077 |
if( (flags & GREP_QUIET)!=0 ) break; |
|
f5f4471…
|
drh
|
1078 |
if( bOnce ) break; |
|
f5f4471…
|
drh
|
1079 |
} |
|
f5f4471…
|
drh
|
1080 |
} |
|
f5f4471…
|
drh
|
1081 |
db_finalize(&q); |
|
f5f4471…
|
drh
|
1082 |
re_free(pRe); |
|
f5f4471…
|
drh
|
1083 |
if( cntFlag ){ |
|
f5f4471…
|
drh
|
1084 |
if( bInvert ){ |
|
f5f4471…
|
drh
|
1085 |
fossil_print("%d\n", nSearch-nMatch); |
|
f5f4471…
|
drh
|
1086 |
}else{ |
|
f5f4471…
|
drh
|
1087 |
fossil_print("%d\n", nMatch); |
|
f5f4471…
|
drh
|
1088 |
} |
|
f5f4471…
|
drh
|
1089 |
} |
|
8779bd0…
|
drh
|
1090 |
} |
|
8779bd0…
|
drh
|
1091 |
|
|
8779bd0…
|
drh
|
1092 |
/* |
|
8779bd0…
|
drh
|
1093 |
** WEBPAGE: re_rules |
|
8779bd0…
|
drh
|
1094 |
** |
|
8779bd0…
|
drh
|
1095 |
** Show a summary of the regular expression matching rules for Fossil. |
|
8779bd0…
|
drh
|
1096 |
*/ |
|
8779bd0…
|
drh
|
1097 |
void re_rules_page(void){ |
|
8779bd0…
|
drh
|
1098 |
style_set_current_feature("wiki"); |
|
8779bd0…
|
drh
|
1099 |
style_header("Regular Expression Syntax"); |
|
8779bd0…
|
drh
|
1100 |
@ <p>Syntax rules for regular expression matching in Fossil:</p> |
|
8779bd0…
|
drh
|
1101 |
@ |
|
8779bd0…
|
drh
|
1102 |
@ <table border="0" cellpadding="0" cellspacing="0"> |
|
8779bd0…
|
drh
|
1103 |
@ <tr><th>   <th>Pattern |
|
8779bd0…
|
drh
|
1104 |
@ <th>   <th align="left">Match |
|
8779bd0…
|
drh
|
1105 |
@ <tr><td><td><i>X</i><b>*</b> |
|
8779bd0…
|
drh
|
1106 |
@ <td><td>Zero or more occurrences of <i>X</i> |
|
8779bd0…
|
drh
|
1107 |
@ <tr><td><td><i>X</i><b>+</b> |
|
8779bd0…
|
drh
|
1108 |
@ <td><td>One or more occurrences of <i>X</i> |
|
8779bd0…
|
drh
|
1109 |
@ <tr><td><td><i>X</i><b>?</b> |
|
8779bd0…
|
drh
|
1110 |
@ <td><td>Zero or one occurrences of <i>X</i> |
|
8779bd0…
|
drh
|
1111 |
@ <tr><td><td><i>X</i><b>{</b><i>P</i><b>,</b><i>Q</i><b>}</b> |
|
8779bd0…
|
drh
|
1112 |
@ <td><td>Between P and Q occurrences of <i>X</i> |
|
8779bd0…
|
drh
|
1113 |
@ <tr><td><td><b>(</b><i>X</i><b>)</b> |
|
8779bd0…
|
drh
|
1114 |
@ <td><td><i>X</i> |
|
8779bd0…
|
drh
|
1115 |
@ <tr><td><td><i>X</i><b>|</b><i>Y</i> |
|
8779bd0…
|
drh
|
1116 |
@ <td><td><i>X</i> or <i>Y</i> |
|
8779bd0…
|
drh
|
1117 |
@ <tr><td><td><b>^</b><i>X</i> |
|
8779bd0…
|
drh
|
1118 |
@ <td><td><i>X</i> at the beginning of the string |
|
8779bd0…
|
drh
|
1119 |
@ <tr><td><td><i>X</i><b>$</b> |
|
8779bd0…
|
drh
|
1120 |
@ <td><td><i>X</i> at the end of the string |
|
8779bd0…
|
drh
|
1121 |
@ <tr><td><td><b>.</b> |
|
8779bd0…
|
drh
|
1122 |
@ <td><td>Any single character |
|
8779bd0…
|
drh
|
1123 |
@ <tr><td><td><b>\</b><i>C</i> |
|
8779bd0…
|
drh
|
1124 |
@ <td><td>Character <i>C</i> if <i>C</i> is one of: <b>\{}()[]|*+?</b> |
|
8779bd0…
|
drh
|
1125 |
@ <tr><td><td><b>\</b><i>C</i> |
|
8779bd0…
|
drh
|
1126 |
@ <td><td>C-language escapes if <i>C</i> is one of: <b>afnrtv</b> |
|
8779bd0…
|
drh
|
1127 |
@ <tr><td><td><b>\u</b><i>HHHH</i> |
|
8779bd0…
|
drh
|
1128 |
@ <td><td>Unicode character U+HHHH where <i>HHHH</i> is four hex digits |
|
8779bd0…
|
drh
|
1129 |
@ <tr><td><td><b>\</b><i>HH</i> |
|
8779bd0…
|
drh
|
1130 |
@ <td><td>Unicode character U+00HH where <i>HH</i> is two hex digits |
|
8779bd0…
|
drh
|
1131 |
@ <tr><td><td><b>[</b><i>abc</i><b>]</b> |
|
8779bd0…
|
drh
|
1132 |
@ <td><td>Any single character from <i>abc</i> |
|
8779bd0…
|
drh
|
1133 |
@ <tr><td><td><b>[^</b><i>abc</i><b>]</b> |
|
8779bd0…
|
drh
|
1134 |
@ <td><td>Any single character not in <i>abc</i> |
|
8779bd0…
|
drh
|
1135 |
@ <tr><td><td><b>[</b><i>a-z</i><b>]</b> |
|
8779bd0…
|
drh
|
1136 |
@ <td><td>Any single character between <i>a</i> and <i>z</i>, inclusive |
|
8779bd0…
|
drh
|
1137 |
@ <tr><td><td><b>[^</b><i>a-z</i><b>]</b> |
|
8779bd0…
|
drh
|
1138 |
@ <td><td>Any single character not between <i>a</i> and <i>z</i> |
|
8779bd0…
|
drh
|
1139 |
@ <tr><td><td><b>\b</b> |
|
8779bd0…
|
drh
|
1140 |
@ <td><td>Word boundary |
|
8779bd0…
|
drh
|
1141 |
@ <tr><td><td><b>\w</b> |
|
8779bd0…
|
drh
|
1142 |
@ <td><td>A word character: a-zA-Z0-9 or _ |
|
8779bd0…
|
drh
|
1143 |
@ <tr><td><td><b>\W</b> |
|
8779bd0…
|
drh
|
1144 |
@ <td><td>A non-word character |
|
8779bd0…
|
drh
|
1145 |
@ <tr><td><td><b>\d</b> |
|
8779bd0…
|
drh
|
1146 |
@ <td><td>A digit. 0-9 |
|
8779bd0…
|
drh
|
1147 |
@ <tr><td><td><b>\D</b> |
|
8779bd0…
|
drh
|
1148 |
@ <td><td>A non-digit character |
|
8779bd0…
|
drh
|
1149 |
@ <tr><td><td><b>\s</b> |
|
8779bd0…
|
drh
|
1150 |
@ <td><td>A whitespace character |
|
8779bd0…
|
drh
|
1151 |
@ <tr><td><td><b>\S</b> |
|
8779bd0…
|
drh
|
1152 |
@ <td><td>A non-whitespace character |
|
8779bd0…
|
drh
|
1153 |
@ </table> |
|
8779bd0…
|
drh
|
1154 |
@ |
|
8779bd0…
|
drh
|
1155 |
@ <p>In the "Pattern" column of the table above:</p> |
|
8779bd0…
|
drh
|
1156 |
@ <ul> |
|
8779bd0…
|
drh
|
1157 |
@ <li> "<i>X</i>" and "<i>Y</i>" mean any subpattern |
|
8779bd0…
|
drh
|
1158 |
@ <li> "<i>P</i>" and "<i>Q</i>" mean integers |
|
8779bd0…
|
drh
|
1159 |
@ <li> "<i>C</i>" means a single character |
|
8779bd0…
|
drh
|
1160 |
@ <li> "<i>H</i>" means a hexadecimal digit |
|
8779bd0…
|
drh
|
1161 |
@ <li> "<i>abc</i>" means any sequences of one or more characters |
|
8779bd0…
|
drh
|
1162 |
@ <li> "<i>a-z</i>" means any single character, a single "<b>-</b>" |
|
8779bd0…
|
drh
|
1163 |
@ character, and then one additional character. |
|
8779bd0…
|
drh
|
1164 |
@ <li> All other symbols in the patterns are literal text |
|
8779bd0…
|
drh
|
1165 |
@ </ul> |
|
8779bd0…
|
drh
|
1166 |
@ |
|
8779bd0…
|
drh
|
1167 |
@ <p>The "<i>X</i><b>|</b><i>Y</i>" pattern has lower precedence |
|
8779bd0…
|
drh
|
1168 |
@ than the others. Use "<b>(</b>...<b>)</b>" for grouping, as |
|
8779bd0…
|
drh
|
1169 |
@ necessary. |
|
8779bd0…
|
drh
|
1170 |
style_finish_page(); |
|
e2552de…
|
drh
|
1171 |
} |