Fossil SCM

Add the /re_rules page.

drh 2025-08-22 13:59 trunk
Commit 8779bd0b3a60ee2c4e8fc5cac72da1c1a54024a3f32e713588806d01c2cbce8d
1 file changed +81
+81
--- src/regexp.c
+++ src/regexp.c
@@ -1029,5 +1029,86 @@
10291029
}else{
10301030
fossil_print("%d\n", nMatch);
10311031
}
10321032
}
10331033
}
1034
+
1035
+/*
1036
+** WEBPAGE: re_rules
1037
+**
1038
+** Show a summary of the regular expression matching rules for Fossil.
1039
+*/
1040
+void re_rules_page(void){
1041
+ style_set_current_feature("wiki");
1042
+ style_header("Regular Expression Syntax");
1043
+ @ <p>Syntax rules for regular expression matching in Fossil:</p>
1044
+ @
1045
+ @ <table border="0" cellpadding="0" cellspacing="0">
1046
+ @ <tr><th>&emsp;&emsp;&emsp;<th>Pattern
1047
+ @ <th>&emsp;&emsp;&emsp;<th align="left">Match
1048
+ @ <tr><td><td><i>X</i><b>*</b>
1049
+ @ <td><td>Zero or more occurrences of <i>X</i>
1050
+ @ <tr><td><td><i>X</i><b>+</b>
1051
+ @ <td><td>One or more occurrences of <i>X</i>
1052
+ @ <tr><td><td><i>X</i><b>?</b>
1053
+ @ <td><td>Zero or one occurrences of <i>X</i>
1054
+ @ <tr><td><td><i>X</i><b>{</b><i>P</i><b>,</b><i>Q</i><b>}</b>
1055
+ @ <td><td>Between P and Q occurrences of <i>X</i>
1056
+ @ <tr><td><td><b>(</b><i>X</i><b>)</b>
1057
+ @ <td><td><i>X</i>
1058
+ @ <tr><td><td><i>X</i><b>|</b><i>Y</i>
1059
+ @ <td><td><i>X</i> or <i>Y</i>
1060
+ @ <tr><td><td><b>^</b><i>X</i>
1061
+ @ <td><td><i>X</i> at the beginning of the string
1062
+ @ <tr><td><td><i>X</i><b>$</b>
1063
+ @ <td><td><i>X</i> at the end of the string
1064
+ @ <tr><td><td><b>.</b>
1065
+ @ <td><td>Any single character
1066
+ @ <tr><td><td><b>\</b><i>C</i>
1067
+ @ <td><td>Character <i>C</i> if <i>C</i> is one of: <b>\{}()[]|*+?</b>
1068
+ @ <tr><td><td><b>\</b><i>C</i>
1069
+ @ <td><td>C-language escapes if <i>C</i> is one of: <b>afnrtv</b>
1070
+ @ <tr><td><td><b>\u</b><i>HHHH</i>
1071
+ @ <td><td>Unicode character U+HHHH where <i>HHHH</i> is four hex digits
1072
+ @ <tr><td><td><b>\</b><i>HH</i>
1073
+ @ <td><td>Unicode character U+00HH where <i>HH</i> is two hex digits
1074
+ @ <tr><td><td><b>[</b><i>abc</i><b>]</b>
1075
+ @ <td><td>Any single character from <i>abc</i>
1076
+ @ <tr><td><td><b>[^</b><i>abc</i><b>]</b>
1077
+ @ <td><td>Any single character not in <i>abc</i>
1078
+ @ <tr><td><td><b>[</b><i>a-z</i><b>]</b>
1079
+ @ <td><td>Any single character between <i>a</i> and <i>z</i>, inclusive
1080
+ @ <tr><td><td><b>[^</b><i>a-z</i><b>]</b>
1081
+ @ <td><td>Any single character not between <i>a</i> and <i>z</i>
1082
+ @ <tr><td><td><b>\b</b>
1083
+ @ <td><td>Word boundary
1084
+ @ <tr><td><td><b>\w</b>
1085
+ @ <td><td>A word character: a-zA-Z0-9 or _
1086
+ @ <tr><td><td><b>\W</b>
1087
+ @ <td><td>A non-word character
1088
+ @ <tr><td><td><b>\d</b>
1089
+ @ <td><td>A digit. 0-9
1090
+ @ <tr><td><td><b>\D</b>
1091
+ @ <td><td>A non-digit character
1092
+ @ <tr><td><td><b>\s</b>
1093
+ @ <td><td>A whitespace character
1094
+ @ <tr><td><td><b>\S</b>
1095
+ @ <td><td>A non-whitespace character
1096
+ @ </table>
1097
+ @
1098
+ @ <p>In the "Pattern" column of the table above:</p>
1099
+ @ <ul>
1100
+ @ <li> "<i>X</i>" and "<i>Y</i>" mean any subpattern
1101
+ @ <li> "<i>P</i>" and "<i>Q</i>" mean integers
1102
+ @ <li> "<i>C</i>" means a single character
1103
+ @ <li> "<i>H</i>" means a hexadecimal digit
1104
+ @ <li> "<i>abc</i>" means any sequences of one or more characters
1105
+ @ <li> "<i>a-z</i>" means any single character, a single "<b>-</b>"
1106
+ @ character, and then one additional character.
1107
+ @ <li> All other symbols in the patterns are literal text
1108
+ @ </ul>
1109
+ @
1110
+ @ <p>The "<i>X</i><b>|</b><i>Y</i>" pattern has lower precedence
1111
+ @ than the others. Use "<b>(</b>...<b>)</b>" for grouping, as
1112
+ @ necessary.
1113
+ style_finish_page();
1114
+}
10341115
--- src/regexp.c
+++ src/regexp.c
@@ -1029,5 +1029,86 @@
1029 }else{
1030 fossil_print("%d\n", nMatch);
1031 }
1032 }
1033 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1034
--- src/regexp.c
+++ src/regexp.c
@@ -1029,5 +1029,86 @@
1029 }else{
1030 fossil_print("%d\n", nMatch);
1031 }
1032 }
1033 }
1034
1035 /*
1036 ** WEBPAGE: re_rules
1037 **
1038 ** Show a summary of the regular expression matching rules for Fossil.
1039 */
1040 void re_rules_page(void){
1041 style_set_current_feature("wiki");
1042 style_header("Regular Expression Syntax");
1043 @ <p>Syntax rules for regular expression matching in Fossil:</p>
1044 @
1045 @ <table border="0" cellpadding="0" cellspacing="0">
1046 @ <tr><th>&emsp;&emsp;&emsp;<th>Pattern
1047 @ <th>&emsp;&emsp;&emsp;<th align="left">Match
1048 @ <tr><td><td><i>X</i><b>*</b>
1049 @ <td><td>Zero or more occurrences of <i>X</i>
1050 @ <tr><td><td><i>X</i><b>+</b>
1051 @ <td><td>One or more occurrences of <i>X</i>
1052 @ <tr><td><td><i>X</i><b>?</b>
1053 @ <td><td>Zero or one occurrences of <i>X</i>
1054 @ <tr><td><td><i>X</i><b>{</b><i>P</i><b>,</b><i>Q</i><b>}</b>
1055 @ <td><td>Between P and Q occurrences of <i>X</i>
1056 @ <tr><td><td><b>(</b><i>X</i><b>)</b>
1057 @ <td><td><i>X</i>
1058 @ <tr><td><td><i>X</i><b>|</b><i>Y</i>
1059 @ <td><td><i>X</i> or <i>Y</i>
1060 @ <tr><td><td><b>^</b><i>X</i>
1061 @ <td><td><i>X</i> at the beginning of the string
1062 @ <tr><td><td><i>X</i><b>$</b>
1063 @ <td><td><i>X</i> at the end of the string
1064 @ <tr><td><td><b>.</b>
1065 @ <td><td>Any single character
1066 @ <tr><td><td><b>\</b><i>C</i>
1067 @ <td><td>Character <i>C</i> if <i>C</i> is one of: <b>\{}()[]|*+?</b>
1068 @ <tr><td><td><b>\</b><i>C</i>
1069 @ <td><td>C-language escapes if <i>C</i> is one of: <b>afnrtv</b>
1070 @ <tr><td><td><b>\u</b><i>HHHH</i>
1071 @ <td><td>Unicode character U+HHHH where <i>HHHH</i> is four hex digits
1072 @ <tr><td><td><b>\</b><i>HH</i>
1073 @ <td><td>Unicode character U+00HH where <i>HH</i> is two hex digits
1074 @ <tr><td><td><b>[</b><i>abc</i><b>]</b>
1075 @ <td><td>Any single character from <i>abc</i>
1076 @ <tr><td><td><b>[^</b><i>abc</i><b>]</b>
1077 @ <td><td>Any single character not in <i>abc</i>
1078 @ <tr><td><td><b>[</b><i>a-z</i><b>]</b>
1079 @ <td><td>Any single character between <i>a</i> and <i>z</i>, inclusive
1080 @ <tr><td><td><b>[^</b><i>a-z</i><b>]</b>
1081 @ <td><td>Any single character not between <i>a</i> and <i>z</i>
1082 @ <tr><td><td><b>\b</b>
1083 @ <td><td>Word boundary
1084 @ <tr><td><td><b>\w</b>
1085 @ <td><td>A word character: a-zA-Z0-9 or _
1086 @ <tr><td><td><b>\W</b>
1087 @ <td><td>A non-word character
1088 @ <tr><td><td><b>\d</b>
1089 @ <td><td>A digit. 0-9
1090 @ <tr><td><td><b>\D</b>
1091 @ <td><td>A non-digit character
1092 @ <tr><td><td><b>\s</b>
1093 @ <td><td>A whitespace character
1094 @ <tr><td><td><b>\S</b>
1095 @ <td><td>A non-whitespace character
1096 @ </table>
1097 @
1098 @ <p>In the "Pattern" column of the table above:</p>
1099 @ <ul>
1100 @ <li> "<i>X</i>" and "<i>Y</i>" mean any subpattern
1101 @ <li> "<i>P</i>" and "<i>Q</i>" mean integers
1102 @ <li> "<i>C</i>" means a single character
1103 @ <li> "<i>H</i>" means a hexadecimal digit
1104 @ <li> "<i>abc</i>" means any sequences of one or more characters
1105 @ <li> "<i>a-z</i>" means any single character, a single "<b>-</b>"
1106 @ character, and then one additional character.
1107 @ <li> All other symbols in the patterns are literal text
1108 @ </ul>
1109 @
1110 @ <p>The "<i>X</i><b>|</b><i>Y</i>" pattern has lower precedence
1111 @ than the others. Use "<b>(</b>...<b>)</b>" for grouping, as
1112 @ necessary.
1113 style_finish_page();
1114 }
1115

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button