Fossil SCM

Enhance the robot-restrict setting with the ability to block robots from specific extensions.

drh 2025-11-22 20:53 trunk
Commit 423159860cc5a1907ac1ef53de5e25ce5c2bdf557089da6c1ccc9bf9cf26ad54
3 files changed +4 +16 -8 +14 -9
--- src/extcgi.c
+++ src/extcgi.c
@@ -171,10 +171,11 @@
171171
FILE *fromChild = 0; /* FILE for reading from child */
172172
int pidChild = 0; /* Process id of the child */
173173
int rc; /* Reply code from subroutine call */
174174
int nContent = -1; /* Content length */
175175
const char *zPathInfo; /* Original PATH_INFO value */
176
+ char *zRestrictTag; /* Tag to restrict specific documents */
176177
Blob reply; /* The reply */
177178
char zLine[1000]; /* One line of the CGI reply */
178179
const char *zSrvSw; /* SERVER_SOFTWARE */
179180
180181
zPathInfo = P("PATH_INFO");
@@ -229,10 +230,13 @@
229230
zFailReason = "path does not match any file or script";
230231
goto ext_not_found;
231232
}
232233
assert( nScript>=nRoot+1 );
233234
style_set_current_page("ext/%s", &zScript[nRoot+1]);
235
+ zRestrictTag = mprintf("ext/%s", &zScript[nRoot+1]);
236
+ if( robot_restrict(zRestrictTag) ) return;
237
+ fossil_free(zRestrictTag);
234238
zMime = P("mimetype");
235239
if( zMime==0 ) zMime = mimetype_from_name(zScript);
236240
if( zMime==0 ) zMime = "application/octet-stream";
237241
if( !file_isexe(zScript, ExtFILE) ){
238242
/* File is not executable. Must be a regular file. In that case,
239243
--- src/extcgi.c
+++ src/extcgi.c
@@ -171,10 +171,11 @@
171 FILE *fromChild = 0; /* FILE for reading from child */
172 int pidChild = 0; /* Process id of the child */
173 int rc; /* Reply code from subroutine call */
174 int nContent = -1; /* Content length */
175 const char *zPathInfo; /* Original PATH_INFO value */
 
176 Blob reply; /* The reply */
177 char zLine[1000]; /* One line of the CGI reply */
178 const char *zSrvSw; /* SERVER_SOFTWARE */
179
180 zPathInfo = P("PATH_INFO");
@@ -229,10 +230,13 @@
229 zFailReason = "path does not match any file or script";
230 goto ext_not_found;
231 }
232 assert( nScript>=nRoot+1 );
233 style_set_current_page("ext/%s", &zScript[nRoot+1]);
 
 
 
234 zMime = P("mimetype");
235 if( zMime==0 ) zMime = mimetype_from_name(zScript);
236 if( zMime==0 ) zMime = "application/octet-stream";
237 if( !file_isexe(zScript, ExtFILE) ){
238 /* File is not executable. Must be a regular file. In that case,
239
--- src/extcgi.c
+++ src/extcgi.c
@@ -171,10 +171,11 @@
171 FILE *fromChild = 0; /* FILE for reading from child */
172 int pidChild = 0; /* Process id of the child */
173 int rc; /* Reply code from subroutine call */
174 int nContent = -1; /* Content length */
175 const char *zPathInfo; /* Original PATH_INFO value */
176 char *zRestrictTag; /* Tag to restrict specific documents */
177 Blob reply; /* The reply */
178 char zLine[1000]; /* One line of the CGI reply */
179 const char *zSrvSw; /* SERVER_SOFTWARE */
180
181 zPathInfo = P("PATH_INFO");
@@ -229,10 +230,13 @@
230 zFailReason = "path does not match any file or script";
231 goto ext_not_found;
232 }
233 assert( nScript>=nRoot+1 );
234 style_set_current_page("ext/%s", &zScript[nRoot+1]);
235 zRestrictTag = mprintf("ext/%s", &zScript[nRoot+1]);
236 if( robot_restrict(zRestrictTag) ) return;
237 fossil_free(zRestrictTag);
238 zMime = P("mimetype");
239 if( zMime==0 ) zMime = mimetype_from_name(zScript);
240 if( zMime==0 ) zMime = "application/octet-stream";
241 if( !file_isexe(zScript, ExtFILE) ){
242 /* File is not executable. Must be a regular file. In that case,
243
+16 -8
--- src/robot.c
+++ src/robot.c
@@ -164,11 +164,11 @@
164164
}
165165
166166
/* Condition 4: If there is a "token=VALUE" query parameter with a
167167
** valid VALUE argument, then assume that the request is coming from
168168
** either an interactive human session, or an authorized robot that we
169
- ** want to treat as human. All it through and also set the robot cookie.
169
+ ** want to treat as human. Allow it through and also set the robot cookie.
170170
*/
171171
z = P("token");
172172
if( z!=0 ){
173173
if( db_exists("SELECT 1 FROM config"
174174
" WHERE name='token-%q'"
@@ -265,17 +265,20 @@
265265
** should be disallowed. "Unauthenticated" means the user is "nobody".
266266
** The recommended value for this setting is:
267267
**
268268
** timelineX,diff,annotate,fileage,file,finfo,reports,tree,download,hexdump
269269
**
270
-** The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
271
-** /vpatch. The "annotate" tag also covers /blame and /praise. "zip"
272
-** also covers /tarball and /sqlar. If a tag has an "X" character appended
273
-** then it only applies if query parameters are such that the page is
274
-** particularly difficult to compute. In all other case, the tag should
275
-** exactly match the page name. Useful "X" tags include "timelineX" and
276
-** "zipX". See the [[robot-zip-leaf]] and [[robot-zip-tag]] settings
270
+** Usually the tag should exactly match the page name. The "diff" tag
271
+** covers all diffing pages such as /vdiff, /fdiff, and /vpatch. The
272
+** "annotate" tag also covers /blame and /praise. "zip" also covers
273
+** /tarball and /sqlar. If a tag has an "X" character appended then it
274
+** only applies if query parameters are such that the page is particularly
275
+** difficult to compute. Useful "X" tags include "timelineX" and "zipX".
276
+** The "ext" tag matches all extensions, but a tag of the form "ext/PATH"
277
+** only matches the extension at PATH.
278
+**
279
+** See the [[robot-zip-leaf]] and [[robot-zip-tag]] settings
277280
** for additional controls associated with the "zipX" restriction.
278281
**
279282
** Change this setting to "off" to disable all robot restrictions.
280283
*/
281284
/*
@@ -324,19 +327,24 @@
324327
}
325328
326329
/*
327330
** Return true if zTag matches one of the tags in the robot-restrict
328331
** setting.
332
+**
333
+** A zTag of NULL or "*" matches anything, unless restrictions are off.
329334
*/
330335
static int robot_restrict_has_tag(const char *zTag){
331336
static const char *zGlob = 0;
332337
if( zGlob==0 ){
333338
zGlob = db_get("robot-restrict",robot_restrict_default());
334339
if( zGlob==0 ) zGlob = "";
335340
}
336341
if( zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){
337342
return 0;
343
+ }
344
+ if( zTag==0 || (zTag[0]=='*' && zTag[1]==0) ){
345
+ return 1;
338346
}
339347
return glob_multi_match(zGlob,zTag);
340348
}
341349
342350
/*
343351
--- src/robot.c
+++ src/robot.c
@@ -164,11 +164,11 @@
164 }
165
166 /* Condition 4: If there is a "token=VALUE" query parameter with a
167 ** valid VALUE argument, then assume that the request is coming from
168 ** either an interactive human session, or an authorized robot that we
169 ** want to treat as human. All it through and also set the robot cookie.
170 */
171 z = P("token");
172 if( z!=0 ){
173 if( db_exists("SELECT 1 FROM config"
174 " WHERE name='token-%q'"
@@ -265,17 +265,20 @@
265 ** should be disallowed. "Unauthenticated" means the user is "nobody".
266 ** The recommended value for this setting is:
267 **
268 ** timelineX,diff,annotate,fileage,file,finfo,reports,tree,download,hexdump
269 **
270 ** The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
271 ** /vpatch. The "annotate" tag also covers /blame and /praise. "zip"
272 ** also covers /tarball and /sqlar. If a tag has an "X" character appended
273 ** then it only applies if query parameters are such that the page is
274 ** particularly difficult to compute. In all other case, the tag should
275 ** exactly match the page name. Useful "X" tags include "timelineX" and
276 ** "zipX". See the [[robot-zip-leaf]] and [[robot-zip-tag]] settings
 
 
 
277 ** for additional controls associated with the "zipX" restriction.
278 **
279 ** Change this setting to "off" to disable all robot restrictions.
280 */
281 /*
@@ -324,19 +327,24 @@
324 }
325
326 /*
327 ** Return true if zTag matches one of the tags in the robot-restrict
328 ** setting.
 
 
329 */
330 static int robot_restrict_has_tag(const char *zTag){
331 static const char *zGlob = 0;
332 if( zGlob==0 ){
333 zGlob = db_get("robot-restrict",robot_restrict_default());
334 if( zGlob==0 ) zGlob = "";
335 }
336 if( zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){
337 return 0;
 
 
 
338 }
339 return glob_multi_match(zGlob,zTag);
340 }
341
342 /*
343
--- src/robot.c
+++ src/robot.c
@@ -164,11 +164,11 @@
164 }
165
166 /* Condition 4: If there is a "token=VALUE" query parameter with a
167 ** valid VALUE argument, then assume that the request is coming from
168 ** either an interactive human session, or an authorized robot that we
169 ** want to treat as human. Allow it through and also set the robot cookie.
170 */
171 z = P("token");
172 if( z!=0 ){
173 if( db_exists("SELECT 1 FROM config"
174 " WHERE name='token-%q'"
@@ -265,17 +265,20 @@
265 ** should be disallowed. "Unauthenticated" means the user is "nobody".
266 ** The recommended value for this setting is:
267 **
268 ** timelineX,diff,annotate,fileage,file,finfo,reports,tree,download,hexdump
269 **
270 ** Usually the tag should exactly match the page name. The "diff" tag
271 ** covers all diffing pages such as /vdiff, /fdiff, and /vpatch. The
272 ** "annotate" tag also covers /blame and /praise. "zip" also covers
273 ** /tarball and /sqlar. If a tag has an "X" character appended then it
274 ** only applies if query parameters are such that the page is particularly
275 ** difficult to compute. Useful "X" tags include "timelineX" and "zipX".
276 ** The "ext" tag matches all extensions, but a tag of the form "ext/PATH"
277 ** only matches the extension at PATH.
278 **
279 ** See the [[robot-zip-leaf]] and [[robot-zip-tag]] settings
280 ** for additional controls associated with the "zipX" restriction.
281 **
282 ** Change this setting to "off" to disable all robot restrictions.
283 */
284 /*
@@ -324,19 +327,24 @@
327 }
328
329 /*
330 ** Return true if zTag matches one of the tags in the robot-restrict
331 ** setting.
332 **
333 ** A zTag of NULL or "*" matches anything, unless restrictions are off.
334 */
335 static int robot_restrict_has_tag(const char *zTag){
336 static const char *zGlob = 0;
337 if( zGlob==0 ){
338 zGlob = db_get("robot-restrict",robot_restrict_default());
339 if( zGlob==0 ) zGlob = "";
340 }
341 if( zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){
342 return 0;
343 }
344 if( zTag==0 || (zTag[0]=='*' && zTag[1]==0) ){
345 return 1;
346 }
347 return glob_multi_match(zGlob,zTag);
348 }
349
350 /*
351
+14 -9
--- src/setup.c
+++ src/setup.c
@@ -490,20 +490,25 @@
490490
@ A captcha test is is rendered instead.
491491
@ The default value for this setting is:
492492
@ <p>
493493
@ &emsp;&emsp;&emsp;<tt>%h(robot_restrict_default())</tt>
494494
@ <p>
495
- @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
496
- @ /vpatch. The "annotate" tag covers /annotate and also /blame and
497
- @ /praise. The "zip" covers itself and also /tarball and /sqlar.
498
- @ If a tag has an "X" character appended (ex: "timelineX") then it only
499
- @ applies if query parameters are such that the page is expensive
500
- @ and/or unusual. In all other case, the tag should exactly match
501
- @ the page name.
502
- @
495
+ @ Usually the tag should exactly match the page name. Exceptions:
496
+ @ <ul>
497
+ @ <li> The "diff" tag covers all diffing pages such as /vdiff,
498
+ @ /fdiff, and /vpatch.
499
+ @ <li> The "annotate" tag covers /annotate and also /blame and
500
+ @ /praise.
501
+ @ <li> The "zip" covers itself and also /tarball and /sqlar.
502
+ @ <li> If a tag has an "X" character appended (ex: "timelineX")
503
+ @ then it only applies if query parameters are such that
504
+ @ the page is expensive and/or unusual.
505
+ @ <li> The "ext" tag covers all extensions, but a tag like
506
+ @ "ext/PATH" only covers the specific extension at PATH.
507
+ @ </ul>
503508
@ To disable robot restrictions, change this setting to "off".
504
- @ (Property: robot-restrict)
509
+ @ (Property: <a href="%R/help/robot-restrict">robot-restrict</a>)
505510
@ <br>
506511
textarea_attribute("", 2, 80,
507512
"robot-restrict", "rbrestrict", robot_restrict_default(), 0);
508513
509514
@ <p><b>Exception #1</b><br>
510515
--- src/setup.c
+++ src/setup.c
@@ -490,20 +490,25 @@
490 @ A captcha test is is rendered instead.
491 @ The default value for this setting is:
492 @ <p>
493 @ &emsp;&emsp;&emsp;<tt>%h(robot_restrict_default())</tt>
494 @ <p>
495 @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
496 @ /vpatch. The "annotate" tag covers /annotate and also /blame and
497 @ /praise. The "zip" covers itself and also /tarball and /sqlar.
498 @ If a tag has an "X" character appended (ex: "timelineX") then it only
499 @ applies if query parameters are such that the page is expensive
500 @ and/or unusual. In all other case, the tag should exactly match
501 @ the page name.
502 @
 
 
 
 
 
503 @ To disable robot restrictions, change this setting to "off".
504 @ (Property: robot-restrict)
505 @ <br>
506 textarea_attribute("", 2, 80,
507 "robot-restrict", "rbrestrict", robot_restrict_default(), 0);
508
509 @ <p><b>Exception #1</b><br>
510
--- src/setup.c
+++ src/setup.c
@@ -490,20 +490,25 @@
490 @ A captcha test is is rendered instead.
491 @ The default value for this setting is:
492 @ <p>
493 @ &emsp;&emsp;&emsp;<tt>%h(robot_restrict_default())</tt>
494 @ <p>
495 @ Usually the tag should exactly match the page name. Exceptions:
496 @ <ul>
497 @ <li> The "diff" tag covers all diffing pages such as /vdiff,
498 @ /fdiff, and /vpatch.
499 @ <li> The "annotate" tag covers /annotate and also /blame and
500 @ /praise.
501 @ <li> The "zip" covers itself and also /tarball and /sqlar.
502 @ <li> If a tag has an "X" character appended (ex: "timelineX")
503 @ then it only applies if query parameters are such that
504 @ the page is expensive and/or unusual.
505 @ <li> The "ext" tag covers all extensions, but a tag like
506 @ "ext/PATH" only covers the specific extension at PATH.
507 @ </ul>
508 @ To disable robot restrictions, change this setting to "off".
509 @ (Property: <a href="%R/help/robot-restrict">robot-restrict</a>)
510 @ <br>
511 textarea_attribute("", 2, 80,
512 "robot-restrict", "rbrestrict", robot_restrict_default(), 0);
513
514 @ <p><b>Exception #1</b><br>
515

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button