Fossil SCM

Only apply the complex-request restriction to pages listed in the robot-restrict setting. Deprecate the robot-limiter and robot-allow settings.

drh 2024-07-27 14:30 trunk
Commit cee1af5a3731d2c44e35abba2623da6ae570520fde9690ec86bbe127448d0884
2 files changed +17 -49 +8 -17
+17 -49
--- src/login.c
+++ src/login.c
@@ -1253,41 +1253,25 @@
12531253
}
12541254
fossil_free(zDecode);
12551255
return uid;
12561256
}
12571257
1258
-/*
1259
-** SETTING: robot-limiter boolean default=off
1260
-** If enabled, HTTP requests with one or more query parameters and
1261
-** without a REFERER string and without a valid login cookie are
1262
-** assumed to be hostile robots and are redirected to the honeypot.
1263
-** See also the robot-allow and robot-restrict settings which can
1264
-** be used to override the value of this setting for specific pages.
1265
-*/
1266
-/*
1267
-** SETTING: robot-allow width=40 block-text
1268
-** The VALUE of this setting is a list of GLOB patterns which match
1269
-** pages for which the robot-limiter is overwritten to false. If this
1270
-** setting is missing or an empty string, then it is assumed to match
1271
-** nothing.
1272
-*/
12731258
/*
12741259
** SETTING: robot-restrict width=40 block-text
1275
-** The VALUE of this setting is a list of GLOB patterns which match
1276
-** pages for which the robot-limiter setting should be enforced.
1277
-** In other words, if the robot-limiter is true and this setting either
1278
-** does not exist or is empty or matches the current page, then a
1279
-** redirect to the honeypot is issued. If this setting exists
1280
-** but does not match the current page, then the robot-limiter setting
1281
-** is overridden to false.
1260
+** The VALUE of this setting is a list of GLOB patterns that match
1261
+** pages for which complex HTTP requests from robots should be disallowed.
1262
+** The recommended value for this setting is:
1263
+**
1264
+** timeline,vdiff,fdiff,annotate,blame
1265
+**
12821266
*/
12831267
12841268
/*
12851269
** Check to see if the current HTTP request is a complex request that
12861270
** is coming from a robot and if access should be restricted for such robots.
12871271
** For the purposes of this module, a "complex request" is an HTTP
1288
-** request with one or more query parameters.
1272
+** request with one or more query parameters other than "name".
12891273
**
12901274
** If this routine determines that robots should be restricted, then
12911275
** this routine publishes a redirect to the honeypot and exits without
12921276
** returning to the caller.
12931277
**
@@ -1298,46 +1282,30 @@
12981282
** * The REFERER field of the HTTP header is missing or empty.
12991283
** * There are one or more query parameters other than "name".
13001284
**
13011285
** Robot restrictions are governed by settings.
13021286
**
1303
-** robot-limiter The restrictions implemented by this routine only
1304
-** apply if this setting exists and is true.
1305
-**
1306
-** robot-allow If this setting exists and the page of the request
1307
-** matches the comma-separated GLOB list that is the
1308
-** value of this setting, then no robot restrictions
1309
-** are applied.
1310
-**
1311
-** robot-restrict If this setting exists then robot restrictions only
1312
-** apply to pages that match the comma-separated
1313
-** GLOB list that is the value of this setting.
1287
+** robot-restrict The value is a list of GLOB patterns for pages
1288
+** that should restrict robot access. No restrictions
1289
+** are applied if this setting is undefined or is
1290
+** an empty string.
13141291
*/
13151292
void login_restrict_robot_access(void){
13161293
const char *zReferer;
13171294
const char *zGlob;
13181295
Glob *pGlob;
13191296
int go = 1;
13201297
if( g.zLogin!=0 ) return;
13211298
zReferer = P("HTTP_REFERER");
13221299
if( zReferer && zReferer[0]!=0 ) return;
1323
- if( !db_get_boolean("robot-limiter",0) ) return;
1300
+ zGlob = db_get("robot-restrict",0);
1301
+ if( zGlob==0 || zGlob[0]==0 ) return;
13241302
if( cgi_qp_count()<1 ) return;
1325
- zGlob = db_get("robot-allow",0);
1326
- if( zGlob && zGlob[0] ){
1327
- pGlob = glob_create(zGlob);
1328
- go = glob_match(pGlob, g.zPath);
1329
- glob_free(pGlob);
1330
- if( go ) return;
1331
- }
1332
- zGlob = db_get("robot-restrict",0);
1333
- if( zGlob && zGlob[0] ){
1334
- pGlob = glob_create(zGlob);
1335
- go = glob_match(pGlob, g.zPath);
1336
- glob_free(pGlob);
1337
- if( !go ) return;
1338
- }
1303
+ pGlob = glob_create(zGlob);
1304
+ go = glob_match(pGlob, g.zPath);
1305
+ glob_free(pGlob);
1306
+ if( !go ) return;
13391307
13401308
/* If we reach this point, it means we have a situation where we
13411309
** want to restrict the activity of a robot.
13421310
*/
13431311
cgi_set_cookie("fossil-goto", cgi_reconstruct_original_url(), 0, 600);
13441312
--- src/login.c
+++ src/login.c
@@ -1253,41 +1253,25 @@
1253 }
1254 fossil_free(zDecode);
1255 return uid;
1256 }
1257
1258 /*
1259 ** SETTING: robot-limiter boolean default=off
1260 ** If enabled, HTTP requests with one or more query parameters and
1261 ** without a REFERER string and without a valid login cookie are
1262 ** assumed to be hostile robots and are redirected to the honeypot.
1263 ** See also the robot-allow and robot-restrict settings which can
1264 ** be used to override the value of this setting for specific pages.
1265 */
1266 /*
1267 ** SETTING: robot-allow width=40 block-text
1268 ** The VALUE of this setting is a list of GLOB patterns which match
1269 ** pages for which the robot-limiter is overwritten to false. If this
1270 ** setting is missing or an empty string, then it is assumed to match
1271 ** nothing.
1272 */
1273 /*
1274 ** SETTING: robot-restrict width=40 block-text
1275 ** The VALUE of this setting is a list of GLOB patterns which match
1276 ** pages for which the robot-limiter setting should be enforced.
1277 ** In other words, if the robot-limiter is true and this setting either
1278 ** does not exist or is empty or matches the current page, then a
1279 ** redirect to the honeypot is issued. If this setting exists
1280 ** but does not match the current page, then the robot-limiter setting
1281 ** is overridden to false.
1282 */
1283
1284 /*
1285 ** Check to see if the current HTTP request is a complex request that
1286 ** is coming from a robot and if access should be restricted for such robots.
1287 ** For the purposes of this module, a "complex request" is an HTTP
1288 ** request with one or more query parameters.
1289 **
1290 ** If this routine determines that robots should be restricted, then
1291 ** this routine publishes a redirect to the honeypot and exits without
1292 ** returning to the caller.
1293 **
@@ -1298,46 +1282,30 @@
1298 ** * The REFERER field of the HTTP header is missing or empty.
1299 ** * There are one or more query parameters other than "name".
1300 **
1301 ** Robot restrictions are governed by settings.
1302 **
1303 ** robot-limiter The restrictions implemented by this routine only
1304 ** apply if this setting exists and is true.
1305 **
1306 ** robot-allow If this setting exists and the page of the request
1307 ** matches the comma-separated GLOB list that is the
1308 ** value of this setting, then no robot restrictions
1309 ** are applied.
1310 **
1311 ** robot-restrict If this setting exists then robot restrictions only
1312 ** apply to pages that match the comma-separated
1313 ** GLOB list that is the value of this setting.
1314 */
1315 void login_restrict_robot_access(void){
1316 const char *zReferer;
1317 const char *zGlob;
1318 Glob *pGlob;
1319 int go = 1;
1320 if( g.zLogin!=0 ) return;
1321 zReferer = P("HTTP_REFERER");
1322 if( zReferer && zReferer[0]!=0 ) return;
1323 if( !db_get_boolean("robot-limiter",0) ) return;
 
1324 if( cgi_qp_count()<1 ) return;
1325 zGlob = db_get("robot-allow",0);
1326 if( zGlob && zGlob[0] ){
1327 pGlob = glob_create(zGlob);
1328 go = glob_match(pGlob, g.zPath);
1329 glob_free(pGlob);
1330 if( go ) return;
1331 }
1332 zGlob = db_get("robot-restrict",0);
1333 if( zGlob && zGlob[0] ){
1334 pGlob = glob_create(zGlob);
1335 go = glob_match(pGlob, g.zPath);
1336 glob_free(pGlob);
1337 if( !go ) return;
1338 }
1339
1340 /* If we reach this point, it means we have a situation where we
1341 ** want to restrict the activity of a robot.
1342 */
1343 cgi_set_cookie("fossil-goto", cgi_reconstruct_original_url(), 0, 600);
1344
--- src/login.c
+++ src/login.c
@@ -1253,41 +1253,25 @@
1253 }
1254 fossil_free(zDecode);
1255 return uid;
1256 }
1257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1258 /*
1259 ** SETTING: robot-restrict width=40 block-text
1260 ** The VALUE of this setting is a list of GLOB patterns that match
1261 ** pages for which complex HTTP requests from robots should be disallowed.
1262 ** The recommended value for this setting is:
1263 **
1264 ** timeline,vdiff,fdiff,annotate,blame
1265 **
 
1266 */
1267
1268 /*
1269 ** Check to see if the current HTTP request is a complex request that
1270 ** is coming from a robot and if access should be restricted for such robots.
1271 ** For the purposes of this module, a "complex request" is an HTTP
1272 ** request with one or more query parameters other than "name".
1273 **
1274 ** If this routine determines that robots should be restricted, then
1275 ** this routine publishes a redirect to the honeypot and exits without
1276 ** returning to the caller.
1277 **
@@ -1298,46 +1282,30 @@
1282 ** * The REFERER field of the HTTP header is missing or empty.
1283 ** * There are one or more query parameters other than "name".
1284 **
1285 ** Robot restrictions are governed by settings.
1286 **
1287 ** robot-restrict The value is a list of GLOB patterns for pages
1288 ** that should restrict robot access. No restrictions
1289 ** are applied if this setting is undefined or is
1290 ** an empty string.
 
 
 
 
 
 
 
1291 */
1292 void login_restrict_robot_access(void){
1293 const char *zReferer;
1294 const char *zGlob;
1295 Glob *pGlob;
1296 int go = 1;
1297 if( g.zLogin!=0 ) return;
1298 zReferer = P("HTTP_REFERER");
1299 if( zReferer && zReferer[0]!=0 ) return;
1300 zGlob = db_get("robot-restrict",0);
1301 if( zGlob==0 || zGlob[0]==0 ) return;
1302 if( cgi_qp_count()<1 ) return;
1303 pGlob = glob_create(zGlob);
1304 go = glob_match(pGlob, g.zPath);
1305 glob_free(pGlob);
1306 if( !go ) return;
 
 
 
 
 
 
 
 
 
 
1307
1308 /* If we reach this point, it means we have a situation where we
1309 ** want to restrict the activity of a robot.
1310 */
1311 cgi_set_cookie("fossil-goto", cgi_reconstruct_original_url(), 0, 600);
1312
+8 -17
--- src/setup.c
+++ src/setup.c
@@ -491,31 +491,22 @@
491491
@ access to the /proc virtual filesystem is required, which means this limit
492492
@ might not work inside a chroot() jail.
493493
@ (Property: "max-loadavg")</p>
494494
495495
@ <hr>
496
- onoff_attribute("Prohibit robots from issuing complex requests",
497
- "robot-limiter", "rlb", 0, 0);
496
+ @ <p><b>Do not allow robots to make complex requests
497
+ @ against the following pages.</b>
498498
@ <p> A "complex request" is an HTTP request that has one or more query
499499
@ parameters. Some robots will spend hours juggling around query parameters
500500
@ or even forging fake query parameters in an effort to discover new
501501
@ behavior or to find an SQL injection opportunity or similar. This can
502
- @ waste hours of CPU time and gigabytes of bandwidth on the server. Hence,
503
- @ it is recommended to turn this feature on to stop such nefarious behavior.
504
- @ (Property: robot-limiter)
505
- @
506
- @ <p> When enabled, complex requests from user "nobody" without a Referer
507
- @ redirect to the honeypot.
508
- @
509
- @ <p> Additional settings below allow positive and negative overrides of
510
- @ this complex request limiter.
511
- @ <p><b>Allow Robots To See These Pages</b> (Property: robot-allow)<br>
512
- textarea_attribute("", 4, 80,
513
- "robot-allow", "rballow", "", 0);
514
- @ <p><b>Restrict Robots From Seeing Only These Pages</b>
515
- @ (Property: robot-restrict)<br>
516
- textarea_attribute("", 4, 80,
502
+ @ waste hours of CPU time and gigabytes of bandwidth on the server. A
503
+ @ suggested value for this setting is:
504
+ @ "<tt>timeline,vdiff,fdiff,annotate,blame</tt>".
505
+ @ (Property: robot-restrict)
506
+ @ <p>
507
+ textarea_attribute("", 2, 80,
517508
"robot-restrict", "rbrestrict", "", 0);
518509
519510
@ <hr>
520511
@ <p><input type="submit" name="submit" value="Apply Changes"></p>
521512
@ </div></form>
522513
--- src/setup.c
+++ src/setup.c
@@ -491,31 +491,22 @@
491 @ access to the /proc virtual filesystem is required, which means this limit
492 @ might not work inside a chroot() jail.
493 @ (Property: "max-loadavg")</p>
494
495 @ <hr>
496 onoff_attribute("Prohibit robots from issuing complex requests",
497 "robot-limiter", "rlb", 0, 0);
498 @ <p> A "complex request" is an HTTP request that has one or more query
499 @ parameters. Some robots will spend hours juggling around query parameters
500 @ or even forging fake query parameters in an effort to discover new
501 @ behavior or to find an SQL injection opportunity or similar. This can
502 @ waste hours of CPU time and gigabytes of bandwidth on the server. Hence,
503 @ it is recommended to turn this feature on to stop such nefarious behavior.
504 @ (Property: robot-limiter)
505 @
506 @ <p> When enabled, complex requests from user "nobody" without a Referer
507 @ redirect to the honeypot.
508 @
509 @ <p> Additional settings below allow positive and negative overrides of
510 @ this complex request limiter.
511 @ <p><b>Allow Robots To See These Pages</b> (Property: robot-allow)<br>
512 textarea_attribute("", 4, 80,
513 "robot-allow", "rballow", "", 0);
514 @ <p><b>Restrict Robots From Seeing Only These Pages</b>
515 @ (Property: robot-restrict)<br>
516 textarea_attribute("", 4, 80,
517 "robot-restrict", "rbrestrict", "", 0);
518
519 @ <hr>
520 @ <p><input type="submit" name="submit" value="Apply Changes"></p>
521 @ </div></form>
522
--- src/setup.c
+++ src/setup.c
@@ -491,31 +491,22 @@
491 @ access to the /proc virtual filesystem is required, which means this limit
492 @ might not work inside a chroot() jail.
493 @ (Property: "max-loadavg")</p>
494
495 @ <hr>
496 @ <p><b>Do not allow robots to make complex requests
497 @ against the following pages.</b>
498 @ <p> A "complex request" is an HTTP request that has one or more query
499 @ parameters. Some robots will spend hours juggling around query parameters
500 @ or even forging fake query parameters in an effort to discover new
501 @ behavior or to find an SQL injection opportunity or similar. This can
502 @ waste hours of CPU time and gigabytes of bandwidth on the server. A
503 @ suggested value for this setting is:
504 @ "<tt>timeline,vdiff,fdiff,annotate,blame</tt>".
505 @ (Property: robot-restrict)
506 @ <p>
507 textarea_attribute("", 2, 80,
 
 
 
 
 
 
 
 
 
508 "robot-restrict", "rbrestrict", "", 0);
509
510 @ <hr>
511 @ <p><input type="submit" name="submit" value="Apply Changes"></p>
512 @ </div></form>
513

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button