Fossil SCM

Add the complex-requests-from-robots limiter.

drh 2024-07-26 17:49 trunk
Commit 1a0b3043073b1f2b9274a247df7c6f777e170043c01483ae006e4a611423422e
+13
--- src/cgi.c
+++ src/cgi.c
@@ -897,10 +897,23 @@
897897
}
898898
return;
899899
}
900900
}
901901
}
902
+
903
+/*
904
+** Return the number of query parameters. Cookies and environment variables
905
+** do not count. Also, do not count the special QP "name".
906
+*/
907
+int cgi_qp_count(void){
908
+ int cnt = 0;
909
+ int i;
910
+ for(i=0; i<nUsedQP; i++){
911
+ if( aParamQP[i].isQP && fossil_strcmp(aParamQP[i].zName,"name")!=0 ) cnt++;
912
+ }
913
+ return cnt;
914
+}
902915
903916
/*
904917
** Add an environment variable value to the parameter set. The zName
905918
** portion is fixed but a copy is made of zValue.
906919
*/
907920
--- src/cgi.c
+++ src/cgi.c
@@ -897,10 +897,23 @@
897 }
898 return;
899 }
900 }
901 }
 
 
 
 
 
 
 
 
 
 
 
 
 
902
903 /*
904 ** Add an environment variable value to the parameter set. The zName
905 ** portion is fixed but a copy is made of zValue.
906 */
907
--- src/cgi.c
+++ src/cgi.c
@@ -897,10 +897,23 @@
897 }
898 return;
899 }
900 }
901 }
902
903 /*
904 ** Return the number of query parameters. Cookies and environment variables
905 ** do not count. Also, do not count the special QP "name".
906 */
907 int cgi_qp_count(void){
908 int cnt = 0;
909 int i;
910 for(i=0; i<nUsedQP; i++){
911 if( aParamQP[i].isQP && fossil_strcmp(aParamQP[i].zName,"name")!=0 ) cnt++;
912 }
913 return cnt;
914 }
915
916 /*
917 ** Add an environment variable value to the parameter set. The zName
918 ** portion is fixed but a copy is made of zValue.
919 */
920
+91
--- src/login.c
+++ src/login.c
@@ -1250,10 +1250,98 @@
12501250
}
12511251
fossil_free(zDecode);
12521252
return uid;
12531253
}
12541254
1255
+/*
1256
+** SETTING: robot-limiter boolean default=off
1257
+** If enabled, HTTP requests with one or more query parameters and
1258
+** without a REFERER string and without a valid login cookie are
1259
+** assumed to be hostile robots and are redirected to the honeypot.
1260
+** See also the robot-allow and robot-restrict settings which can
1261
+** be used to override the value of this setting for specific pages.
1262
+*/
1263
+/*
1264
+** SETTING: robot-allow width=40 block-text
1265
+** The VALUE of this setting is a list of GLOB patterns which match
1266
+** pages for which the robot-limiter is overridden to false. If this
1267
+** setting is missing or an empty string, then it is assumed to match
1268
+** nothing.
1269
+*/
1270
+/*
1271
+** SETTING: robot-restrict width=40 block-text
1272
+** The VALUE of this setting is a list of GLOB patterns which match
1273
+** pages for which the robot-limiter setting should be enforced.
1274
+** In other words, if the robot-limiter is true and this setting either
1275
+** does not exist or is empty or matches the current page, then a
1276
+** redirect to the honeypot is issued. If this setting exists
1277
+** but does not match the current page, then the robot-limiter setting
1278
+** is overridden to false.
1279
+*/
1280
+
1281
+/*
1282
+** Check to see if the current HTTP request is a complex request that
1283
+** is coming from a robot and if access should be restricted for such robots.
1284
+** For the purposes of this module, a "complex request" is an HTTP
1285
+** request with one or more query parameters.
1286
+**
1287
+** If this routine determines that robots should be restricted, then
1288
+** this routine publishes a redirect to the honeypot and exits without
1289
+** returning to the caller.
1290
+**
1291
+** This routine believes that a complex request is coming from
1292
+** a robot if all of the following are true:
1293
+**
1294
+** * The user is "nobody".
1295
+** * The REFERER field of the HTTP header is missing or empty.
1296
+** * There are one or more query parameters other than "name".
1297
+**
1298
+** Robot restrictions are governed by settings.
1299
+**
1300
+** robot-limiter The restrictions implemented by this routine only
1301
+** apply if this setting exists and is true.
1302
+**
1303
+** robot-allow If this setting exists and the page of the request
1304
+** matches the comma-separated GLOB list that is the
1305
+** value of this setting, then no robot restrictions
1306
+** are applied.
1307
+**
1308
+** robot-restrict If this setting exists then robot restrictions only
1309
+** apply to pages that match the comma-separated
1310
+** GLOB list that is the value of this setting.
1311
+*/
1312
+void login_restrict_robot_access(void){
1313
+ const char *zReferer;
1314
+ const char *zGlob;
1315
+ Glob *pGlob;
1316
+ int go = 1;
1317
+ if( g.zLogin!=0 ) return;
1318
+ zReferer = P("HTTP_REFERER");
1319
+ if( zReferer && zReferer[0]!=0 ) return;
1320
+ if( !db_get_boolean("robot-limiter",0) ) return;
1321
+ if( cgi_qp_count()<1 ) return;
1322
+ zGlob = db_get("robot-allow",0);
1323
+ if( zGlob && zGlob[0] ){
1324
+ pGlob = glob_create(zGlob);
1325
+ go = glob_match(pGlob, g.zPath);
1326
+ glob_free(pGlob);
1327
+ if( go ) return;
1328
+ }
1329
+ zGlob = db_get("robot-restrict",0);
1330
+ if( zGlob && zGlob[0] ){
1331
+ pGlob = glob_create(zGlob);
1332
+ go = glob_match(pGlob, g.zPath);
1333
+ glob_free(pGlob);
1334
+ if( !go ) return;
1335
+ }
1336
+
1337
+ /* If we reach this point, it means we have a situation where we
1338
+ ** want to restrict the activity of a robot.
1339
+ */
1340
+ cgi_redirectf("%R/honeypot");
1341
+}
1342
+
12551343
/*
12561344
** This routine examines the login cookie to see if it exists and
12571345
** is valid. If the login cookie checks out, it then sets global
12581346
** variables appropriately.
12591347
**
@@ -1413,10 +1501,13 @@
14131501
}
14141502
login_create_csrf_secret("none");
14151503
}
14161504
14171505
login_set_uid(uid, zCap);
1506
+
1507
+ /* Maybe restrict access to robots */
1508
+ login_restrict_robot_access();
14181509
}
14191510
14201511
/*
14211512
** Set the current logged in user to be uid. zCap is precomputed
14221513
** (override) capabilities. If zCap==0, then look up the capabilities
14231514
--- src/login.c
+++ src/login.c
@@ -1250,10 +1250,98 @@
1250 }
1251 fossil_free(zDecode);
1252 return uid;
1253 }
1254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1255 /*
1256 ** This routine examines the login cookie to see if it exists and
1257 ** is valid. If the login cookie checks out, it then sets global
1258 ** variables appropriately.
1259 **
@@ -1413,10 +1501,13 @@
1413 }
1414 login_create_csrf_secret("none");
1415 }
1416
1417 login_set_uid(uid, zCap);
 
 
 
1418 }
1419
1420 /*
1421 ** Set the current logged in user to be uid. zCap is precomputed
1422 ** (override) capabilities. If zCap==0, then look up the capabilities
1423
--- src/login.c
+++ src/login.c
@@ -1250,10 +1250,98 @@
1250 }
1251 fossil_free(zDecode);
1252 return uid;
1253 }
1254
1255 /*
1256 ** SETTING: robot-limiter boolean default=off
1257 ** If enabled, HTTP requests with one or more query parameters and
1258 ** without a REFERER string and without a valid login cookie are
1259 ** assumed to be hostile robots and are redirected to the honeypot.
1260 ** See also the robot-allow and robot-restrict settings which can
1261 ** be used to override the value of this setting for specific pages.
1262 */
1263 /*
1264 ** SETTING: robot-allow width=40 block-text
1265 ** The VALUE of this setting is a list of GLOB patterns which match
1266 ** pages for which the robot-limiter is overridden to false. If this
1267 ** setting is missing or an empty string, then it is assumed to match
1268 ** nothing.
1269 */
1270 /*
1271 ** SETTING: robot-restrict width=40 block-text
1272 ** The VALUE of this setting is a list of GLOB patterns which match
1273 ** pages for which the robot-limiter setting should be enforced.
1274 ** In other words, if the robot-limiter is true and this setting either
1275 ** does not exist or is empty or matches the current page, then a
1276 ** redirect to the honeypot is issued. If this setting exists
1277 ** but does not match the current page, then the robot-limiter setting
1278 ** is overridden to false.
1279 */
1280
1281 /*
1282 ** Check to see if the current HTTP request is a complex request that
1283 ** is coming from a robot and if access should be restricted for such robots.
1284 ** For the purposes of this module, a "complex request" is an HTTP
1285 ** request with one or more query parameters.
1286 **
1287 ** If this routine determines that robots should be restricted, then
1288 ** this routine publishes a redirect to the honeypot and exits without
1289 ** returning to the caller.
1290 **
1291 ** This routine believes that a complex request is coming from
1292 ** a robot if all of the following are true:
1293 **
1294 ** * The user is "nobody".
1295 ** * The REFERER field of the HTTP header is missing or empty.
1296 ** * There are one or more query parameters other than "name".
1297 **
1298 ** Robot restrictions are governed by settings.
1299 **
1300 ** robot-limiter The restrictions implemented by this routine only
1301 ** apply if this setting exists and is true.
1302 **
1303 ** robot-allow If this setting exists and the page of the request
1304 ** matches the comma-separated GLOB list that is the
1305 ** value of this setting, then no robot restrictions
1306 ** are applied.
1307 **
1308 ** robot-restrict If this setting exists then robot restrictions only
1309 ** apply to pages that match the comma-separated
1310 ** GLOB list that is the value of this setting.
1311 */
1312 void login_restrict_robot_access(void){
1313 const char *zReferer;
1314 const char *zGlob;
1315 Glob *pGlob;
1316 int go = 1;
1317 if( g.zLogin!=0 ) return;
1318 zReferer = P("HTTP_REFERER");
1319 if( zReferer && zReferer[0]!=0 ) return;
1320 if( !db_get_boolean("robot-limiter",0) ) return;
1321 if( cgi_qp_count()<1 ) return;
1322 zGlob = db_get("robot-allow",0);
1323 if( zGlob && zGlob[0] ){
1324 pGlob = glob_create(zGlob);
1325 go = glob_match(pGlob, g.zPath);
1326 glob_free(pGlob);
1327 if( go ) return;
1328 }
1329 zGlob = db_get("robot-restrict",0);
1330 if( zGlob && zGlob[0] ){
1331 pGlob = glob_create(zGlob);
1332 go = glob_match(pGlob, g.zPath);
1333 glob_free(pGlob);
1334 if( !go ) return;
1335 }
1336
1337 /* If we reach this point, it means we have a situation where we
1338 ** want to restrict the activity of a robot.
1339 */
1340 cgi_redirectf("%R/honeypot");
1341 }
1342
1343 /*
1344 ** This routine examines the login cookie to see if it exists and
1345 ** is valid. If the login cookie checks out, it then sets global
1346 ** variables appropriately.
1347 **
@@ -1413,10 +1501,13 @@
1501 }
1502 login_create_csrf_secret("none");
1503 }
1504
1505 login_set_uid(uid, zCap);
1506
1507 /* Maybe restrict access to robots */
1508 login_restrict_robot_access();
1509 }
1510
1511 /*
1512 ** Set the current logged in user to be uid. zCap is precomputed
1513 ** (override) capabilities. If zCap==0, then look up the capabilities
1514
+7 -4
--- src/main.c
+++ src/main.c
@@ -2994,10 +2994,11 @@
29942994
** using this command interactively over SSH. A better solution would be
29952995
** to use a different command for "ssh" sync, but we cannot do that without
29962996
** breaking legacy.
29972997
**
29982998
** Options:
2999
+** --nobody Pretend to be user "nobody"
29993000
** --test Do not do special "sync" processing when operating
30003001
** over an SSH link
30013002
** --th-trace Trace TH1 execution (for debugging purposes)
30023003
** --usercap CAP User capability string (Default: "sxy")
30033004
**
@@ -3007,16 +3008,18 @@
30073008
const char *zUserCap;
30083009
int bTest = 0;
30093010
30103011
Th_InitTraceLog();
30113012
zUserCap = find_option("usercap",0,1);
3012
- if( zUserCap==0 ){
3013
- g.useLocalauth = 1;
3014
- zUserCap = "sxy";
3013
+ if( !find_option("nobody",0,0) ){
3014
+ if( zUserCap==0 ){
3015
+ g.useLocalauth = 1;
3016
+ zUserCap = "sxy";
3017
+ }
3018
+ login_set_capabilities(zUserCap, 0);
30153019
}
30163020
bTest = find_option("test",0,0)!=0;
3017
- login_set_capabilities(zUserCap, 0);
30183021
g.httpIn = stdin;
30193022
g.httpOut = stdout;
30203023
fossil_binary_mode(g.httpOut);
30213024
fossil_binary_mode(g.httpIn);
30223025
g.zExtRoot = find_option("extroot",0,1);
30233026
--- src/main.c
+++ src/main.c
@@ -2994,10 +2994,11 @@
2994 ** using this command interactively over SSH. A better solution would be
2995 ** to use a different command for "ssh" sync, but we cannot do that without
2996 ** breaking legacy.
2997 **
2998 ** Options:
 
2999 ** --test Do not do special "sync" processing when operating
3000 ** over an SSH link
3001 ** --th-trace Trace TH1 execution (for debugging purposes)
3002 ** --usercap CAP User capability string (Default: "sxy")
3003 **
@@ -3007,16 +3008,18 @@
3007 const char *zUserCap;
3008 int bTest = 0;
3009
3010 Th_InitTraceLog();
3011 zUserCap = find_option("usercap",0,1);
3012 if( zUserCap==0 ){
3013 g.useLocalauth = 1;
3014 zUserCap = "sxy";
 
 
 
3015 }
3016 bTest = find_option("test",0,0)!=0;
3017 login_set_capabilities(zUserCap, 0);
3018 g.httpIn = stdin;
3019 g.httpOut = stdout;
3020 fossil_binary_mode(g.httpOut);
3021 fossil_binary_mode(g.httpIn);
3022 g.zExtRoot = find_option("extroot",0,1);
3023
--- src/main.c
+++ src/main.c
@@ -2994,10 +2994,11 @@
2994 ** using this command interactively over SSH. A better solution would be
2995 ** to use a different command for "ssh" sync, but we cannot do that without
2996 ** breaking legacy.
2997 **
2998 ** Options:
2999 ** --nobody Pretend to be user "nobody"
3000 ** --test Do not do special "sync" processing when operating
3001 ** over an SSH link
3002 ** --th-trace Trace TH1 execution (for debugging purposes)
3003 ** --usercap CAP User capability string (Default: "sxy")
3004 **
@@ -3007,16 +3008,18 @@
3008 const char *zUserCap;
3009 int bTest = 0;
3010
3011 Th_InitTraceLog();
3012 zUserCap = find_option("usercap",0,1);
3013 if( !find_option("nobody",0,0) ){
3014 if( zUserCap==0 ){
3015 g.useLocalauth = 1;
3016 zUserCap = "sxy";
3017 }
3018 login_set_capabilities(zUserCap, 0);
3019 }
3020 bTest = find_option("test",0,0)!=0;
 
3021 g.httpIn = stdin;
3022 g.httpOut = stdout;
3023 fossil_binary_mode(g.httpOut);
3024 fossil_binary_mode(g.httpIn);
3025 g.zExtRoot = find_option("extroot",0,1);
3026
+24
--- src/setup.c
+++ src/setup.c
@@ -490,10 +490,34 @@
490490
@ This limit is only enforced on Unix servers. On Linux systems,
491491
@ access to the /proc virtual filesystem is required, which means this limit
492492
@ might not work inside a chroot() jail.
493493
@ (Property: "max-loadavg")</p>
494494
495
+ @ <hr>
496
+ onoff_attribute("Prohibit robots from issuing complex requests",
497
+ "robot-limiter", "rlb", 0, 0);
498
+ @ <p> A "complex request" is an HTTP request that has one or more query
499
+ @ parameters. Some robots will spend hours juggling around query parameters
500
+ @ or even forging fake query parameters in an effort to discover new
501
+ @ behavior or to find an SQL injection opportunity or similar. This can
502
+ @ waste hours of CPU time and gigabytes of bandwidth on the server. Hence,
503
+ @ it is recommended to turn this feature on to stop such nefarious behavior.
504
+ @ (Property: robot-limiter)
505
+ @
506
+ @ <p> When enabled, complex requests from user "nobody" without a Referer
507
+ @ redirect to the honeypot.
508
+ @
509
+ @ <p> Additional settings below allow positive and negative overrides of
510
+ @ this complex request limiter.
511
+ @ <p><b>Allow Robots To See These Pages</b> (Property: robot-allow)<br>
512
+ textarea_attribute("", 4, 80,
513
+ "robot-allow", "rballow", "", 0);
514
+ @ <p><b>Restrict Robots From Seeing Only These Pages</b>
515
+ @ (Property: robot-restrict)<br>
516
+ textarea_attribute("", 4, 80,
517
+ "robot-restrict", "rbrestrict", "", 0);
518
+
495519
@ <hr>
496520
@ <p><input type="submit" name="submit" value="Apply Changes"></p>
497521
@ </div></form>
498522
db_end_transaction(0);
499523
style_finish_page();
500524
--- src/setup.c
+++ src/setup.c
@@ -490,10 +490,34 @@
490 @ This limit is only enforced on Unix servers. On Linux systems,
491 @ access to the /proc virtual filesystem is required, which means this limit
492 @ might not work inside a chroot() jail.
493 @ (Property: "max-loadavg")</p>
494
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495 @ <hr>
496 @ <p><input type="submit" name="submit" value="Apply Changes"></p>
497 @ </div></form>
498 db_end_transaction(0);
499 style_finish_page();
500
--- src/setup.c
+++ src/setup.c
@@ -490,10 +490,34 @@
490 @ This limit is only enforced on Unix servers. On Linux systems,
491 @ access to the /proc virtual filesystem is required, which means this limit
492 @ might not work inside a chroot() jail.
493 @ (Property: "max-loadavg")</p>
494
495 @ <hr>
496 onoff_attribute("Prohibit robots from issuing complex requests",
497 "robot-limiter", "rlb", 0, 0);
498 @ <p> A "complex request" is an HTTP request that has one or more query
499 @ parameters. Some robots will spend hours juggling around query parameters
500 @ or even forging fake query parameters in an effort to discover new
501 @ behavior or to find an SQL injection opportunity or similar. This can
502 @ waste hours of CPU time and gigabytes of bandwidth on the server. Hence,
503 @ it is recommended to turn this feature on to stop such nefarious behavior.
504 @ (Property: robot-limiter)
505 @
506 @ <p> When enabled, complex requests from user "nobody" without a Referer
507 @ redirect to the honeypot.
508 @
509 @ <p> Additional settings below allow positive and negative overrides of
510 @ this complex request limiter.
511 @ <p><b>Allow Robots To See These Pages</b> (Property: robot-allow)<br>
512 textarea_attribute("", 4, 80,
513 "robot-allow", "rballow", "", 0);
514 @ <p><b>Restrict Robots From Seeing Only These Pages</b>
515 @ (Property: robot-restrict)<br>
516 textarea_attribute("", 4, 80,
517 "robot-restrict", "rbrestrict", "", 0);
518
519 @ <hr>
520 @ <p><input type="submit" name="submit" value="Apply Changes"></p>
521 @ </div></form>
522 db_end_transaction(0);
523 style_finish_page();
524

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button