Fossil SCM
Only apply the complex-request restriction to pages listed in the robot-restrict setting. Deprecate the robot-limiter and robot-allow settings.
Commit
cee1af5a3731d2c44e35abba2623da6ae570520fde9690ec86bbe127448d0884
Parent
61e62c02a10f0e6…
2 files changed
+17
-49
+8
-17
+17
-49
| --- src/login.c | ||
| +++ src/login.c | ||
| @@ -1253,41 +1253,25 @@ | ||
| 1253 | 1253 | } |
| 1254 | 1254 | fossil_free(zDecode); |
| 1255 | 1255 | return uid; |
| 1256 | 1256 | } |
| 1257 | 1257 | |
| 1258 | -/* | |
| 1259 | -** SETTING: robot-limiter boolean default=off | |
| 1260 | -** If enabled, HTTP requests with one or more query parameters and | |
| 1261 | -** without a REFERER string and without a valid login cookie are | |
| 1262 | -** assumed to be hostile robots and are redirected to the honeypot. | |
| 1263 | -** See also the robot-allow and robot-restrict settings which can | |
| 1264 | -** be used to override the value of this setting for specific pages. | |
| 1265 | -*/ | |
| 1266 | -/* | |
| 1267 | -** SETTING: robot-allow width=40 block-text | |
| 1268 | -** The VALUE of this setting is a list of GLOB patterns which match | |
| 1269 | -** pages for which the robot-limiter is overwritten to false. If this | |
| 1270 | -** setting is missing or an empty string, then it is assumed to match | |
| 1271 | -** nothing. | |
| 1272 | -*/ | |
| 1273 | 1258 | /* |
| 1274 | 1259 | ** SETTING: robot-restrict width=40 block-text |
| 1275 | -** The VALUE of this setting is a list of GLOB patterns which match | |
| 1276 | -** pages for which the robot-limiter setting should be enforced. | |
| 1277 | -** In other words, if the robot-limiter is true and this setting either | |
| 1278 | -** does not exist or is empty or matches the current page, then a | |
| 1279 | -** redirect to the honeypot is issues. If this setting exists | |
| 1280 | -** but does not match the current page, then the robot-limiter setting | |
| 1281 | -** is overridden to false. | |
| 1260 | +** The VALUE of this setting is a list of GLOB patterns that match | |
| 1261 | +** pages for which complex HTTP requests from robots should be disallowed. | |
| 1262 | +** The recommended value for this setting is: | |
| 1263 | +** | |
| 1264 | +** timeline,vdiff,fdiff,annotate,blame | |
| 1265 | +** | |
| 1282 | 1266 | */ |
| 1283 | 1267 | |
| 1284 | 1268 | /* |
| 1285 | 1269 | ** Check to see if the current HTTP request is a complex request that |
| 1286 | 1270 | ** is coming from a robot and if access should be restricted for such robots. |
| 1287 | 1271 | ** For the purposes of this module, a "complex request" is an HTTP |
| 1288 | -** request with one or more query parameters. | |
| 1272 | +** request with one or more query parameters other than "name". | |
| 1289 | 1273 | ** |
| 1290 | 1274 | ** If this routine determines that robots should be restricted, then |
| 1291 | 1275 | ** this routine publishes a redirect to the honeypot and exits without |
| 1292 | 1276 | ** returning to the caller. |
| 1293 | 1277 | ** |
| @@ -1298,46 +1282,30 @@ | ||
| 1298 | 1282 | ** * The REFERER field of the HTTP header is missing or empty. |
| 1299 | 1283 | ** * There are one or more query parameters other than "name". |
| 1300 | 1284 | ** |
| 1301 | 1285 | ** Robot restrictions are governed by settings. |
| 1302 | 1286 | ** |
| 1303 | -** robot-limiter The restrictions implemented by this routine only | |
| 1304 | -** apply if this setting exists and is true. | |
| 1305 | -** | |
| 1306 | -** robot-allow If this setting exists and the page of the request | |
| 1307 | -** matches the comma-separate GLOB list that is the | |
| 1308 | -** value of this setting, then no robot restrictions | |
| 1309 | -** are applied. | |
| 1310 | -** | |
| 1311 | -** robot-restrict If this setting exists then robot restrictions only | |
| 1312 | -** apply to pages that match the comma-separated | |
| 1313 | -** GLOB list that is the value of this setting. | |
| 1287 | +** robot-restrict The value is a list of GLOB patterns for pages | |
| 1288 | +** that should restrict robot access. No restrictions | |
| 1289 | +** are applied if this setting is undefined or is | |
| 1290 | +** an empty string. | |
| 1314 | 1291 | */ |
| 1315 | 1292 | void login_restrict_robot_access(void){ |
| 1316 | 1293 | const char *zReferer; |
| 1317 | 1294 | const char *zGlob; |
| 1318 | 1295 | Glob *pGlob; |
| 1319 | 1296 | int go = 1; |
| 1320 | 1297 | if( g.zLogin!=0 ) return; |
| 1321 | 1298 | zReferer = P("HTTP_REFERER"); |
| 1322 | 1299 | if( zReferer && zReferer[0]!=0 ) return; |
| 1323 | - if( !db_get_boolean("robot-limiter",0) ) return; | |
| 1300 | + zGlob = db_get("robot-restrict",0); | |
| 1301 | + if( zGlob==0 || zGlob[0]==0 ) return; | |
| 1324 | 1302 | if( cgi_qp_count()<1 ) return; |
| 1325 | - zGlob = db_get("robot-allow",0); | |
| 1326 | - if( zGlob && zGlob[0] ){ | |
| 1327 | - pGlob = glob_create(zGlob); | |
| 1328 | - go = glob_match(pGlob, g.zPath); | |
| 1329 | - glob_free(pGlob); | |
| 1330 | - if( go ) return; | |
| 1331 | - } | |
| 1332 | - zGlob = db_get("robot-restrict",0); | |
| 1333 | - if( zGlob && zGlob[0] ){ | |
| 1334 | - pGlob = glob_create(zGlob); | |
| 1335 | - go = glob_match(pGlob, g.zPath); | |
| 1336 | - glob_free(pGlob); | |
| 1337 | - if( !go ) return; | |
| 1338 | - } | |
| 1303 | + pGlob = glob_create(zGlob); | |
| 1304 | + go = glob_match(pGlob, g.zPath); | |
| 1305 | + glob_free(pGlob); | |
| 1306 | + if( !go ) return; | |
| 1339 | 1307 | |
| 1340 | 1308 | /* If we reach this point, it means we have a situation where we |
| 1341 | 1309 | ** want to restrict the activity of a robot. |
| 1342 | 1310 | */ |
| 1343 | 1311 | cgi_set_cookie("fossil-goto", cgi_reconstruct_original_url(), 0, 600); |
| 1344 | 1312 |
| --- src/login.c | |
| +++ src/login.c | |
| @@ -1253,41 +1253,25 @@ | |
| 1253 | } |
| 1254 | fossil_free(zDecode); |
| 1255 | return uid; |
| 1256 | } |
| 1257 | |
| 1258 | /* |
| 1259 | ** SETTING: robot-limiter boolean default=off |
| 1260 | ** If enabled, HTTP requests with one or more query parameters and |
| 1261 | ** without a REFERER string and without a valid login cookie are |
| 1262 | ** assumed to be hostile robots and are redirected to the honeypot. |
| 1263 | ** See also the robot-allow and robot-restrict settings which can |
| 1264 | ** be used to override the value of this setting for specific pages. |
| 1265 | */ |
| 1266 | /* |
| 1267 | ** SETTING: robot-allow width=40 block-text |
| 1268 | ** The VALUE of this setting is a list of GLOB patterns which match |
| 1269 | ** pages for which the robot-limiter is overwritten to false. If this |
| 1270 | ** setting is missing or an empty string, then it is assumed to match |
| 1271 | ** nothing. |
| 1272 | */ |
| 1273 | /* |
| 1274 | ** SETTING: robot-restrict width=40 block-text |
| 1275 | ** The VALUE of this setting is a list of GLOB patterns which match |
| 1276 | ** pages for which the robot-limiter setting should be enforced. |
| 1277 | ** In other words, if the robot-limiter is true and this setting either |
| 1278 | ** does not exist or is empty or matches the current page, then a |
| 1279 | ** redirect to the honeypot is issues. If this setting exists |
| 1280 | ** but does not match the current page, then the robot-limiter setting |
| 1281 | ** is overridden to false. |
| 1282 | */ |
| 1283 | |
| 1284 | /* |
| 1285 | ** Check to see if the current HTTP request is a complex request that |
| 1286 | ** is coming from a robot and if access should be restricted for such robots. |
| 1287 | ** For the purposes of this module, a "complex request" is an HTTP |
| 1288 | ** request with one or more query parameters. |
| 1289 | ** |
| 1290 | ** If this routine determines that robots should be restricted, then |
| 1291 | ** this routine publishes a redirect to the honeypot and exits without |
| 1292 | ** returning to the caller. |
| 1293 | ** |
| @@ -1298,46 +1282,30 @@ | |
| 1298 | ** * The REFERER field of the HTTP header is missing or empty. |
| 1299 | ** * There are one or more query parameters other than "name". |
| 1300 | ** |
| 1301 | ** Robot restrictions are governed by settings. |
| 1302 | ** |
| 1303 | ** robot-limiter The restrictions implemented by this routine only |
| 1304 | ** apply if this setting exists and is true. |
| 1305 | ** |
| 1306 | ** robot-allow If this setting exists and the page of the request |
| 1307 | ** matches the comma-separate GLOB list that is the |
| 1308 | ** value of this setting, then no robot restrictions |
| 1309 | ** are applied. |
| 1310 | ** |
| 1311 | ** robot-restrict If this setting exists then robot restrictions only |
| 1312 | ** apply to pages that match the comma-separated |
| 1313 | ** GLOB list that is the value of this setting. |
| 1314 | */ |
| 1315 | void login_restrict_robot_access(void){ |
| 1316 | const char *zReferer; |
| 1317 | const char *zGlob; |
| 1318 | Glob *pGlob; |
| 1319 | int go = 1; |
| 1320 | if( g.zLogin!=0 ) return; |
| 1321 | zReferer = P("HTTP_REFERER"); |
| 1322 | if( zReferer && zReferer[0]!=0 ) return; |
| 1323 | if( !db_get_boolean("robot-limiter",0) ) return; |
| 1324 | if( cgi_qp_count()<1 ) return; |
| 1325 | zGlob = db_get("robot-allow",0); |
| 1326 | if( zGlob && zGlob[0] ){ |
| 1327 | pGlob = glob_create(zGlob); |
| 1328 | go = glob_match(pGlob, g.zPath); |
| 1329 | glob_free(pGlob); |
| 1330 | if( go ) return; |
| 1331 | } |
| 1332 | zGlob = db_get("robot-restrict",0); |
| 1333 | if( zGlob && zGlob[0] ){ |
| 1334 | pGlob = glob_create(zGlob); |
| 1335 | go = glob_match(pGlob, g.zPath); |
| 1336 | glob_free(pGlob); |
| 1337 | if( !go ) return; |
| 1338 | } |
| 1339 | |
| 1340 | /* If we reach this point, it means we have a situation where we |
| 1341 | ** want to restrict the activity of a robot. |
| 1342 | */ |
| 1343 | cgi_set_cookie("fossil-goto", cgi_reconstruct_original_url(), 0, 600); |
| 1344 |
| --- src/login.c | |
| +++ src/login.c | |
| @@ -1253,41 +1253,25 @@ | |
| 1253 | } |
| 1254 | fossil_free(zDecode); |
| 1255 | return uid; |
| 1256 | } |
| 1257 | |
| 1258 | /* |
| 1259 | ** SETTING: robot-restrict width=40 block-text |
| 1260 | ** The VALUE of this setting is a list of GLOB patterns that match |
| 1261 | ** pages for which complex HTTP requests from robots should be disallowed. |
| 1262 | ** The recommended value for this setting is: |
| 1263 | ** |
| 1264 | ** timeline,vdiff,fdiff,annotate,blame |
| 1265 | ** |
| 1266 | */ |
| 1267 | |
| 1268 | /* |
| 1269 | ** Check to see if the current HTTP request is a complex request that |
| 1270 | ** is coming from a robot and if access should be restricted for such robots. |
| 1271 | ** For the purposes of this module, a "complex request" is an HTTP |
| 1272 | ** request with one or more query parameters other than "name". |
| 1273 | ** |
| 1274 | ** If this routine determines that robots should be restricted, then |
| 1275 | ** this routine publishes a redirect to the honeypot and exits without |
| 1276 | ** returning to the caller. |
| 1277 | ** |
| @@ -1298,46 +1282,30 @@ | |
| 1282 | ** * The REFERER field of the HTTP header is missing or empty. |
| 1283 | ** * There are one or more query parameters other than "name". |
| 1284 | ** |
| 1285 | ** Robot restrictions are governed by settings. |
| 1286 | ** |
| 1287 | ** robot-restrict The value is a list of GLOB patterns for pages |
| 1288 | ** that should restrict robot access. No restrictions |
| 1289 | ** are applied if this setting is undefined or is |
| 1290 | ** an empty string. |
| 1291 | */ |
| 1292 | void login_restrict_robot_access(void){ |
| 1293 | const char *zReferer; |
| 1294 | const char *zGlob; |
| 1295 | Glob *pGlob; |
| 1296 | int go = 1; |
| 1297 | if( g.zLogin!=0 ) return; |
| 1298 | zReferer = P("HTTP_REFERER"); |
| 1299 | if( zReferer && zReferer[0]!=0 ) return; |
| 1300 | zGlob = db_get("robot-restrict",0); |
| 1301 | if( zGlob==0 || zGlob[0]==0 ) return; |
| 1302 | if( cgi_qp_count()<1 ) return; |
| 1303 | pGlob = glob_create(zGlob); |
| 1304 | go = glob_match(pGlob, g.zPath); |
| 1305 | glob_free(pGlob); |
| 1306 | if( !go ) return; |
| 1307 | |
| 1308 | /* If we reach this point, it means we have a situation where we |
| 1309 | ** want to restrict the activity of a robot. |
| 1310 | */ |
| 1311 | cgi_set_cookie("fossil-goto", cgi_reconstruct_original_url(), 0, 600); |
| 1312 |
+8
-17
| --- src/setup.c | ||
| +++ src/setup.c | ||
| @@ -491,31 +491,22 @@ | ||
| 491 | 491 | @ access to the /proc virtual filesystem is required, which means this limit |
| 492 | 492 | @ might not work inside a chroot() jail. |
| 493 | 493 | @ (Property: "max-loadavg")</p> |
| 494 | 494 | |
| 495 | 495 | @ <hr> |
| 496 | - onoff_attribute("Prohibit robots from issuing complex requests", | |
| 497 | - "robot-limiter", "rlb", 0, 0); | |
| 496 | + @ <p><b>Do not allow robots to make complex requests | |
| 497 | + @ against the following pages.</b> | |
| 498 | 498 | @ <p> A "complex request" is an HTTP request that has one or more query |
| 499 | 499 | @ parameters. Some robots will spend hours juggling around query parameters |
| 500 | 500 | @ or even forging fake query parameters in an effort to discover new |
| 501 | 501 | @ behavior or to find an SQL injection opportunity or similar. This can |
| 502 | - @ waste hours of CPU time and gigabytes of bandwidth on the server. Hence, | |
| 503 | - @ it is recommended to turn this feature on to stop such nefarious behavior. | |
| 504 | - @ (Property: robot-limiter) | |
| 505 | - @ | |
| 506 | - @ <p> When enabled, complex requests from user "nobody" without a Referer | |
| 507 | - @ redirect to the honeypot. | |
| 508 | - @ | |
| 509 | - @ <p> Additional settings below allow positive and negative overrides of | |
| 510 | - @ this complex request limiter. | |
| 511 | - @ <p><b>Allow Robots To See These Pages</b> (Property: robot-allow)<br> | |
| 512 | - textarea_attribute("", 4, 80, | |
| 513 | - "robot-allow", "rballow", "", 0); | |
| 514 | - @ <p><b>Restrict Robots From Seeing Only These Pages</b> | |
| 515 | - @ (Property: robot-restrict)<br> | |
| 516 | - textarea_attribute("", 4, 80, | |
| 502 | + @ waste hours of CPU time and gigabytes of bandwidth on the server. A | |
| 503 | + @ suggested value for this setting is: | |
| 504 | + @ "<tt>timeline,vdiff,fdiff,annotate,blame</tt>". | |
| 505 | + @ (Property: robot-restrict) | |
| 506 | + @ <p> | |
| 507 | + textarea_attribute("", 2, 80, | |
| 517 | 508 | "robot-restrict", "rbrestrict", "", 0); |
| 518 | 509 | |
| 519 | 510 | @ <hr> |
| 520 | 511 | @ <p><input type="submit" name="submit" value="Apply Changes"></p> |
| 521 | 512 | @ </div></form> |
| 522 | 513 |
| --- src/setup.c | |
| +++ src/setup.c | |
| @@ -491,31 +491,22 @@ | |
| 491 | @ access to the /proc virtual filesystem is required, which means this limit |
| 492 | @ might not work inside a chroot() jail. |
| 493 | @ (Property: "max-loadavg")</p> |
| 494 | |
| 495 | @ <hr> |
| 496 | onoff_attribute("Prohibit robots from issuing complex requests", |
| 497 | "robot-limiter", "rlb", 0, 0); |
| 498 | @ <p> A "complex request" is an HTTP request that has one or more query |
| 499 | @ parameters. Some robots will spend hours juggling around query parameters |
| 500 | @ or even forging fake query parameters in an effort to discover new |
| 501 | @ behavior or to find an SQL injection opportunity or similar. This can |
| 502 | @ waste hours of CPU time and gigabytes of bandwidth on the server. Hence, |
| 503 | @ it is recommended to turn this feature on to stop such nefarious behavior. |
| 504 | @ (Property: robot-limiter) |
| 505 | @ |
| 506 | @ <p> When enabled, complex requests from user "nobody" without a Referer |
| 507 | @ redirect to the honeypot. |
| 508 | @ |
| 509 | @ <p> Additional settings below allow positive and negative overrides of |
| 510 | @ this complex request limiter. |
| 511 | @ <p><b>Allow Robots To See These Pages</b> (Property: robot-allow)<br> |
| 512 | textarea_attribute("", 4, 80, |
| 513 | "robot-allow", "rballow", "", 0); |
| 514 | @ <p><b>Restrict Robots From Seeing Only These Pages</b> |
| 515 | @ (Property: robot-restrict)<br> |
| 516 | textarea_attribute("", 4, 80, |
| 517 | "robot-restrict", "rbrestrict", "", 0); |
| 518 | |
| 519 | @ <hr> |
| 520 | @ <p><input type="submit" name="submit" value="Apply Changes"></p> |
| 521 | @ </div></form> |
| 522 |
| --- src/setup.c | |
| +++ src/setup.c | |
| @@ -491,31 +491,22 @@ | |
| 491 | @ access to the /proc virtual filesystem is required, which means this limit |
| 492 | @ might not work inside a chroot() jail. |
| 493 | @ (Property: "max-loadavg")</p> |
| 494 | |
| 495 | @ <hr> |
| 496 | @ <p><b>Do not allow robots to make complex requests |
| 497 | @ against the following pages.</b> |
| 498 | @ <p> A "complex request" is an HTTP request that has one or more query |
| 499 | @ parameters. Some robots will spend hours juggling around query parameters |
| 500 | @ or even forging fake query parameters in an effort to discover new |
| 501 | @ behavior or to find an SQL injection opportunity or similar. This can |
| 502 | @ waste hours of CPU time and gigabytes of bandwidth on the server. A |
| 503 | @ suggested value for this setting is: |
| 504 | @ "<tt>timeline,vdiff,fdiff,annotate,blame</tt>". |
| 505 | @ (Property: robot-restrict) |
| 506 | @ <p> |
| 507 | textarea_attribute("", 2, 80, |
| 508 | "robot-restrict", "rbrestrict", "", 0); |
| 509 | |
| 510 | @ <hr> |
| 511 | @ <p><input type="submit" name="submit" value="Apply Changes"></p> |
| 512 | @ </div></form> |
| 513 |