Fossil SCM

Simplifications to the logic that tries to keep robots out.

drh 2025-08-15 23:18 trunk
Commit 02adced1c1abfd0458619d2a52aa9eb10d6a70d57c3249ce9a25aa84330c902e
+1 -2
--- src/diff.c
+++ src/diff.c
@@ -3788,12 +3788,11 @@
37883788
int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */
37893789
37903790
/* Gather query parameters */
37913791
login_check_credentials();
37923792
if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
3793
- if( exclude_spiders(0) ) return;
3794
- if( robot_squelch(990) ) return;
3793
+ if( robot_restrict("annotate") ) return;
37953794
fossil_nice_default();
37963795
zFilename = P("filename");
37973796
zRevision = PD("checkin",0);
37983797
zOrigin = P("origin");
37993798
zLimit = P("limit");
38003799
--- src/diff.c
+++ src/diff.c
@@ -3788,12 +3788,11 @@
3788 int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */
3789
3790 /* Gather query parameters */
3791 login_check_credentials();
3792 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
3793 if( exclude_spiders(0) ) return;
3794 if( robot_squelch(990) ) return;
3795 fossil_nice_default();
3796 zFilename = P("filename");
3797 zRevision = PD("checkin",0);
3798 zOrigin = P("origin");
3799 zLimit = P("limit");
3800
--- src/diff.c
+++ src/diff.c
@@ -3788,12 +3788,11 @@
3788 int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */
3789
3790 /* Gather query parameters */
3791 login_check_credentials();
3792 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
3793 if( robot_restrict("annotate") ) return;
 
3794 fossil_nice_default();
3795 zFilename = P("filename");
3796 zRevision = PD("checkin",0);
3797 zOrigin = P("origin");
3798 zLimit = P("limit");
3799
+1 -1
--- src/diffcmd.c
+++ src/diffcmd.c
@@ -1522,11 +1522,11 @@
15221522
DiffConfig DCfg;
15231523
cgi_check_for_malice();
15241524
login_check_credentials();
15251525
if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
15261526
if( zFrom==0 || zTo==0 ) fossil_redirect_home();
1527
- if( robot_squelch(800) ) return;
1527
+ if( robot_restrict("diff") ) return;
15281528
15291529
fossil_nice_default();
15301530
cgi_set_content_type("text/plain");
15311531
diff_config_init(&DCfg, DIFF_VERBOSE);
15321532
diff_two_versions(zFrom, zTo, &DCfg, 0);
15331533
--- src/diffcmd.c
+++ src/diffcmd.c
@@ -1522,11 +1522,11 @@
1522 DiffConfig DCfg;
1523 cgi_check_for_malice();
1524 login_check_credentials();
1525 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1526 if( zFrom==0 || zTo==0 ) fossil_redirect_home();
1527 if( robot_squelch(800) ) return;
1528
1529 fossil_nice_default();
1530 cgi_set_content_type("text/plain");
1531 diff_config_init(&DCfg, DIFF_VERBOSE);
1532 diff_two_versions(zFrom, zTo, &DCfg, 0);
1533
--- src/diffcmd.c
+++ src/diffcmd.c
@@ -1522,11 +1522,11 @@
1522 DiffConfig DCfg;
1523 cgi_check_for_malice();
1524 login_check_credentials();
1525 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1526 if( zFrom==0 || zTo==0 ) fossil_redirect_home();
1527 if( robot_restrict("diff") ) return;
1528
1529 fossil_nice_default();
1530 cgi_set_content_type("text/plain");
1531 diff_config_init(&DCfg, DIFF_VERBOSE);
1532 diff_two_versions(zFrom, zTo, &DCfg, 0);
1533
+2 -6
--- src/info.c
+++ src/info.c
@@ -1421,11 +1421,11 @@
14211421
Blob qpGlob; /* glob= query parameter for generated links */
14221422
int bInvert = PB("inv");
14231423
14241424
login_check_credentials();
14251425
if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1426
- if( robot_squelch(950) ) return;
1426
+ if( robot_restrict("diff") ) return;
14271427
login_anonymous_available();
14281428
fossil_nice_default();
14291429
blob_init(&qp, 0, 0);
14301430
blob_init(&qpGlob, 0, 0);
14311431
diffType = preferred_diff_type();
@@ -1975,11 +1975,11 @@
19751975
int verbose = PB("verbose");
19761976
DiffConfig DCfg;
19771977
19781978
login_check_credentials();
19791979
if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1980
- if( robot_squelch(800) ) return;
1980
+ if( robot_restrict("diff") ) return;
19811981
diff_config_init(&DCfg, 0);
19821982
diffType = preferred_diff_type();
19831983
if( P("from") && P("to") ){
19841984
v1 = artifact_from_ci_and_filename("from");
19851985
v2 = artifact_from_ci_and_filename("to");
@@ -2712,14 +2712,10 @@
27122712
style_set_current_feature("artifact");
27132713
if( fossil_strcmp(g.zPath, "docfile")==0 ){
27142714
isFile = 1;
27152715
docOnly = 1;
27162716
}
2717
- iCost = 200;
2718
- if( isFile ) iCost += 100;
2719
- if( zCI ) iCost += 100;
2720
- if( robot_squelch(iCost) ) return;
27212717
27222718
/* Capture and normalize the name= and ci= query parameters */
27232719
if( zName==0 ){
27242720
zName = P("filename");
27252721
if( zName==0 ){
27262722
--- src/info.c
+++ src/info.c
@@ -1421,11 +1421,11 @@
1421 Blob qpGlob; /* glob= query parameter for generated links */
1422 int bInvert = PB("inv");
1423
1424 login_check_credentials();
1425 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1426 if( robot_squelch(950) ) return;
1427 login_anonymous_available();
1428 fossil_nice_default();
1429 blob_init(&qp, 0, 0);
1430 blob_init(&qpGlob, 0, 0);
1431 diffType = preferred_diff_type();
@@ -1975,11 +1975,11 @@
1975 int verbose = PB("verbose");
1976 DiffConfig DCfg;
1977
1978 login_check_credentials();
1979 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1980 if( robot_squelch(800) ) return;
1981 diff_config_init(&DCfg, 0);
1982 diffType = preferred_diff_type();
1983 if( P("from") && P("to") ){
1984 v1 = artifact_from_ci_and_filename("from");
1985 v2 = artifact_from_ci_and_filename("to");
@@ -2712,14 +2712,10 @@
2712 style_set_current_feature("artifact");
2713 if( fossil_strcmp(g.zPath, "docfile")==0 ){
2714 isFile = 1;
2715 docOnly = 1;
2716 }
2717 iCost = 200;
2718 if( isFile ) iCost += 100;
2719 if( zCI ) iCost += 100;
2720 if( robot_squelch(iCost) ) return;
2721
2722 /* Capture and normalize the name= and ci= query parameters */
2723 if( zName==0 ){
2724 zName = P("filename");
2725 if( zName==0 ){
2726
--- src/info.c
+++ src/info.c
@@ -1421,11 +1421,11 @@
1421 Blob qpGlob; /* glob= query parameter for generated links */
1422 int bInvert = PB("inv");
1423
1424 login_check_credentials();
1425 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1426 if( robot_restrict("diff") ) return;
1427 login_anonymous_available();
1428 fossil_nice_default();
1429 blob_init(&qp, 0, 0);
1430 blob_init(&qpGlob, 0, 0);
1431 diffType = preferred_diff_type();
@@ -1975,11 +1975,11 @@
1975 int verbose = PB("verbose");
1976 DiffConfig DCfg;
1977
1978 login_check_credentials();
1979 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
1980 if( robot_restrict("diff") ) return;
1981 diff_config_init(&DCfg, 0);
1982 diffType = preferred_diff_type();
1983 if( P("from") && P("to") ){
1984 v1 = artifact_from_ci_and_filename("from");
1985 v2 = artifact_from_ci_and_filename("to");
@@ -2712,14 +2712,10 @@
2712 style_set_current_feature("artifact");
2713 if( fossil_strcmp(g.zPath, "docfile")==0 ){
2714 isFile = 1;
2715 docOnly = 1;
2716 }
 
 
 
 
2717
2718 /* Capture and normalize the name= and ci= query parameters */
2719 if( zName==0 ){
2720 zName = P("filename");
2721 if( zName==0 ){
2722
+5 -90
--- src/login.c
+++ src/login.c
@@ -1303,98 +1303,10 @@
13031303
}
13041304
fossil_free(zDecode);
13051305
return uid;
13061306
}
13071307
1308
-/*
1309
-** SETTING: robot-restrict width=40 block-text
1310
-** The VALUE of this setting is a list of GLOB patterns that match
1311
-** pages for which complex HTTP requests from robots should be disallowed.
1312
-** The recommended value for this setting is:
1313
-**
1314
-** timeline,vdiff,fdiff,annotate,blame
1315
-**
1316
-*/
1317
-
1318
-/*
1319
-** Check to see if the current HTTP request is a complex request that
1320
-** is coming from a robot and if access should restricted for such robots.
1321
-** For the purposes of this module, a "complex request" is an HTTP
1322
-** request with one or more query parameters other than "name".
1323
-**
1324
-** If this routine determines that robots should be restricted, then
1325
-** this routine publishes a redirect to the honeypot and exits without
1326
-** returning to the caller.
1327
-**
1328
-** This routine believes that this is a complex request is coming from
1329
-** a robot if all of the following are true:
1330
-**
1331
-** * The user is "nobody".
1332
-** * Either the REFERER field of the HTTP header is missing or empty,
1333
-** or the USERAGENT field of the HTTP header suggests that
1334
-** the request as coming from a robot.
1335
-** * There are one or more query parameters other than "name".
1336
-**
1337
-** Robot restrictions are governed by settings.
1338
-**
1339
-** robot-restrict The value is a list of GLOB patterns for pages
1340
-** that should restrict robot access. No restrictions
1341
-** are applied if this setting is undefined or is
1342
-** an empty string.
1343
-*/
1344
-void login_restrict_robot_access(void){
1345
- const char *zGlob;
1346
- int isMatch = 1;
1347
- int nQP; /* Number of query parameters other than name= */
1348
- if( g.zLogin!=0 ) return;
1349
- zGlob = db_get("robot-restrict",0);
1350
- if( zGlob==0 || zGlob[0]==0 ) return;
1351
- if( g.isHuman ){
1352
- const char *zReferer;
1353
- const char *zAccept;
1354
- const char *zBr;
1355
- zReferer = P("HTTP_REFERER");
1356
- if( zReferer && zReferer[0]!=0 ) return;
1357
-
1358
- /* Robots typically do not accept the brotli encoding, at least not
1359
- ** at the time of this writing (2025-04-01), but standard web-browser
1360
- ** all generally do accept brotli. So if brotli is accepted,
1361
- ** assume we are not talking to a robot. We might want to revisit this
1362
- ** heuristic in the future...
1363
- */
1364
- if( (zAccept = P("HTTP_ACCEPT_ENCODING"))!=0
1365
- && (zBr = strstr(zAccept,"br"))!=0
1366
- && !fossil_isalnum(zBr[2])
1367
- && (zBr==zAccept || !fossil_isalnum(zBr[-1]))
1368
- ){
1369
- return;
1370
- }
1371
- }
1372
- nQP = cgi_qp_count();
1373
- if( nQP<1 ) return;
1374
- isMatch = glob_multi_match(zGlob, g.zPath);
1375
- if( !isMatch ) return;
1376
-
1377
- /* Check for exceptions to the restriction on the number of query
1378
- ** parameters. */
1379
- zGlob = db_get("robot-restrict-qp",0);
1380
- if( zGlob && zGlob[0] ){
1381
- char *zPath = mprintf("%s/%d", g.zPath, nQP);
1382
- isMatch = glob_multi_match(zGlob, zPath);
1383
- fossil_free(zPath);
1384
- if( isMatch ) return;
1385
- }
1386
-
1387
- /* If we reach this point, it means we have a situation where we
1388
- ** want to restrict the activity of a robot.
1389
- */
1390
- g.isHuman = 0;
1391
- (void)exclude_spiders(0);
1392
- cgi_reply();
1393
- fossil_exit(0);
1394
-}
1395
-
13961308
/*
13971309
** When this routine is called, we know that the request does not
13981310
** have a login on the present repository. This routine checks to
13991311
** see if their login cookie might be for another member of the
14001312
** login-group.
@@ -1604,12 +1516,15 @@
16041516
login_create_csrf_secret("none");
16051517
}
16061518
16071519
login_set_uid(uid, zCap);
16081520
1609
- /* Maybe restrict access to robots */
1610
- login_restrict_robot_access();
1521
+ /* Maybe restrict access by robots */
1522
+ if( g.zLogin==0 && robot_restrict(g.zPath) ){
1523
+ cgi_reply();
1524
+ fossil_exit(0);
1525
+ }
16111526
}
16121527
16131528
/*
16141529
** Set the current logged in user to be uid. zCap is precomputed
16151530
** (override) capabilities. If zCap==0, then look up the capabilities
16161531
--- src/login.c
+++ src/login.c
@@ -1303,98 +1303,10 @@
1303 }
1304 fossil_free(zDecode);
1305 return uid;
1306 }
1307
1308 /*
1309 ** SETTING: robot-restrict width=40 block-text
1310 ** The VALUE of this setting is a list of GLOB patterns that match
1311 ** pages for which complex HTTP requests from robots should be disallowed.
1312 ** The recommended value for this setting is:
1313 **
1314 ** timeline,vdiff,fdiff,annotate,blame
1315 **
1316 */
1317
1318 /*
1319 ** Check to see if the current HTTP request is a complex request that
1320 ** is coming from a robot and if access should restricted for such robots.
1321 ** For the purposes of this module, a "complex request" is an HTTP
1322 ** request with one or more query parameters other than "name".
1323 **
1324 ** If this routine determines that robots should be restricted, then
1325 ** this routine publishes a redirect to the honeypot and exits without
1326 ** returning to the caller.
1327 **
1328 ** This routine believes that this is a complex request is coming from
1329 ** a robot if all of the following are true:
1330 **
1331 ** * The user is "nobody".
1332 ** * Either the REFERER field of the HTTP header is missing or empty,
1333 ** or the USERAGENT field of the HTTP header suggests that
1334 ** the request as coming from a robot.
1335 ** * There are one or more query parameters other than "name".
1336 **
1337 ** Robot restrictions are governed by settings.
1338 **
1339 ** robot-restrict The value is a list of GLOB patterns for pages
1340 ** that should restrict robot access. No restrictions
1341 ** are applied if this setting is undefined or is
1342 ** an empty string.
1343 */
1344 void login_restrict_robot_access(void){
1345 const char *zGlob;
1346 int isMatch = 1;
1347 int nQP; /* Number of query parameters other than name= */
1348 if( g.zLogin!=0 ) return;
1349 zGlob = db_get("robot-restrict",0);
1350 if( zGlob==0 || zGlob[0]==0 ) return;
1351 if( g.isHuman ){
1352 const char *zReferer;
1353 const char *zAccept;
1354 const char *zBr;
1355 zReferer = P("HTTP_REFERER");
1356 if( zReferer && zReferer[0]!=0 ) return;
1357
1358 /* Robots typically do not accept the brotli encoding, at least not
1359 ** at the time of this writing (2025-04-01), but standard web-browser
1360 ** all generally do accept brotli. So if brotli is accepted,
1361 ** assume we are not talking to a robot. We might want to revisit this
1362 ** heuristic in the future...
1363 */
1364 if( (zAccept = P("HTTP_ACCEPT_ENCODING"))!=0
1365 && (zBr = strstr(zAccept,"br"))!=0
1366 && !fossil_isalnum(zBr[2])
1367 && (zBr==zAccept || !fossil_isalnum(zBr[-1]))
1368 ){
1369 return;
1370 }
1371 }
1372 nQP = cgi_qp_count();
1373 if( nQP<1 ) return;
1374 isMatch = glob_multi_match(zGlob, g.zPath);
1375 if( !isMatch ) return;
1376
1377 /* Check for exceptions to the restriction on the number of query
1378 ** parameters. */
1379 zGlob = db_get("robot-restrict-qp",0);
1380 if( zGlob && zGlob[0] ){
1381 char *zPath = mprintf("%s/%d", g.zPath, nQP);
1382 isMatch = glob_multi_match(zGlob, zPath);
1383 fossil_free(zPath);
1384 if( isMatch ) return;
1385 }
1386
1387 /* If we reach this point, it means we have a situation where we
1388 ** want to restrict the activity of a robot.
1389 */
1390 g.isHuman = 0;
1391 (void)exclude_spiders(0);
1392 cgi_reply();
1393 fossil_exit(0);
1394 }
1395
1396 /*
1397 ** When this routine is called, we know that the request does not
1398 ** have a login on the present repository. This routine checks to
1399 ** see if their login cookie might be for another member of the
1400 ** login-group.
@@ -1604,12 +1516,15 @@
1604 login_create_csrf_secret("none");
1605 }
1606
1607 login_set_uid(uid, zCap);
1608
1609 /* Maybe restrict access to robots */
1610 login_restrict_robot_access();
 
 
 
1611 }
1612
1613 /*
1614 ** Set the current logged in user to be uid. zCap is precomputed
1615 ** (override) capabilities. If zCap==0, then look up the capabilities
1616
--- src/login.c
+++ src/login.c
@@ -1303,98 +1303,10 @@
1303 }
1304 fossil_free(zDecode);
1305 return uid;
1306 }
1307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1308 /*
1309 ** When this routine is called, we know that the request does not
1310 ** have a login on the present repository. This routine checks to
1311 ** see if their login cookie might be for another member of the
1312 ** login-group.
@@ -1604,12 +1516,15 @@
1516 login_create_csrf_secret("none");
1517 }
1518
1519 login_set_uid(uid, zCap);
1520
1521 /* Maybe restrict access by robots */
1522 if( g.zLogin==0 && robot_restrict(g.zPath) ){
1523 cgi_reply();
1524 fossil_exit(0);
1525 }
1526 }
1527
1528 /*
1529 ** Set the current logged in user to be uid. zCap is precomputed
1530 ** (override) capabilities. If zCap==0, then look up the capabilities
1531
+40 -42
--- src/robot.c
+++ src/robot.c
@@ -22,20 +22,10 @@
2222
#include "config.h"
2323
#include "robot.h"
2424
#include <assert.h>
2525
#include <time.h>
2626
27
-/*
28
-** SETTING: robot-squelch width=10 default=200
29
-** The VALUE of is an integer between 0 and 1000 that determines how
30
-** readily Fossil will squelch requests from robots. A value of 0
31
-** means "never squelch requests". A value of 1000 means "always
32
-** squelch requests from user 'nobody'". For values greater than 0
33
-** and less than 1000, the decision to squelch is based on a variety
34
-** of heuristics, but is more likely to occur the larger the number.
35
-*/
36
-
3727
/*
3828
** Rewrite the current page with a robot squelch captcha and return 1.
3929
**
4030
** Or, if valid proof-of-work is present as either a query parameter or
4131
** as a cookie, then return 0.
@@ -120,49 +110,57 @@
120110
@ </script>
121111
style_finish_page();
122112
return 1;
123113
}
124114
115
+/*
116
+** SETTING: robot-restrict width=40 block-text
117
+** The VALUE of this setting is a list of GLOB patterns that match
118
+** pages for which complex HTTP requests from unauthenticated clients
119
+** should be disallowed. "Unauthenticated" means the user is "nobody".
120
+** The recommended value for this setting is:
121
+**
122
+** timeline,diff,annotate,zip,fileage,file
123
+**
124
+** The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
125
+** /vpatch. The "annotate" tag also covers /blame and /praise. "zip"
126
+** also covers /tarball and /sqlar. If a tag has an "X" character appended,
127
+** then it only applies if query parameters are such that the page is
128
+** particularly difficult to compute.
129
+**
130
+** In all other cases, the tag should exactly match the page name.
131
+*/
125132
126133
/*
127
-** WEBPAGE functions can invoke this routine with an argument
128
-** that is between 0 and 1000. Based on that argument, and on
129
-** other factors, this routine decides whether or not to squelch
130
-** the request. "Squelch" in this context, means to require the
131
-** client to show proof-of-work before the request is processed.
132
-** The idea here is to prevent server overload due to excess robot
133
-** traffic. If a robot (or any client application really) wants us
134
-** to spend a lot of CPU computing some result for it, then it needs
135
-** to first demonstrate good faith by doing some make-work for us.
136
-**
137
-** This routine returns true for a squelch and false if the original
138
-** request should go through.
139
-**
140
-** The input parameter is an estimate of how much CPU time
141
-** and bandwidth is needed to compute a response. The higher the
142
-** value of this parameter, the more likely this routine is to squelch
143
-** the page. A value of zero means "never squelch". A value of
144
-** 1000 means always squelch if the user is "nobody".
145
-**
146
-** Squelching only happens if the user is "nobody". If the request
147
-** comes from any other user, including user "anonymous", the request
148
-** is never squelched.
149
-*/
150
-int robot_squelch(int n){
151
- const char *zToken;
152
- int iSquelch;
153
- assert( n>=0 && n<=1000 );
154
- if( g.zLogin ) return 0; /* Logged in users always get through */
155
- if( n==0 ) return 0; /* Squelch is completely disabled */
134
+** Return the default restriction GLOB
135
+*/
136
+const char *robot_restrict_default(void){
137
+ return "timeline,diff,annotate,zip,fileage,file";
138
+}
139
+/*
140
+** Check to see if the page named in the argument is on the
141
+** robot-restrict list. If it is on the list and if the user
142
+** is "nobody" then bring up a captcha to test to make sure that
143
+** client is not a robot.
144
+**
145
+** This routine returns true if a captcha was rendered and if subsequent
146
+** page generation should be aborted. It returns false if the page
147
+** should not be restricted and should be rendered normally.
148
+*/
149
+int robot_restrict(const char *zPage){
150
+ const char *zGlob;
151
+ const char *zToken;
152
+ if( g.zLogin ) return 0; /* Logged in users always get through */
153
+ zGlob = db_get("robot-restrict",robot_restrict_default());
154
+ if( zGlob==0 || zGlob[0]==0 ) return 0;
155
+ if( !glob_multi_match(zGlob, zPage) ) return 0;
156156
zToken = P("token");
157157
if( zToken!=0
158158
&& db_exists("SELECT 1 FROM config WHERE name='token-%q'", zToken)
159159
){
160160
return 0; /* There is a valid token= query parameter */
161161
}
162
- iSquelch = db_get_int("robot-squelch",200);
163
- if( iSquelch<=0 ) return 0;
164
- if( n+iSquelch>=1000 && robot_proofofwork() ){
162
+ if( robot_proofofwork() ){
165163
return 1;
166164
}
167165
return 0;
168166
}
169167
--- src/robot.c
+++ src/robot.c
@@ -22,20 +22,10 @@
22 #include "config.h"
23 #include "robot.h"
24 #include <assert.h>
25 #include <time.h>
26
27 /*
28 ** SETTING: robot-squelch width=10 default=200
29 ** The VALUE of is an integer between 0 and 1000 that determines how
30 ** readily Fossil will squelch requests from robots. A value of 0
31 ** means "never squelch requests". A value of 1000 means "always
32 ** squelch requests from user 'nobody'". For values greater than 0
33 ** and less than 1000, the decision to squelch is based on a variety
34 ** of heuristics, but is more likely to occur the larger the number.
35 */
36
37 /*
38 ** Rewrite the current page with a robot squelch captcha and return 1.
39 **
40 ** Or, if valid proof-of-work is present as either a query parameter or
41 ** as a cookie, then return 0.
@@ -120,49 +110,57 @@
120 @ </script>
121 style_finish_page();
122 return 1;
123 }
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
126 /*
127 ** WEBPAGE functions can invoke this routine with an argument
128 ** that is between 0 and 1000. Based on that argument, and on
129 ** other factors, this routine decides whether or not to squelch
130 ** the request. "Squelch" in this context, means to require the
131 ** client to show proof-of-work before the request is processed.
132 ** The idea here is to prevent server overload due to excess robot
133 ** traffic. If a robot (or any client application really) wants us
134 ** to spend a lot of CPU computing some result for it, then it needs
135 ** to first demonstrate good faith by doing some make-work for us.
136 **
137 ** This routine returns true for a squelch and false if the original
138 ** request should go through.
139 **
140 ** The input parameter is an estimate of how much CPU time
141 ** and bandwidth is needed to compute a response. The higher the
142 ** value of this parameter, the more likely this routine is to squelch
143 ** the page. A value of zero means "never squelch". A value of
144 ** 1000 means always squelch if the user is "nobody".
145 **
146 ** Squelching only happens if the user is "nobody". If the request
147 ** comes from any other user, including user "anonymous", the request
148 ** is never squelched.
149 */
150 int robot_squelch(int n){
151 const char *zToken;
152 int iSquelch;
153 assert( n>=0 && n<=1000 );
154 if( g.zLogin ) return 0; /* Logged in users always get through */
155 if( n==0 ) return 0; /* Squelch is completely disabled */
156 zToken = P("token");
157 if( zToken!=0
158 && db_exists("SELECT 1 FROM config WHERE name='token-%q'", zToken)
159 ){
160 return 0; /* There is a valid token= query parameter */
161 }
162 iSquelch = db_get_int("robot-squelch",200);
163 if( iSquelch<=0 ) return 0;
164 if( n+iSquelch>=1000 && robot_proofofwork() ){
165 return 1;
166 }
167 return 0;
168 }
169
--- src/robot.c
+++ src/robot.c
@@ -22,20 +22,10 @@
22 #include "config.h"
23 #include "robot.h"
24 #include <assert.h>
25 #include <time.h>
26
 
 
 
 
 
 
 
 
 
 
27 /*
28 ** Rewrite the current page with a robot squelch captcha and return 1.
29 **
30 ** Or, if valid proof-of-work is present as either a query parameter or
31 ** as a cookie, then return 0.
@@ -120,49 +110,57 @@
110 @ </script>
111 style_finish_page();
112 return 1;
113 }
114
115 /*
116 ** SETTING: robot-restrict width=40 block-text
117 ** The VALUE of this setting is a list of GLOB patterns that match
118 ** pages for which complex HTTP requests from unauthenticated clients
119 ** should be disallowed. "Unauthenticated" means the user is "nobody".
120 ** The recommended value for this setting is:
121 **
122 ** timeline,diff,annotate,zip,fileage,file
123 **
124 ** The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
125 ** /vpatch. The "annotate" tag also covers /blame and /praise. "zip"
126 ** also covers /tarball and /sqlar. If a tag has an "X" character appended,
127 ** then it only applies if query parameters are such that the page is
128 ** particularly difficult to compute.
129 **
130 ** In all other cases, the tag should exactly match the page name.
131 */
132
133 /*
134 ** Return the default restriction GLOB
135 */
136 const char *robot_restrict_default(void){
137 return "timeline,diff,annotate,zip,fileage,file";
138 }
139 /*
140 ** Check to see if the page named in the argument is on the
141 ** robot-restrict list. If it is on the list and if the user
142 ** is "nobody" then bring up a captcha to test to make sure that
143 ** client is not a robot.
144 **
145 ** This routine returns true if a captcha was rendered and if subsequent
146 ** page generation should be aborted. It returns false if the page
147 ** should not be restricted and should be rendered normally.
148 */
149 int robot_restrict(const char *zPage){
150 const char *zGlob;
151 const char *zToken;
152 if( g.zLogin ) return 0; /* Logged in users always get through */
153 zGlob = db_get("robot-restrict",robot_restrict_default());
154 if( zGlob==0 || zGlob[0]==0 ) return 0;
155 if( !glob_multi_match(zGlob, zPage) ) return 0;
 
 
 
 
 
 
 
156 zToken = P("token");
157 if( zToken!=0
158 && db_exists("SELECT 1 FROM config WHERE name='token-%q'", zToken)
159 ){
160 return 0; /* There is a valid token= query parameter */
161 }
162 if( robot_proofofwork() ){
 
 
163 return 1;
164 }
165 return 0;
166 }
167
+20 -37
--- src/setup.c
+++ src/setup.c
@@ -495,20 +495,29 @@
495495
@
496496
@ <form action="%R/setup_robot" method="post"><div>
497497
login_insert_csrf_secret();
498498
@ <input type="submit" name="submit" value="Apply Changes"></p>
499499
@ <hr>
500
- entry_attribute("Robot Squelch", 6, "robot-squelch", "rsq", "200", 0);
501
- @ <p>The "squelch" setting determines how aggressive Fossil is about
502
- @ trying to weed out robots using captchas. Squelch only applies to
503
- @ expensive requests from user "nobody". The higher the squelch setting,
504
- @ the more likely the request is to generate a captcha instead of the
505
- @ requested page. Squelch can be any integer between 0 and 1000.
506
- @ 0 means squelch is disabled and all requests go through without a
507
- @ captcha. 1000 means every expensive request from user "nobody" gets
508
- @ a captcha.
509
- @ (Property: "robot-squelch")</p>
500
+ @ <p><b>Do not allow robots access to these pages.</b>
501
+ @ <p> If the page name matches the GLOB pattern of this setting, and the
502
+ @ user is "nobody", and the client has not previously passed a captcha
503
+ @ test to show that it is not a robot, then the page is not displayed.
504
+ @ A captcha test is rendered instead.
505
+ @ The recommended value for this setting is:
506
+ @ <p>
507
+ @ &emsp;&emsp;&emsp;<tt>%h(robot_restrict_default())</tt>
508
+ @ <p>
509
+ @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
510
+ @ /vpatch. The "annotate" tag covers /annotate and also /blame and
511
+ @ /praise. The "zip" covers itself and also /tarball and /sqlar. If a
512
+ @ tag has an "X" character appended, then it only applies if query
513
+ @ parameters are such that the page is particularly difficult to compute.
514
+ @ In all other cases, the tag should exactly match the page name.
515
+ @ (Property: robot-restrict)
516
+ @ <br>
517
+ textarea_attribute("", 2, 80,
518
+ "robot-restrict", "rbrestrict", robot_restrict_default(), 0);
510519
511520
@ <hr>
512521
addAutoHyperlinkSettings();
513522
514523
@ <hr>
@@ -520,37 +529,11 @@
520529
@ computations here. Set this to 0.0 to disable the load average limit.
521530
@ This limit is only enforced on Unix servers. On Linux systems,
522531
@ access to the /proc virtual filesystem is required, which means this limit
523532
@ might not work inside a chroot() jail.
524533
@ (Property: "max-loadavg")</p>
525
-
526
- @ <hr>
527
- @ <p><b>Do not allow robots to make complex requests
528
- @ against the following pages.</b>
529
- @ <p> A "complex request" is an HTTP request that has one or more query
530
- @ parameters. Some robots will spend hours juggling around query parameters
531
- @ or even forging fake query parameters in an effort to discover new
532
- @ behavior or to find an SQL injection opportunity or similar. This can
533
- @ waste hours of CPU time and gigabytes of bandwidth on the server. A
534
- @ suggested value for this setting is:
535
- @ "<tt>timeline,*diff,vpatch,annotate,blame,praise,dir,tree</tt>".
536
- @ (Property: robot-restrict)
537
- @ <br>
538
- textarea_attribute("", 2, 80,
539
- "robot-restrict", "rbrestrict", "", 0);
540
- @ <br> The following comma-separated GLOB pattern allows for exceptions
541
- @ in the maximum number of query parameters before a request is considered
542
- @ complex. If this GLOB pattern exists and is non-empty and if it
543
- @ matches against the pagename followed by "/" and the number of query
544
- @ parameters, then the request is allowed through. For example, the
545
- @ suggested pattern of "timeline/[012]" allows the /timeline page to
546
- @ pass with up to 2 query parameters besides "name".
547
- @ (Property: robot-restrict-qp)
548
- @ <br>
549
- textarea_attribute("", 2, 80,
550
- "robot-restrict-qp", "rbrestrictqp", "", 0);
551
-
534
+ @
552535
@ <hr>
553536
@ <p><input type="submit" name="submit" value="Apply Changes"></p>
554537
@ </div></form>
555538
db_end_transaction(0);
556539
style_finish_page();
557540
--- src/setup.c
+++ src/setup.c
@@ -495,20 +495,29 @@
495 @
496 @ <form action="%R/setup_robot" method="post"><div>
497 login_insert_csrf_secret();
498 @ <input type="submit" name="submit" value="Apply Changes"></p>
499 @ <hr>
500 entry_attribute("Robot Squelch", 6, "robot-squelch", "rsq", "200", 0);
501 @ <p>The "squelch" setting determines how aggressive Fossil is about
502 @ trying to weed out robots using captchas. Squelch only applies to
503 @ expensive requests from user "nobody". The higher the squelch setting,
504 @ the more likely the request is to generate a captcha instead of the
505 @ requested page. Squelch can be any integer between 0 and 1000.
506 @ 0 means squelch is disabled and all requests go through without a
507 @ captcha. 1000 means every expensive request from user "nobody" gets
508 @ a captcha.
509 @ (Property: "robot-squelch")</p>
 
 
 
 
 
 
 
 
 
510
511 @ <hr>
512 addAutoHyperlinkSettings();
513
514 @ <hr>
@@ -520,37 +529,11 @@
520 @ computations here. Set this to 0.0 to disable the load average limit.
521 @ This limit is only enforced on Unix servers. On Linux systems,
522 @ access to the /proc virtual filesystem is required, which means this limit
523 @ might not work inside a chroot() jail.
524 @ (Property: "max-loadavg")</p>
525
526 @ <hr>
527 @ <p><b>Do not allow robots to make complex requests
528 @ against the following pages.</b>
529 @ <p> A "complex request" is an HTTP request that has one or more query
530 @ parameters. Some robots will spend hours juggling around query parameters
531 @ or even forging fake query parameters in an effort to discover new
532 @ behavior or to find an SQL injection opportunity or similar. This can
533 @ waste hours of CPU time and gigabytes of bandwidth on the server. A
534 @ suggested value for this setting is:
535 @ "<tt>timeline,*diff,vpatch,annotate,blame,praise,dir,tree</tt>".
536 @ (Property: robot-restrict)
537 @ <br>
538 textarea_attribute("", 2, 80,
539 "robot-restrict", "rbrestrict", "", 0);
540 @ <br> The following comma-separated GLOB pattern allows for exceptions
541 @ in the maximum number of query parameters before a request is considered
542 @ complex. If this GLOB pattern exists and is non-empty and if it
543 @ matches against the pagename followed by "/" and the number of query
544 @ parameters, then the request is allowed through. For example, the
545 @ suggested pattern of "timeline/[012]" allows the /timeline page to
546 @ pass with up to 2 query parameters besides "name".
547 @ (Property: robot-restrict-qp)
548 @ <br>
549 textarea_attribute("", 2, 80,
550 "robot-restrict-qp", "rbrestrictqp", "", 0);
551
552 @ <hr>
553 @ <p><input type="submit" name="submit" value="Apply Changes"></p>
554 @ </div></form>
555 db_end_transaction(0);
556 style_finish_page();
557
--- src/setup.c
+++ src/setup.c
@@ -495,20 +495,29 @@
495 @
496 @ <form action="%R/setup_robot" method="post"><div>
497 login_insert_csrf_secret();
498 @ <input type="submit" name="submit" value="Apply Changes"></p>
499 @ <hr>
500 @ <p><b>Do not allow robots access to these pages.</b>
501 @ <p> If the page name matches the GLOB pattern of this setting, and the
502 @ user is "nobody", and the client has not previously passed a captcha
503 @ test to show that it is not a robot, then the page is not displayed.
504 @ A captcha test is rendered instead.
505 @ The recommended value for this setting is:
506 @ <p>
507 @ &emsp;&emsp;&emsp;<tt>%h(robot_restrict_default())</tt>
508 @ <p>
509 @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and
510 @ /vpatch. The "annotate" tag covers /annotate and also /blame and
511 @ /praise. The "zip" covers itself and also /tarball and /sqlar. If a
512 @ tag has an "X" character appended, then it only applies if query
513 @ parameters are such that the page is particularly difficult to compute.
514 @ In all other cases, the tag should exactly match the page name.
515 @ (Property: robot-restrict)
516 @ <br>
517 textarea_attribute("", 2, 80,
518 "robot-restrict", "rbrestrict", robot_restrict_default(), 0);
519
520 @ <hr>
521 addAutoHyperlinkSettings();
522
523 @ <hr>
@@ -520,37 +529,11 @@
529 @ computations here. Set this to 0.0 to disable the load average limit.
530 @ This limit is only enforced on Unix servers. On Linux systems,
531 @ access to the /proc virtual filesystem is required, which means this limit
532 @ might not work inside a chroot() jail.
533 @ (Property: "max-loadavg")</p>
534 @
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535 @ <hr>
536 @ <p><input type="submit" name="submit" value="Apply Changes"></p>
537 @ </div></form>
538 db_end_transaction(0);
539 style_finish_page();
540
+1 -1
--- src/tar.c
+++ src/tar.c
@@ -760,11 +760,11 @@
760760
Blob tarball; /* Tarball accumulated here */
761761
const char *z;
762762
763763
login_check_credentials();
764764
if( !g.perm.Zip ){ login_needed(g.anon.Zip); return; }
765
- if( robot_squelch(900) ) return;
765
+ if( robot_restrict("zip") ) return;
766766
fossil_nice_default();
767767
zName = fossil_strdup(PD("name",""));
768768
z = P("r");
769769
if( z==0 ) z = P("uuid");
770770
if( z==0 ) z = tar_uuid_from_name(&zName);
771771
--- src/tar.c
+++ src/tar.c
@@ -760,11 +760,11 @@
760 Blob tarball; /* Tarball accumulated here */
761 const char *z;
762
763 login_check_credentials();
764 if( !g.perm.Zip ){ login_needed(g.anon.Zip); return; }
765 if( robot_squelch(900) ) return;
766 fossil_nice_default();
767 zName = fossil_strdup(PD("name",""));
768 z = P("r");
769 if( z==0 ) z = P("uuid");
770 if( z==0 ) z = tar_uuid_from_name(&zName);
771
--- src/tar.c
+++ src/tar.c
@@ -760,11 +760,11 @@
760 Blob tarball; /* Tarball accumulated here */
761 const char *z;
762
763 login_check_credentials();
764 if( !g.perm.Zip ){ login_needed(g.anon.Zip); return; }
765 if( robot_restrict("zip") ) return;
766 fossil_nice_default();
767 zName = fossil_strdup(PD("name",""));
768 z = P("r");
769 if( z==0 ) z = P("uuid");
770 if( z==0 ) z = tar_uuid_from_name(&zName);
771
+1 -1
--- src/zip.c
+++ src/zip.c
@@ -1012,11 +1012,11 @@
10121012
int eType = ARCHIVE_ZIP; /* Type of archive to generate */
10131013
char *zType; /* Human-readable archive type */
10141014
10151015
login_check_credentials();
10161016
if( !g.perm.Zip ){ login_needed(g.anon.Zip); return; }
1017
- if( robot_squelch(900) ) return;
1017
+ if( robot_restrict("zip") ) return;
10181018
if( fossil_strcmp(g.zPath, "sqlar")==0 ){
10191019
eType = ARCHIVE_SQLAR;
10201020
zType = "SQL";
10211021
/* For some reason, SQL-archives are like catnip for robots. So
10221022
** don't allow them to be downloaded by user "nobody" */
10231023
--- src/zip.c
+++ src/zip.c
@@ -1012,11 +1012,11 @@
1012 int eType = ARCHIVE_ZIP; /* Type of archive to generate */
1013 char *zType; /* Human-readable archive type */
1014
1015 login_check_credentials();
1016 if( !g.perm.Zip ){ login_needed(g.anon.Zip); return; }
1017 if( robot_squelch(900) ) return;
1018 if( fossil_strcmp(g.zPath, "sqlar")==0 ){
1019 eType = ARCHIVE_SQLAR;
1020 zType = "SQL";
1021 /* For some reason, SQL-archives are like catnip for robots. So
1022 ** don't allow them to be downloaded by user "nobody" */
1023
--- src/zip.c
+++ src/zip.c
@@ -1012,11 +1012,11 @@
1012 int eType = ARCHIVE_ZIP; /* Type of archive to generate */
1013 char *zType; /* Human-readable archive type */
1014
1015 login_check_credentials();
1016 if( !g.perm.Zip ){ login_needed(g.anon.Zip); return; }
1017 if( robot_restrict("zip") ) return;
1018 if( fossil_strcmp(g.zPath, "sqlar")==0 ){
1019 eType = ARCHIVE_SQLAR;
1020 zType = "SQL";
1021 /* For some reason, SQL-archives are like catnip for robots. So
1022 ** don't allow them to be downloaded by user "nobody" */
1023

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button