Fossil SCM

Further improvements to the squelch captcha.

drh 2025-08-15 18:47 robot-squelch
Commit 055908da97cbc460a414712eca93bcbf40148344a605f2208697505b10db7d2c
3 files changed +15 +2 -2 +55 -24
+15
--- src/cgi.c
+++ src/cgi.c
@@ -1612,10 +1612,25 @@
16121612
}
16131613
}
16141614
CGIDEBUG(("no-match [%s]\n", zName));
16151615
return zDefault;
16161616
}
1617
+
1618
+/*
1619
+** Return TRUE if the specific parameter exists and is a query parameter.
1620
+** Return FALSE if the parameter is a cookie or environment variable.
1621
+*/
1622
+int cgi_is_qp(const char *zName){
1623
+ int i;
1624
+ if( zName==0 || fossil_isupper(zName) ) return 0;
1625
+ for(i=0; i<nUsedQP; i++){
1626
+ if( fossil_strcmp(aParamQP[i].zName,zName)==0 ){
1627
+ return aParamQP[i].isQP;
1628
+ }
1629
+ }
1630
+ return 0;
1631
+}
16171632
16181633
/*
16191634
** Renders the "begone, spider" page and exits.
16201635
*/
16211636
static void cgi_begone_spider(const char *zName){
16221637
--- src/cgi.c
+++ src/cgi.c
@@ -1612,10 +1612,25 @@
1612 }
1613 }
1614 CGIDEBUG(("no-match [%s]\n", zName));
1615 return zDefault;
1616 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1617
1618 /*
1619 ** Renders the "begone, spider" page and exits.
1620 */
1621 static void cgi_begone_spider(const char *zName){
1622
--- src/cgi.c
+++ src/cgi.c
@@ -1612,10 +1612,25 @@
1612 }
1613 }
1614 CGIDEBUG(("no-match [%s]\n", zName));
1615 return zDefault;
1616 }
1617
1618 /*
1619 ** Return TRUE if the specific parameter exists and is a query parameter.
1620 ** Return FALSE if the parameter is a cookie or environment variable.
1621 */
1622 int cgi_is_qp(const char *zName){
1623 int i;
1624 if( zName==0 || fossil_isupper(zName) ) return 0;
1625 for(i=0; i<nUsedQP; i++){
1626 if( fossil_strcmp(aParamQP[i].zName,zName)==0 ){
1627 return aParamQP[i].isQP;
1628 }
1629 }
1630 return 0;
1631 }
1632
1633 /*
1634 ** Renders the "begone, spider" page and exits.
1635 */
1636 static void cgi_begone_spider(const char *zName){
1637
+2 -2
--- src/diff.c
+++ src/diff.c
@@ -3787,13 +3787,13 @@
37873787
unsigned clr1, clr2, clr;
37883788
int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */
37893789
37903790
/* Gather query parameters */
37913791
login_check_credentials();
3792
- if( !g.perm.Read || g.zLogin==0 ){ login_needed(g.anon.Read); return; }
3792
+ if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
37933793
if( exclude_spiders(0) ) return;
3794
- if( robot_squelch(950) ) return;
3794
+ if( robot_squelch(990) ) return;
37953795
fossil_nice_default();
37963796
zFilename = P("filename");
37973797
zRevision = PD("checkin",0);
37983798
zOrigin = P("origin");
37993799
zLimit = P("limit");
38003800
--- src/diff.c
+++ src/diff.c
@@ -3787,13 +3787,13 @@
3787 unsigned clr1, clr2, clr;
3788 int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */
3789
3790 /* Gather query parameters */
3791 login_check_credentials();
3792 if( !g.perm.Read || g.zLogin==0 ){ login_needed(g.anon.Read); return; }
3793 if( exclude_spiders(0) ) return;
3794 if( robot_squelch(950) ) return;
3795 fossil_nice_default();
3796 zFilename = P("filename");
3797 zRevision = PD("checkin",0);
3798 zOrigin = P("origin");
3799 zLimit = P("limit");
3800
--- src/diff.c
+++ src/diff.c
@@ -3787,13 +3787,13 @@
3787 unsigned clr1, clr2, clr;
3788 int bBlame = g.zPath[0]!='a';/* True for BLAME output. False for ANNOTATE. */
3789
3790 /* Gather query parameters */
3791 login_check_credentials();
3792 if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
3793 if( exclude_spiders(0) ) return;
3794 if( robot_squelch(990) ) return;
3795 fossil_nice_default();
3796 zFilename = P("filename");
3797 zRevision = PD("checkin",0);
3798 zOrigin = P("origin");
3799 zLimit = P("limit");
3800
+55 -24
--- src/robot.c
+++ src/robot.c
@@ -20,10 +20,11 @@
2020
** Fossil is run as a service.
2121
*/
2222
#include "config.h"
2323
#include "robot.h"
2424
#include <assert.h>
25
+#include <time.h>
2526
2627
/*
2728
** SETTING: robot-squelch width=10 default=200
2829
** The VALUE is an integer between 0 and 1000 that determines how
2930
** readily Fossil will squelch requests from robots. A value of 0
@@ -32,42 +33,63 @@
3233
** and less than 1000, the decision to squelch is based on a variety
3334
** of heuristics, but is more likely to occur the larger the number.
3435
*/
3536
3637
/*
37
-** Rewrite the current page with a robot squelch captcha.
38
+** Rewrite the current page with a robot squelch captcha and return 1.
39
+**
40
+** Or, if valid proof-of-work is present as either a query parameter or
41
+** as a cookie, then return 0.
3842
*/
39
-static int robot_send_captcha(void){
40
- unsigned h = 0;
43
+static int robot_proofofwork(void){
44
+ sqlite3_int64 tm;
45
+ unsigned h1, h2;
46
+ int k;
4147
const char *z;
48
+ const char *az[2];
4249
4350
/* Construct a proof-of-work value based on the IP address of the
44
- ** sender and the sender's user-agent string. */
45
- z = P("REMOTE_ADDR");
46
- if( z ){
47
- while( *z ){ h = (h + *(unsigned char*)(z++))*0x9e3779b1; }
48
- }
49
- z = P("HTTP_USER_AGENT");
50
- if( z ){
51
- while( *z ){ h = (h + *(unsigned char*)(z++))*0x9e3779b1; }
52
- }
53
- h %= 1000000000;
51
+ ** sender and the sender's user-agent string. The current time also
52
+ ** affects the pow value, so actually compute two values, one for the
53
+ ** current 900-second interval and one for the previous. Either can
54
+ ** match. The pow-value is an integer between 100,000,000 and
55
+ ** 999,999,999. */
56
+ az[0] = P("REMOTE_ADDR");
57
+ az[1] = P("HTTP_USER_AGENT");
58
+ tm = time(0);
59
+ h1 = (unsigned)((tm&0xffffffff) / 900);
60
+ h2 = h1 - 1;
61
+ for(k=0; k<2; k++){
62
+ z = az[k];
63
+ if( z==0 ) continue;
64
+ while( *z ){
65
+ h1 = (h1 + *(unsigned char*)z)*0x9e3779b1;
66
+ h2 = (h2 + *(unsigned char*)z)*0x9e3779b1;
67
+ z++;
68
+ }
69
+ }
70
+ h1 = (h1 % 900000000) + 100000000;
71
+ h2 = (h2 % 900000000) + 100000000;
5472
5573
/* If there is already a proof-of-work cookie with this value
5674
** that means that the user agent has already authenticated.
5775
*/
5876
z = P("fossil-proofofwork");
59
- if( z && atoi(z)==h ){
77
+ if( z
78
+ && (atoi(z)==h1 || atoi(z)==h2)
79
+ && !cgi_is_qp("fossil-proofofwork") ){
6080
return 0;
6181
}
6282
6383
/* Check for a proof query parameter. If found, that means that
6484
** the captcha has just now passed, so set the proof-of-work cookie
6585
** in addition to letting the request through.
6686
*/
6787
z = P("proof");
68
- if( z && atoi(z)==h ){
88
+ if( z
89
+ && (atoi(z)==h1 || atoi(z)==h2)
90
+ ){
6991
cgi_set_cookie("fossil-proofofwork",z,"/",900);
7092
return 0;
7193
}
7294
cgi_tag_query_parameter("proof");
7395
@@ -81,16 +103,22 @@
81103
cgi_query_parameters_to_hidden();
82104
@ <input id="vx" type="hidden" name="proof" value="0">
83105
@ <input id="cx" type="submit" value="Wait..." disabled>
84106
@ </form>
85107
@ <script nonce='%s(style_nonce())'>
86
- @ function enableHuman(){
87
- @ document.getElementById("vx").value = %u(h);
88
- @ document.getElementById("cx").value = "Ok";
89
- @ document.getElementById("cx").disabled = false;
108
+ @ function Nhtot1520(x){return document.getElementById(x);}
109
+ @ function Aoxlxzajv(h){\
110
+ @ Nhtot1520("vx").value=h;\
111
+ @ Nhtot1520("cx").value="Ok";\
112
+ @ Nhtot1520("cx").disabled=false;\
113
+ @ }
114
+ @ function Vhcnyarsm(h,a){\
115
+ @ if(a>0){setTimeout(Vhcnyarsm,1,h+a,a-1);}else{Aoxlxzajv(h);}\
90116
@ }
91
- @ setTimeout(function(){enableHuman();}, 500);
117
+ k = 200 + h2%99;
118
+ h2 = (k*k + k)/2;
119
+ @ setTimeout(function(){Vhcnyarsm(%u(h1-h2),%u(k));},10);
92120
@ </script>
93121
style_finish_page();
94122
return 1;
95123
}
96124
@@ -97,13 +125,16 @@
97125
98126
/*
99127
** WEBPAGE functions can invoke this routine with an argument
100128
** that is between 0 and 1000. Based on that argument, and on
101129
** other factors, this routine decides whether or not to squelch
102
-** the request. "Squelch" in this context, means paint a captcha
103
-** rather than complete the original request. The idea here is to
104
-** prevent server overload due to excess robot traffic.
130
+** the request. "Squelch" in this context, means to require the
131
+** client to show proof-of-work before the request is processed.
132
+** The idea here is to prevent server overload due to excess robot
133
+** traffic. If a robot (or any client application really) wants us
134
+** to spend a lot of CPU computing some result for it, then it needs
135
+** to first demonstrate good faith by doing some make-work for us.
105136
**
106137
** This routine returns true for a squelch and false if the original
107138
** request should go through.
108139
**
109140
** The input parameter is an estimate of how much CPU time
@@ -128,10 +159,10 @@
128159
){
129160
return 0; /* There is a valid token= query parameter */
130161
}
131162
iSquelch = db_get_int("robot-squelch",200);
132163
if( iSquelch<=0 ) return 0;
133
- if( n+iSquelch>=1000 && robot_send_captcha() ){
164
+ if( n+iSquelch>=1000 && robot_proofofwork() ){
134165
return 1;
135166
}
136167
return 0;
137168
}
138169
--- src/robot.c
+++ src/robot.c
@@ -20,10 +20,11 @@
20 ** Fossil is run as a service.
21 */
22 #include "config.h"
23 #include "robot.h"
24 #include <assert.h>
 
25
26 /*
27 ** SETTING: robot-squelch width=10 default=200
28 ** The VALUE is an integer between 0 and 1000 that determines how
29 ** readily Fossil will squelch requests from robots. A value of 0
@@ -32,42 +33,63 @@
32 ** and less than 1000, the decision to squelch is based on a variety
33 ** of heuristics, but is more likely to occur the larger the number.
34 */
35
36 /*
37 ** Rewrite the current page with a robot squelch captcha.
 
 
 
38 */
39 static int robot_send_captcha(void){
40 unsigned h = 0;
 
 
41 const char *z;
 
42
43 /* Construct a proof-of-work value based on the IP address of the
44 ** sender and the sender's user-agent string. */
45 z = P("REMOTE_ADDR");
46 if( z ){
47 while( *z ){ h = (h + *(unsigned char*)(z++))*0x9e3779b1; }
48 }
49 z = P("HTTP_USER_AGENT");
50 if( z ){
51 while( *z ){ h = (h + *(unsigned char*)(z++))*0x9e3779b1; }
52 }
53 h %= 1000000000;
 
 
 
 
 
 
 
 
 
 
 
54
55 /* If there is already a proof-of-work cookie with this value
56 ** that means that the user agent has already authenticated.
57 */
58 z = P("fossil-proofofwork");
59 if( z && atoi(z)==h ){
 
 
60 return 0;
61 }
62
63 /* Check for a proof query parameter. If found, that means that
64 ** the captcha has just now passed, so set the proof-of-work cookie
65 ** in addition to letting the request through.
66 */
67 z = P("proof");
68 if( z && atoi(z)==h ){
 
 
69 cgi_set_cookie("fossil-proofofwork",z,"/",900);
70 return 0;
71 }
72 cgi_tag_query_parameter("proof");
73
@@ -81,16 +103,22 @@
81 cgi_query_parameters_to_hidden();
82 @ <input id="vx" type="hidden" name="proof" value="0">
83 @ <input id="cx" type="submit" value="Wait..." disabled>
84 @ </form>
85 @ <script nonce='%s(style_nonce())'>
86 @ function enableHuman(){
87 @ document.getElementById("vx").value = %u(h);
88 @ document.getElementById("cx").value = "Ok";
89 @ document.getElementById("cx").disabled = false;
 
 
 
 
90 @ }
91 @ setTimeout(function(){enableHuman();}, 500);
 
 
92 @ </script>
93 style_finish_page();
94 return 1;
95 }
96
@@ -97,13 +125,16 @@
97
98 /*
99 ** WEBPAGE functions can invoke this routine with an argument
100 ** that is between 0 and 1000. Based on that argument, and on
101 ** other factors, this routine decides whether or not to squelch
102 ** the request. "Squelch" in this context, means paint a captcha
103 ** rather than complete the original request. The idea here is to
104 ** prevent server overload due to excess robot traffic.
 
 
 
105 **
106 ** This routine returns true for a squelch and false if the original
107 ** request should go through.
108 **
109 ** The input parameter is an estimate of how much CPU time
@@ -128,10 +159,10 @@
128 ){
129 return 0; /* There is a valid token= query parameter */
130 }
131 iSquelch = db_get_int("robot-squelch",200);
132 if( iSquelch<=0 ) return 0;
133 if( n+iSquelch>=1000 && robot_send_captcha() ){
134 return 1;
135 }
136 return 0;
137 }
138
--- src/robot.c
+++ src/robot.c
@@ -20,10 +20,11 @@
20 ** Fossil is run as a service.
21 */
22 #include "config.h"
23 #include "robot.h"
24 #include <assert.h>
25 #include <time.h>
26
27 /*
28 ** SETTING: robot-squelch width=10 default=200
29 ** The VALUE is an integer between 0 and 1000 that determines how
30 ** readily Fossil will squelch requests from robots. A value of 0
@@ -32,42 +33,63 @@
33 ** and less than 1000, the decision to squelch is based on a variety
34 ** of heuristics, but is more likely to occur the larger the number.
35 */
36
37 /*
38 ** Rewrite the current page with a robot squelch captcha and return 1.
39 **
40 ** Or, if valid proof-of-work is present as either a query parameter or
41 ** as a cookie, then return 0.
42 */
43 static int robot_proofofwork(void){
44 sqlite3_int64 tm;
45 unsigned h1, h2;
46 int k;
47 const char *z;
48 const char *az[2];
49
50 /* Construct a proof-of-work value based on the IP address of the
51 ** sender and the sender's user-agent string. The current time also
52 ** affects the pow value, so actually compute two values, one for the
53 ** current 900-second interval and one for the previous. Either can
54 ** match. The pow-value is an integer between 100,000,000 and
55 ** 999,999,999. */
56 az[0] = P("REMOTE_ADDR");
57 az[1] = P("HTTP_USER_AGENT");
58 tm = time(0);
59 h1 = (unsigned)((tm&0xffffffff) / 900);
60 h2 = h1 - 1;
61 for(k=0; k<2; k++){
62 z = az[k];
63 if( z==0 ) continue;
64 while( *z ){
65 h1 = (h1 + *(unsigned char*)z)*0x9e3779b1;
66 h2 = (h2 + *(unsigned char*)z)*0x9e3779b1;
67 z++;
68 }
69 }
70 h1 = (h1 % 900000000) + 100000000;
71 h2 = (h2 % 900000000) + 100000000;
72
73 /* If there is already a proof-of-work cookie with this value
74 ** that means that the user agent has already authenticated.
75 */
76 z = P("fossil-proofofwork");
77 if( z
78 && (atoi(z)==h1 || atoi(z)==h2)
79 && !cgi_is_qp("fossil-proofofwork") ){
80 return 0;
81 }
82
83 /* Check for a proof query parameter. If found, that means that
84 ** the captcha has just now passed, so set the proof-of-work cookie
85 ** in addition to letting the request through.
86 */
87 z = P("proof");
88 if( z
89 && (atoi(z)==h1 || atoi(z)==h2)
90 ){
91 cgi_set_cookie("fossil-proofofwork",z,"/",900);
92 return 0;
93 }
94 cgi_tag_query_parameter("proof");
95
@@ -81,16 +103,22 @@
103 cgi_query_parameters_to_hidden();
104 @ <input id="vx" type="hidden" name="proof" value="0">
105 @ <input id="cx" type="submit" value="Wait..." disabled>
106 @ </form>
107 @ <script nonce='%s(style_nonce())'>
108 @ function Nhtot1520(x){return document.getElementById(x);}
109 @ function Aoxlxzajv(h){\
110 @ Nhtot1520("vx").value=h;\
111 @ Nhtot1520("cx").value="Ok";\
112 @ Nhtot1520("cx").disabled=false;\
113 @ }
114 @ function Vhcnyarsm(h,a){\
115 @ if(a>0){setTimeout(Vhcnyarsm,1,h+a,a-1);}else{Aoxlxzajv(h);}\
116 @ }
117 k = 200 + h2%99;
118 h2 = (k*k + k)/2;
119 @ setTimeout(function(){Vhcnyarsm(%u(h1-h2),%u(k));},10);
120 @ </script>
121 style_finish_page();
122 return 1;
123 }
124
@@ -97,13 +125,16 @@
125
126 /*
127 ** WEBPAGE functions can invoke this routine with an argument
128 ** that is between 0 and 1000. Based on that argument, and on
129 ** other factors, this routine decides whether or not to squelch
130 ** the request. "Squelch" in this context, means to require the
131 ** client to show proof-of-work before the request is processed.
132 ** The idea here is to prevent server overload due to excess robot
133 ** traffic. If a robot (or any client application really) wants us
134 ** to spend a lot of CPU computing some result for it, then it needs
135 ** to first demonstrate good faith by doing some make-work for us.
136 **
137 ** This routine returns true for a squelch and false if the original
138 ** request should go through.
139 **
140 ** The input parameter is an estimate of how much CPU time
@@ -128,10 +159,10 @@
159 ){
160 return 0; /* There is a valid token= query parameter */
161 }
162 iSquelch = db_get_int("robot-squelch",200);
163 if( iSquelch<=0 ) return 0;
164 if( n+iSquelch>=1000 && robot_proofofwork() ){
165 return 1;
166 }
167 return 0;
168 }
169

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button