Fossil SCM
Refactor the code in robot.c to make interfaces available to other parts of the system.
Commit
4fa618faf1e70f8bca15e469a825f2a4a2ef0c5119a7ce76ed9f97ac11769cc1
Parent
3f6a6bdce421212…
1 file changed
+175
-65
+175
-65
| --- src/robot.c | ||
| +++ src/robot.c | ||
| @@ -31,32 +31,51 @@ | ||
| 31 | 31 | #if INTERFACE |
| 32 | 32 | #define ROBOT_COOKIE "fossil-client-ok" |
| 33 | 33 | #endif |
| 34 | 34 | |
| 35 | 35 | /* |
| 36 | -** Rewrite the current page with a robot squelch captcha and return 1. | |
| 36 | +** Values computed only once and then cached. | |
| 37 | +*/ | |
| 38 | +static struct RobotCache { | |
| 39 | + unsigned int h1, h2; /* Proof-of-work hash values */ | |
| 40 | + unsigned int resultCache; /* 0: unknown. 1: human 2: might-be-robot */ | |
| 41 | +} robot = { 0, 0, 0 }; | |
| 42 | + | |
| 43 | +/* | |
| 44 | +** Allowed values for robot.resultCache | |
| 45 | +*/ | |
| 46 | +#define KNOWN_NOT_ROBOT 1 | |
| 47 | +#define MIGHT_BE_ROBOT 2 | |
| 48 | + | |
| 49 | +/* | |
| 50 | +** Compute two hashes, robot.h1 and robot.h2, that are used as | |
| 51 | +** part of determining whether or not the HTTP client is a robot. | |
| 52 | +** These hashes are based on current time, client IP address, | |
| 53 | +** and User-Agent. robot.h1 is for the current time slot and | |
| 54 | +** robot.h2 is the previous. | |
| 37 | 55 | ** |
| 38 | -** Or, if valid proof-of-work is present as either a query parameter or | |
| 39 | -** as a cookie, then return 0. | |
| 56 | +** The hashes are integer values between 100,000,000 and 999,999,999 | |
| 57 | +** inclusive. | |
| 40 | 58 | */ |
| 41 | -static int robot_proofofwork(void){ | |
| 59 | +static void robot_pow_hash(void){ | |
| 60 | + const char *az[2], *z; | |
| 42 | 61 | sqlite3_int64 tm; |
| 43 | - unsigned h1, h2, p1, p2, p3, p4, p5, k2, k3; | |
| 44 | - int k; | |
| 45 | - const char *z; | |
| 46 | - const char *az[2]; | |
| 62 | + unsigned int h1, h2, k; | |
| 63 | + | |
| 64 | + if( robot.h1 ) return; /* Already computed */ | |
| 47 | 65 | |
| 48 | 66 | /* Construct a proof-of-work value based on the IP address of the |
| 49 | 67 | ** sender and the sender's user-agent string. The current time also |
| 50 | 68 | ** affects the pow value, so actually compute two values, one for the |
| 51 | 69 | ** current 900-second interval and one for the previous. Either can |
| 52 | 70 | ** match. The pow-value is an integer between 100,000,000 and |
| 53 | - ** 999,999,999. */ | |
| 71 | + ** 999,999,999. | |
| 72 | + */ | |
| 54 | 73 | az[0] = P("REMOTE_ADDR"); |
| 55 | 74 | az[1] = P("HTTP_USER_AGENT"); |
| 56 | 75 | tm = time(0); |
| 57 | - h1 = (unsigned)((tm&0xffffffff) / 900); | |
| 76 | + h1 = (unsigned)(tm/900)&0xffffffff; | |
| 58 | 77 | h2 = h1 - 1; |
| 59 | 78 | for(k=0; k<2; k++){ |
| 60 | 79 | z = az[k]; |
| 61 | 80 | if( z==0 ) continue; |
| 62 | 81 | while( *z ){ |
| @@ -63,35 +82,121 @@ | ||
| 63 | 82 | h1 = (h1 + *(unsigned char*)z)*0x9e3779b1; |
| 64 | 83 | h2 = (h2 + *(unsigned char*)z)*0x9e3779b1; |
| 65 | 84 | z++; |
| 66 | 85 | } |
| 67 | 86 | } |
| 68 | - h1 = (h1 % 900000000) + 100000000; | |
| 69 | - h2 = (h2 % 900000000) + 100000000; | |
| 70 | - | |
| 71 | - /* If there is already a proof-of-work cookie with this value | |
| 72 | - ** that means that the user agent has already authenticated. | |
| 73 | - */ | |
| 74 | - z = P(ROBOT_COOKIE); | |
| 75 | - if( z | |
| 76 | - && (atoi(z)==h1 || atoi(z)==h2) | |
| 77 | - && !cgi_is_qp(ROBOT_COOKIE) ){ | |
| 78 | - return 0; | |
| 79 | - } | |
| 80 | - | |
| 81 | - /* Check for a proof query parameter. If found, that means that | |
| 82 | - ** the captcha has just now passed, so set the proof-of-work cookie | |
| 83 | - ** in addition to letting the request through. | |
| 84 | - */ | |
| 85 | - z = P("proof"); | |
| 86 | - if( z | |
| 87 | - && (atoi(z)==h1 || atoi(z)==h2) | |
| 88 | - ){ | |
| 89 | - cgi_set_cookie(ROBOT_COOKIE,z,"/",900); | |
| 90 | - return 0; | |
| 91 | - } | |
| 92 | - cgi_tag_query_parameter("proof"); | |
| 87 | + robot.h1 = (h1 % 900000000) + 100000000; | |
| 88 | + robot.h2 = (h2 % 900000000) + 100000000; | |
| 89 | +} | |
| 90 | + | |
| 91 | +/* | |
| 92 | +** Return true if the HTTP client has not demonstrated that it is | |
| 93 | +** human interactive, meaning that it might be a non-interactive | |
| 94 | +** robot. Return false if the client is known not to be a robot. | |
| 95 | +** | |
| 96 | +** For this routine, any of the following is considered proof that | |
| 97 | +** the HTTP client is not a robot: | |
| 98 | +** | |
| 99 | +** 1. There is a valid login, including "anonymous". User "nobody" | |
| 100 | +** is not a valid login, but every other user is. | |
| 101 | +** | |
| 102 | +** 2. There exists a ROBOT_COOKIE with the correct proof-of-work | |
| 103 | +** value. | |
| 104 | +** | |
| 105 | +** 3. There exists a proof=VALUE query parameter where VALUE is | |
| 106 | +** a correct proof-of-work value. | |
| 107 | +** | |
| 108 | +** 4. There exists a valid token=VALUE query parameter. | |
| 109 | +** | |
| 110 | +** After being run once, this routine caches its findings and | |
| 111 | +** returns very quickly on subsequent invocations. | |
| 112 | +*/ | |
| 113 | +int client_might_be_a_robot(void){ | |
| 114 | + const char *z; | |
| 115 | + | |
| 116 | + /* Only do this computation once, then cache the results for future | |
| 117 | + ** use */ | |
| 118 | + if( robot.resultCache ){ | |
| 119 | + return robot.resultCache==MIGHT_BE_ROBOT; | |
| 120 | + } | |
| 121 | + | |
| 122 | + /* Condition 1: Is there a valid login? | |
| 123 | + */ | |
| 124 | + if( g.userUid==0 ){ | |
| 125 | + login_check_credentials(); | |
| 126 | + } | |
| 127 | + if( g.zLogin!=0 ){ | |
| 128 | + robot.resultCache = KNOWN_NOT_ROBOT; | |
| 129 | + return 0; | |
| 130 | + } | |
| 131 | + | |
| 132 | + /* Condition 2: If there is already a proof-of-work cookie | |
| 133 | + ** with a correct value, then the user agent has been authenticated. | |
| 134 | + */ | |
| 135 | + z = P(ROBOT_COOKIE); | |
| 136 | + if( z ){ | |
| 137 | + unsigned h = atoi(z); | |
| 138 | + robot_pow_hash(); | |
| 139 | + if( (h==robot.h1 || h==robot.h2) && !cgi_is_qp(ROBOT_COOKIE) ){ | |
| 140 | + robot.resultCache = KNOWN_NOT_ROBOT; | |
| 141 | + return 0; | |
| 142 | + } | |
| 143 | + } | |
| 144 | + | |
| 145 | + /* Condition 3: There is a "proof=VALUE" query parameter with a valid | |
| 146 | + ** VALUE attached. If this is the case, also set the robot cookie | |
| 147 | + ** so that future requests will hit condition 2 above. | |
| 148 | + */ | |
| 149 | + z = P("proof"); | |
| 150 | + if( z ){ | |
| 151 | + unsigned h = atoi(z); | |
| 152 | + robot_pow_hash(); | |
| 153 | + if( h==robot.h1 || h==robot.h2 ){ | |
| 154 | + cgi_set_cookie(ROBOT_COOKIE,z,"/",900); | |
| 155 | + robot.resultCache = KNOWN_NOT_ROBOT; | |
| 156 | + return 0; | |
| 157 | + } | |
| 158 | + cgi_tag_query_parameter("proof"); | |
| 159 | + } | |
| 160 | + | |
| 161 | + /* Condition 4: If there is a "token=VALUE" query parameter with a | |
| 162 | + ** valid VALUE argument, then assume that the request is coming from | |
| 163 | + ** either an interactive human session, or an authorized robot that we | |
| 164 | +** want to treat as human. Allow it through and also set the robot cookie. | |
| 165 | + */ | |
| 166 | + z = P("token"); | |
| 167 | + if( z!=0 ){ | |
| 168 | + if( db_exists("SELECT 1 FROM config" | |
| 169 | + " WHERE name='token-%q'" | |
| 170 | + " AND json_valid(value,6)" | |
| 171 | + " AND value->>'user' IS NOT NULL", z) | |
| 172 | + ){ | |
| 173 | + char *zVal; | |
| 174 | + robot_pow_hash(); | |
| 175 | + zVal = mprintf("%u", robot.h1); | |
| 176 | + cgi_set_cookie(ROBOT_COOKIE,zVal,"/",900); | |
| 177 | + fossil_free(zVal); | |
| 178 | + robot.resultCache = KNOWN_NOT_ROBOT; | |
| 179 | + return 0; /* There is a valid token= query parameter */ | |
| 180 | + } | |
| 181 | + cgi_tag_query_parameter("token"); | |
| 182 | + } | |
| 183 | + | |
| 184 | + /* We have no proof that the request is coming from an interactive | |
| 185 | + ** human session, so assume the request comes from a robot. | |
| 186 | + */ | |
| 187 | + robot.resultCache = MIGHT_BE_ROBOT; | |
| 188 | + return 1; | |
| 189 | +} | |
| 190 | + | |
| 191 | +/* | |
| 192 | +** Rewrite the current page with content that attempts | |
| 193 | +** to prove that the client is not a robot. | |
| 194 | +*/ | |
| 195 | +static void ask_for_proof_that_client_is_not_robot(void){ | |
| 196 | + unsigned p1, p2, p3, p4, p5, k2, k3; | |
| 197 | + int k; | |
| 93 | 198 | |
| 94 | 199 | /* Ask the client to present proof-of-work */ |
| 95 | 200 | cgi_reset_content(); |
| 96 | 201 | cgi_set_content_type("text/html"); |
| 97 | 202 | style_header("Browser Verification"); |
| @@ -124,24 +229,24 @@ | ||
| 124 | 229 | @ aaa("x2").textContent="";\ |
| 125 | 230 | @ aaa("x3").style.display="none";\ |
| 126 | 231 | @ aaa("x1").textContent="Access Denied";\ |
| 127 | 232 | @ }\ |
| 128 | 233 | @ }\ |
| 129 | - k = 400 + h2%299; | |
| 130 | - k2 = (h2/299)%99 + 973; | |
| 131 | - k3 = (h2/(299*99))%99 + 811; | |
| 234 | + robot_pow_hash(); | |
| 235 | + k = 400 + robot.h2%299; | |
| 236 | + k2 = (robot.h2/299)%99 + 973; | |
| 237 | + k3 = (robot.h2/(299*99))%99 + 811; | |
| 132 | 238 | p1 = (k*k + k)/2; |
| 133 | - p2 = h1-p1; | |
| 239 | + p2 = robot.h1-p1; | |
| 134 | 240 | p3 = p2%k2; |
| 135 | 241 | p4 = (p2/k2)%k3; |
| 136 | 242 | p5 = p2/(k2*k3); |
| 137 | 243 | @ function ccc(a,b,c){return (a*%u(k3)+b)*%u(k2)+c;}\ |
| 138 | 244 | @ window.addEventListener('load',function(){\ |
| 139 | 245 | @ bbb(ccc(%u(p5),%u(p4),%u(p3)),%u(k));},false); |
| 140 | 246 | @ </script> |
| 141 | 247 | style_finish_page(); |
| 142 | - return 1; | |
| 143 | 248 | } |
| 144 | 249 | |
| 145 | 250 | /* |
| 146 | 251 | ** SETTING: robot-restrict width=40 block-text |
| 147 | 252 | ** The VALUE of this setting is a list of GLOB patterns that match |
| @@ -177,39 +282,26 @@ | ||
| 177 | 282 | ** page generation should be aborted. It returns false if the page |
| 178 | 283 | ** should not be restricted and should be rendered normally. |
| 179 | 284 | */ |
| 180 | 285 | int robot_restrict(const char *zPage){ |
| 181 | 286 | const char *zGlob; |
| 182 | - const char *zToken; | |
| 183 | 287 | static int bKnownPass = 0; |
| 184 | - if( g.zLogin ) return 0; /* Logged in users always get through */ | |
| 185 | - if( bKnownPass ) return 0; /* Already known to pass robot restrictions */ | |
| 288 | + | |
| 289 | + if( robot.resultCache==KNOWN_NOT_ROBOT ) return 0; | |
| 290 | + if( bKnownPass ) return 0; | |
| 186 | 291 | zGlob = db_get("robot-restrict",robot_restrict_default()); |
| 187 | 292 | if( zGlob==0 || zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){ |
| 188 | 293 | bKnownPass = 1; |
| 189 | 294 | return 0; /* Robot restriction is turned off */ |
| 190 | 295 | } |
| 191 | 296 | if( !glob_multi_match(zGlob, zPage) ) return 0; |
| 192 | - zToken = P("token"); | |
| 193 | - if( zToken!=0 | |
| 194 | - && db_exists("SELECT 1 FROM config" | |
| 195 | - " WHERE name='token-%q'" | |
| 196 | - " AND json_valid(value,6)" | |
| 197 | - " AND value->>'user' IS NOT NULL", zToken) | |
| 198 | - ){ | |
| 199 | - bKnownPass = 1; | |
| 200 | - return 0; /* There is a valid token= query parameter */ | |
| 201 | - } | |
| 202 | - if( robot_proofofwork() ){ | |
| 203 | - /* A captcha was generated. Abort this page. A redirect will occur | |
| 204 | - ** if the captcha passes. */ | |
| 205 | - return 1; | |
| 206 | - } | |
| 207 | - bKnownPass = 1; | |
| 208 | - return 0; | |
| 209 | -} | |
| 210 | - | |
| 297 | + if( !client_might_be_a_robot() ) return 0; | |
| 298 | + | |
| 299 | + /* Generate the proof-of-work captcha */ | |
| 300 | + ask_for_proof_that_client_is_not_robot(); | |
| 301 | + return 1; | |
| 302 | +} | |
| 211 | 303 | |
| 212 | 304 | /* |
| 213 | 305 | ** WEBPAGE: test-robotck |
| 214 | 306 | ** |
| 215 | 307 | ** Run the robot_restrict() function using the value of the "name=" |
| @@ -240,13 +332,31 @@ | ||
| 240 | 332 | } |
| 241 | 333 | if( zP2 && zP2[0] ){ |
| 242 | 334 | @ %h(ROBOT_COOKIE)=%h(zP2)<br> |
| 243 | 335 | cgi_set_cookie(ROBOT_COOKIE,"",0,-1); |
| 244 | 336 | } |
| 245 | - z = db_get("robot-restrict",robot_restrict_default()); | |
| 246 | - if( z && z[0] ){ | |
| 247 | - @ robot-restrict=%h(z)</br> | |
| 337 | + if( g.perm.Admin ){ | |
| 338 | + z = db_get("robot-restrict",robot_restrict_default()); | |
| 339 | + if( z && z[0] ){ | |
| 340 | + @ robot-restrict=%h(z)</br> | |
| 341 | + } | |
| 342 | + @ robot.h1=%u(robot.h1)<br> | |
| 343 | + @ robot.h2=%u(robot.h2)<br> | |
| 344 | + switch( robot.resultCache ){ | |
| 345 | + case MIGHT_BE_ROBOT: { | |
| 346 | + @ robot.resultCache=MIGHT_BE_ROBOT<br> | |
| 347 | + break; | |
| 348 | + } | |
| 349 | + case KNOWN_NOT_ROBOT: { | |
| 350 | + @ robot.resultCache=KNOWN_NOT_ROBOT<br> | |
| 351 | + break; | |
| 352 | + } | |
| 353 | + default: { | |
| 354 | + @ robot.resultCache=OTHER (%d(robot.resultCache))<br> | |
| 355 | + break; | |
| 356 | + } | |
| 357 | + } | |
| 248 | 358 | } |
| 249 | 359 | @ </p> |
| 250 | 360 | @ <p><a href="%R/test-robotck/%h(zName)">Retry</a> |
| 251 | 361 | style_finish_page(); |
| 252 | 362 | } |
| 253 | 363 |
| --- src/robot.c | |
| +++ src/robot.c | |
| @@ -31,32 +31,51 @@ | |
| 31 | #if INTERFACE |
| 32 | #define ROBOT_COOKIE "fossil-client-ok" |
| 33 | #endif |
| 34 | |
| 35 | /* |
| 36 | ** Rewrite the current page with a robot squelch captcha and return 1. |
| 37 | ** |
| 38 | ** Or, if valid proof-of-work is present as either a query parameter or |
| 39 | ** as a cookie, then return 0. |
| 40 | */ |
| 41 | static int robot_proofofwork(void){ |
| 42 | sqlite3_int64 tm; |
| 43 | unsigned h1, h2, p1, p2, p3, p4, p5, k2, k3; |
| 44 | int k; |
| 45 | const char *z; |
| 46 | const char *az[2]; |
| 47 | |
| 48 | /* Construct a proof-of-work value based on the IP address of the |
| 49 | ** sender and the sender's user-agent string. The current time also |
| 50 | ** affects the pow value, so actually compute two values, one for the |
| 51 | ** current 900-second interval and one for the previous. Either can |
| 52 | ** match. The pow-value is an integer between 100,000,000 and |
| 53 | ** 999,999,999. */ |
| 54 | az[0] = P("REMOTE_ADDR"); |
| 55 | az[1] = P("HTTP_USER_AGENT"); |
| 56 | tm = time(0); |
| 57 | h1 = (unsigned)((tm&0xffffffff) / 900); |
| 58 | h2 = h1 - 1; |
| 59 | for(k=0; k<2; k++){ |
| 60 | z = az[k]; |
| 61 | if( z==0 ) continue; |
| 62 | while( *z ){ |
| @@ -63,35 +82,121 @@ | |
| 63 | h1 = (h1 + *(unsigned char*)z)*0x9e3779b1; |
| 64 | h2 = (h2 + *(unsigned char*)z)*0x9e3779b1; |
| 65 | z++; |
| 66 | } |
| 67 | } |
| 68 | h1 = (h1 % 900000000) + 100000000; |
| 69 | h2 = (h2 % 900000000) + 100000000; |
| 70 | |
| 71 | /* If there is already a proof-of-work cookie with this value |
| 72 | ** that means that the user agent has already authenticated. |
| 73 | */ |
| 74 | z = P(ROBOT_COOKIE); |
| 75 | if( z |
| 76 | && (atoi(z)==h1 || atoi(z)==h2) |
| 77 | && !cgi_is_qp(ROBOT_COOKIE) ){ |
| 78 | return 0; |
| 79 | } |
| 80 | |
| 81 | /* Check for a proof query parameter. If found, that means that |
| 82 | ** the captcha has just now passed, so set the proof-of-work cookie |
| 83 | ** in addition to letting the request through. |
| 84 | */ |
| 85 | z = P("proof"); |
| 86 | if( z |
| 87 | && (atoi(z)==h1 || atoi(z)==h2) |
| 88 | ){ |
| 89 | cgi_set_cookie(ROBOT_COOKIE,z,"/",900); |
| 90 | return 0; |
| 91 | } |
| 92 | cgi_tag_query_parameter("proof"); |
| 93 | |
| 94 | /* Ask the client to present proof-of-work */ |
| 95 | cgi_reset_content(); |
| 96 | cgi_set_content_type("text/html"); |
| 97 | style_header("Browser Verification"); |
| @@ -124,24 +229,24 @@ | |
| 124 | @ aaa("x2").textContent="";\ |
| 125 | @ aaa("x3").style.display="none";\ |
| 126 | @ aaa("x1").textContent="Access Denied";\ |
| 127 | @ }\ |
| 128 | @ }\ |
| 129 | k = 400 + h2%299; |
| 130 | k2 = (h2/299)%99 + 973; |
| 131 | k3 = (h2/(299*99))%99 + 811; |
| 132 | p1 = (k*k + k)/2; |
| 133 | p2 = h1-p1; |
| 134 | p3 = p2%k2; |
| 135 | p4 = (p2/k2)%k3; |
| 136 | p5 = p2/(k2*k3); |
| 137 | @ function ccc(a,b,c){return (a*%u(k3)+b)*%u(k2)+c;}\ |
| 138 | @ window.addEventListener('load',function(){\ |
| 139 | @ bbb(ccc(%u(p5),%u(p4),%u(p3)),%u(k));},false); |
| 140 | @ </script> |
| 141 | style_finish_page(); |
| 142 | return 1; |
| 143 | } |
| 144 | |
| 145 | /* |
| 146 | ** SETTING: robot-restrict width=40 block-text |
| 147 | ** The VALUE of this setting is a list of GLOB patterns that match |
| @@ -177,39 +282,26 @@ | |
| 177 | ** page generation should be aborted. It returns false if the page |
| 178 | ** should not be restricted and should be rendered normally. |
| 179 | */ |
| 180 | int robot_restrict(const char *zPage){ |
| 181 | const char *zGlob; |
| 182 | const char *zToken; |
| 183 | static int bKnownPass = 0; |
| 184 | if( g.zLogin ) return 0; /* Logged in users always get through */ |
| 185 | if( bKnownPass ) return 0; /* Already known to pass robot restrictions */ |
| 186 | zGlob = db_get("robot-restrict",robot_restrict_default()); |
| 187 | if( zGlob==0 || zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){ |
| 188 | bKnownPass = 1; |
| 189 | return 0; /* Robot restriction is turned off */ |
| 190 | } |
| 191 | if( !glob_multi_match(zGlob, zPage) ) return 0; |
| 192 | zToken = P("token"); |
| 193 | if( zToken!=0 |
| 194 | && db_exists("SELECT 1 FROM config" |
| 195 | " WHERE name='token-%q'" |
| 196 | " AND json_valid(value,6)" |
| 197 | " AND value->>'user' IS NOT NULL", zToken) |
| 198 | ){ |
| 199 | bKnownPass = 1; |
| 200 | return 0; /* There is a valid token= query parameter */ |
| 201 | } |
| 202 | if( robot_proofofwork() ){ |
| 203 | /* A captcha was generated. Abort this page. A redirect will occur |
| 204 | ** if the captcha passes. */ |
| 205 | return 1; |
| 206 | } |
| 207 | bKnownPass = 1; |
| 208 | return 0; |
| 209 | } |
| 210 | |
| 211 | |
| 212 | /* |
| 213 | ** WEBPAGE: test-robotck |
| 214 | ** |
| 215 | ** Run the robot_restrict() function using the value of the "name=" |
| @@ -240,13 +332,31 @@ | |
| 240 | } |
| 241 | if( zP2 && zP2[0] ){ |
| 242 | @ %h(ROBOT_COOKIE)=%h(zP2)<br> |
| 243 | cgi_set_cookie(ROBOT_COOKIE,"",0,-1); |
| 244 | } |
| 245 | z = db_get("robot-restrict",robot_restrict_default()); |
| 246 | if( z && z[0] ){ |
| 247 | @ robot-restrict=%h(z)</br> |
| 248 | } |
| 249 | @ </p> |
| 250 | @ <p><a href="%R/test-robotck/%h(zName)">Retry</a> |
| 251 | style_finish_page(); |
| 252 | } |
| 253 |
| --- src/robot.c | |
| +++ src/robot.c | |
| @@ -31,32 +31,51 @@ | |
| 31 | #if INTERFACE |
| 32 | #define ROBOT_COOKIE "fossil-client-ok" |
| 33 | #endif |
| 34 | |
| 35 | /* |
| 36 | ** Values computed only once and then cached. |
| 37 | */ |
| 38 | static struct RobotCache { |
| 39 | unsigned int h1, h2; /* Proof-of-work hash values */ |
| 40 | unsigned int resultCache; /* 0: unknown. 1: human 2: might-be-robot */ |
| 41 | } robot = { 0, 0, 0 }; |
| 42 | |
| 43 | /* |
| 44 | ** Allowed values for robot.resultCache |
| 45 | */ |
| 46 | #define KNOWN_NOT_ROBOT 1 |
| 47 | #define MIGHT_BE_ROBOT 2 |
| 48 | |
| 49 | /* |
| 50 | ** Compute two hashes, robot.h1 and robot.h2, that are used as |
| 51 | ** part of determining whether or not the HTTP client is a robot. |
| 52 | ** These hashes are based on current time, client IP address, |
| 53 | ** and User-Agent. robot.h1 is for the current time slot and |
| 54 | ** robot.h2 is the previous. |
| 55 | ** |
| 56 | ** The hashes are integer values between 100,000,000 and 999,999,999 |
| 57 | ** inclusive. |
| 58 | */ |
| 59 | static void robot_pow_hash(void){ |
| 60 | const char *az[2], *z; |
| 61 | sqlite3_int64 tm; |
| 62 | unsigned int h1, h2, k; |
| 63 | |
| 64 | if( robot.h1 ) return; /* Already computed */ |
| 65 | |
| 66 | /* Construct a proof-of-work value based on the IP address of the |
| 67 | ** sender and the sender's user-agent string. The current time also |
| 68 | ** affects the pow value, so actually compute two values, one for the |
| 69 | ** current 900-second interval and one for the previous. Either can |
| 70 | ** match. The pow-value is an integer between 100,000,000 and |
| 71 | ** 999,999,999. |
| 72 | */ |
| 73 | az[0] = P("REMOTE_ADDR"); |
| 74 | az[1] = P("HTTP_USER_AGENT"); |
| 75 | tm = time(0); |
| 76 | h1 = (unsigned)(tm/900)&0xffffffff; |
| 77 | h2 = h1 - 1; |
| 78 | for(k=0; k<2; k++){ |
| 79 | z = az[k]; |
| 80 | if( z==0 ) continue; |
| 81 | while( *z ){ |
| @@ -63,35 +82,121 @@ | |
| 82 | h1 = (h1 + *(unsigned char*)z)*0x9e3779b1; |
| 83 | h2 = (h2 + *(unsigned char*)z)*0x9e3779b1; |
| 84 | z++; |
| 85 | } |
| 86 | } |
| 87 | robot.h1 = (h1 % 900000000) + 100000000; |
| 88 | robot.h2 = (h2 % 900000000) + 100000000; |
| 89 | } |
| 90 | |
| 91 | /* |
| 92 | ** Return true if the HTTP client has not demonstrated that it is |
| 93 | ** human interactive, meaning that it might be a non-interactive |
| 94 | ** robot. Return false if the client is known not to be a robot. |
| 95 | ** |
| 96 | ** For this routine, any of the following is considered proof that |
| 97 | ** the HTTP client is not a robot: |
| 98 | ** |
| 99 | ** 1. There is a valid login, including "anonymous". User "nobody" |
| 100 | ** is not a valid login, but every other user is. |
| 101 | ** |
| 102 | ** 2. There exists a ROBOT_COOKIE with the correct proof-of-work |
| 103 | ** value. |
| 104 | ** |
| 105 | ** 3. There exists a proof=VALUE query parameter where VALUE is |
| 106 | ** a correct proof-of-work value. |
| 107 | ** |
| 108 | ** 4. There exists a valid token=VALUE query parameter. |
| 109 | ** |
| 110 | ** After being run once, this routine caches its findings and |
| 111 | ** returns very quickly on subsequent invocations. |
| 112 | */ |
| 113 | int client_might_be_a_robot(void){ |
| 114 | const char *z; |
| 115 | |
| 116 | /* Only do this computation once, then cache the results for future |
| 117 | ** use */ |
| 118 | if( robot.resultCache ){ |
| 119 | return robot.resultCache==MIGHT_BE_ROBOT; |
| 120 | } |
| 121 | |
| 122 | /* Condition 1: Is there a valid login? |
| 123 | */ |
| 124 | if( g.userUid==0 ){ |
| 125 | login_check_credentials(); |
| 126 | } |
| 127 | if( g.zLogin!=0 ){ |
| 128 | robot.resultCache = KNOWN_NOT_ROBOT; |
| 129 | return 0; |
| 130 | } |
| 131 | |
| 132 | /* Condition 2: If there is already a proof-of-work cookie |
| 133 | ** with a correct value, then the user agent has been authenticated. |
| 134 | */ |
| 135 | z = P(ROBOT_COOKIE); |
| 136 | if( z ){ |
| 137 | unsigned h = atoi(z); |
| 138 | robot_pow_hash(); |
| 139 | if( (h==robot.h1 || h==robot.h2) && !cgi_is_qp(ROBOT_COOKIE) ){ |
| 140 | robot.resultCache = KNOWN_NOT_ROBOT; |
| 141 | return 0; |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | /* Condition 3: There is a "proof=VALUE" query parameter with a valid |
| 146 | ** VALUE attached. If this is the case, also set the robot cookie |
| 147 | ** so that future requests will hit condition 2 above. |
| 148 | */ |
| 149 | z = P("proof"); |
| 150 | if( z ){ |
| 151 | unsigned h = atoi(z); |
| 152 | robot_pow_hash(); |
| 153 | if( h==robot.h1 || h==robot.h2 ){ |
| 154 | cgi_set_cookie(ROBOT_COOKIE,z,"/",900); |
| 155 | robot.resultCache = KNOWN_NOT_ROBOT; |
| 156 | return 0; |
| 157 | } |
| 158 | cgi_tag_query_parameter("proof"); |
| 159 | } |
| 160 | |
| 161 | /* Condition 4: If there is a "token=VALUE" query parameter with a |
| 162 | ** valid VALUE argument, then assume that the request is coming from |
| 163 | ** either an interactive human session, or an authorized robot that we |
| 164 | ** want to treat as human. Allow it through and also set the robot cookie. |
| 165 | */ |
| 166 | z = P("token"); |
| 167 | if( z!=0 ){ |
| 168 | if( db_exists("SELECT 1 FROM config" |
| 169 | " WHERE name='token-%q'" |
| 170 | " AND json_valid(value,6)" |
| 171 | " AND value->>'user' IS NOT NULL", z) |
| 172 | ){ |
| 173 | char *zVal; |
| 174 | robot_pow_hash(); |
| 175 | zVal = mprintf("%u", robot.h1); |
| 176 | cgi_set_cookie(ROBOT_COOKIE,zVal,"/",900); |
| 177 | fossil_free(zVal); |
| 178 | robot.resultCache = KNOWN_NOT_ROBOT; |
| 179 | return 0; /* There is a valid token= query parameter */ |
| 180 | } |
| 181 | cgi_tag_query_parameter("token"); |
| 182 | } |
| 183 | |
| 184 | /* We have no proof that the request is coming from an interactive |
| 185 | ** human session, so assume the request comes from a robot. |
| 186 | */ |
| 187 | robot.resultCache = MIGHT_BE_ROBOT; |
| 188 | return 1; |
| 189 | } |
| 190 | |
| 191 | /* |
| 192 | ** Rewrite the current page with content that attempts |
| 193 | ** to prove that the client is not a robot. |
| 194 | */ |
| 195 | static void ask_for_proof_that_client_is_not_robot(void){ |
| 196 | unsigned p1, p2, p3, p4, p5, k2, k3; |
| 197 | int k; |
| 198 | |
| 199 | /* Ask the client to present proof-of-work */ |
| 200 | cgi_reset_content(); |
| 201 | cgi_set_content_type("text/html"); |
| 202 | style_header("Browser Verification"); |
| @@ -124,24 +229,24 @@ | |
| 229 | @ aaa("x2").textContent="";\ |
| 230 | @ aaa("x3").style.display="none";\ |
| 231 | @ aaa("x1").textContent="Access Denied";\ |
| 232 | @ }\ |
| 233 | @ }\ |
| 234 | robot_pow_hash(); |
| 235 | k = 400 + robot.h2%299; |
| 236 | k2 = (robot.h2/299)%99 + 973; |
| 237 | k3 = (robot.h2/(299*99))%99 + 811; |
| 238 | p1 = (k*k + k)/2; |
| 239 | p2 = robot.h1-p1; |
| 240 | p3 = p2%k2; |
| 241 | p4 = (p2/k2)%k3; |
| 242 | p5 = p2/(k2*k3); |
| 243 | @ function ccc(a,b,c){return (a*%u(k3)+b)*%u(k2)+c;}\ |
| 244 | @ window.addEventListener('load',function(){\ |
| 245 | @ bbb(ccc(%u(p5),%u(p4),%u(p3)),%u(k));},false); |
| 246 | @ </script> |
| 247 | style_finish_page(); |
| 248 | } |
| 249 | |
| 250 | /* |
| 251 | ** SETTING: robot-restrict width=40 block-text |
| 252 | ** The VALUE of this setting is a list of GLOB patterns that match |
| @@ -177,39 +282,26 @@ | |
| 282 | ** page generation should be aborted. It returns false if the page |
| 283 | ** should not be restricted and should be rendered normally. |
| 284 | */ |
| 285 | int robot_restrict(const char *zPage){ |
| 286 | const char *zGlob; |
| 287 | static int bKnownPass = 0; |
| 288 | |
| 289 | if( robot.resultCache==KNOWN_NOT_ROBOT ) return 0; |
| 290 | if( bKnownPass ) return 0; |
| 291 | zGlob = db_get("robot-restrict",robot_restrict_default()); |
| 292 | if( zGlob==0 || zGlob[0]==0 || fossil_strcmp(zGlob, "off")==0 ){ |
| 293 | bKnownPass = 1; |
| 294 | return 0; /* Robot restriction is turned off */ |
| 295 | } |
| 296 | if( !glob_multi_match(zGlob, zPage) ) return 0; |
| 297 | if( !client_might_be_a_robot() ) return 0; |
| 298 | |
| 299 | /* Generate the proof-of-work captcha */ |
| 300 | ask_for_proof_that_client_is_not_robot(); |
| 301 | return 1; |
| 302 | } |
| 303 | |
| 304 | /* |
| 305 | ** WEBPAGE: test-robotck |
| 306 | ** |
| 307 | ** Run the robot_restrict() function using the value of the "name=" |
| @@ -240,13 +332,31 @@ | |
| 332 | } |
| 333 | if( zP2 && zP2[0] ){ |
| 334 | @ %h(ROBOT_COOKIE)=%h(zP2)<br> |
| 335 | cgi_set_cookie(ROBOT_COOKIE,"",0,-1); |
| 336 | } |
| 337 | if( g.perm.Admin ){ |
| 338 | z = db_get("robot-restrict",robot_restrict_default()); |
| 339 | if( z && z[0] ){ |
| 340 | @ robot-restrict=%h(z)</br> |
| 341 | } |
| 342 | @ robot.h1=%u(robot.h1)<br> |
| 343 | @ robot.h2=%u(robot.h2)<br> |
| 344 | switch( robot.resultCache ){ |
| 345 | case MIGHT_BE_ROBOT: { |
| 346 | @ robot.resultCache=MIGHT_BE_ROBOT<br> |
| 347 | break; |
| 348 | } |
| 349 | case KNOWN_NOT_ROBOT: { |
| 350 | @ robot.resultCache=KNOWN_NOT_ROBOT<br> |
| 351 | break; |
| 352 | } |
| 353 | default: { |
| 354 | @ robot.resultCache=OTHER (%d(robot.resultCache))<br> |
| 355 | break; |
| 356 | } |
| 357 | } |
| 358 | } |
| 359 | @ </p> |
| 360 | @ <p><a href="%R/test-robotck/%h(zName)">Retry</a> |
| 361 | style_finish_page(); |
| 362 | } |
| 363 |