| | @@ -39,14 +39,19 @@ |
| 39 | 39 | unsigned int h1, h2; /* Proof-of-work hash values */ |
| 40 | 40 | unsigned int resultCache; /* 0: unknown. 1: human 2: might-be-robot */ |
| 41 | 41 | } robot = { 0, 0, 0 }; |
| 42 | 42 | |
| 43 | 43 | /* |
| 44 | | -** Allowed values for robot.resultCache |
| 44 | +** Allowed values for robot.resultCache. |
| 45 | +** |
| 46 | +** The names are slightly misleading. KNOWN_NOT_ROBOT might be set even |
| 47 | +** if the client is a robot, but only if the robot is an approved robot. |
| 48 | +** A better name might be "KNOWN_NOT_UNAUTHORIZED_ROBOT", but that is too |
| 49 | +** long of a name. |
| 45 | 50 | */ |
| 46 | | -#define KNOWN_NOT_ROBOT 1 |
| 47 | | -#define MIGHT_BE_ROBOT 2 |
| 51 | +#define KNOWN_NOT_ROBOT 1 /* Approved to consume CPU and bandwidth */ |
| 52 | +#define MIGHT_BE_ROBOT 2 /* Might be an unapproved robot */ |
| 48 | 53 | |
| 49 | 54 | /* |
| 50 | 55 | ** Compute two hashes, robot.h1 and robot.h2, that are used as |
| 51 | 56 | ** part of determining whether or not the HTTP client is a robot. |
| 52 | 57 | ** These hashes are based on current time, client IP address, |
| | @@ -265,11 +270,13 @@ |
| 265 | 270 | ** The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and |
| 266 | 271 | ** /vpatch. The "annotate" tag also covers /blame and /praise. "zip" |
| 267 | 272 | ** also covers /tarball and /sqlar. If a tag has an "X" character appended, |
| 268 | 273 | ** then it only applies if query parameters are such that the page is |
| 269 | 274 | ** particularly difficult to compute. In all other cases, the tag should |
| 270 | | -** exactly match the page name. |
| 275 | +** exactly match the page name. Useful "X" tags include "timelineX" |
| 276 | +** and "zipX". See the robot-zip-leaf and robot-zip-tag settings |
| 277 | +** for additional controls associated with the "zipX" restriction. |
| 271 | 278 | ** |
| 272 | 279 | ** Change this setting "off" to disable all robot restrictions. |
| 273 | 280 | */ |
| 274 | 281 | /* |
| 275 | 282 | ** SETTING: robot-exception width=40 block-text |
| | @@ -287,10 +294,28 @@ |
| 287 | 294 | ** This setting can hold multiple regular expressions, one |
| 288 | 295 | ** regular expression per line. The input URL is exempted from |
| 289 | 296 | ** anti-robot defenses if any of the multiple regular expressions |
| 290 | 297 | ** matches. |
| 291 | 298 | */ |
| 299 | +/* |
| 300 | +** SETTING: robot-zip-leaf boolean |
| 301 | +** |
| 302 | +** If this setting is true, then robots are allowed to download tarballs, |
| 303 | +** ZIP-archives, and SQL-archives even though "zipX" is found in |
| 304 | +** the robot-restrict setting as long as the specific check-in being |
| 305 | +** downloaded is a leaf check-in. |
| 306 | +*/ |
| 307 | +/* |
| 308 | +** SETTING: robot-zip-tag width=40 block-text |
| 309 | +** |
| 310 | +** If this setting is a list of GLOB patterns matching tags, |
| 311 | +** then robots are allowed to download tarballs, ZIP-archives, and |
| 312 | +** SQL-archives even though "zipX" appears in robot-restrict, as long as |
| 313 | +** the specific check-in being downloaded has a tag that matches |
| 314 | +** the GLOB list of this setting. Recommended value: |
| 315 | +** "release,robot-access". |
| 316 | +*/ |
| 292 | 317 | |
| 293 | 318 | /* |
| 294 | 319 | ** Return the default restriction GLOB |
| 295 | 320 | */ |
| 296 | 321 | const char *robot_restrict_default(void){ |
| | @@ -405,10 +430,67 @@ |
| 405 | 430 | |
| 406 | 431 | /* Generate the proof-of-work captcha */ |
| 407 | 432 | ask_for_proof_that_client_is_not_robot(); |
| 408 | 433 | return 1; |
| 409 | 434 | } |
| 435 | + |
| 436 | +/* |
| 437 | +** Check to see if a robot is allowed to download a tarball, ZIP archive, |
| 438 | +** or SQL Archive for a particular check-in identified by the "rid" |
| 439 | +** argument. Return true to block the download. Return false to |
| 440 | +** continue. Prior to returning true, a captcha is presented to the user. |
| 441 | +** No output is generated when returning false. |
| 442 | +** |
| 443 | +** The rules: |
| 444 | +** |
| 445 | +** (1) If "zipX" is missing from the robot-restrict setting, then robots |
| 446 | +** are allowed to download any archive. None of the remaining rules |
| 447 | +** below are consulted unless "zipX" is on the robot-restrict setting. |
| 448 | +** |
| 449 | +** (2) If the robot-zip-leaf setting is true, then robots are allowed |
| 450 | +** to download archives for any leaf check-in. This allows URLs like |
| 451 | +** /tarball/trunk/archive.tar.gz to work since branch labels like "trunk" |
| 452 | +** always resolve to a leaf. |
| 453 | +** |
| 454 | +** (3) If the robot-zip-tag setting is a comma-separated list of tags, then any |
| 455 | +** check-in that contains one of the tags on that list is allowed to |
| 456 | +** be downloaded. This allows check-ins with tags like "release" or |
| 457 | +** "robot-access" to be downloaded by robots. |
| 458 | +*/ |
| 459 | +int robot_restrict_zip(int rid){ |
| 460 | + const char *zTag; |
| 461 | + if( !robot_restrict_has_tag("zipX") || !client_might_be_a_robot() ){ |
| 462 | + return 0; /* Rule (1) */ |
| 463 | + } |
| 464 | + |
| 465 | + if( db_get_boolean("robot-zip-leaf",0) && is_a_leaf(rid) ){ |
| 466 | + return 0; /* Rule (2) */ |
| 467 | + } |
| 468 | + |
| 469 | + zTag = db_get("robot-zip-tag",0); |
| 470 | + if( zTag && zTag[0] && fossil_strcmp(zTag,"off")!=0 ){ |
| 471 | + int ok = 0; |
| 472 | + Stmt q; |
| 473 | + db_prepare(&q, |
| 474 | + "SELECT substr(tagname,5) FROM tagxref, tag" |
| 475 | + " WHERE tagxref.rid=%d" |
| 476 | + " AND tag.tagid=tagxref.tagid" |
| 477 | + " AND tagxref.tagtype=1" |
| 478 | + " AND tag.tagname GLOB 'sym-*'", |
| 479 | + rid |
| 480 | + ); |
| 481 | + while( !ok && db_step(&q)==SQLITE_ROW ){ |
| 482 | + if( glob_multi_match(zTag, db_column_text(&q,0)) ) ok = 1; |
| 483 | + } |
| 484 | + db_finalize(&q); |
| 485 | + if( ok ) return 0; /* Rule (3) */ |
| 486 | + } |
| 487 | + |
| 488 | + /* Generate the proof-of-work captcha */ |
| 489 | + ask_for_proof_that_client_is_not_robot(); |
| 490 | + return 1; |
| 491 | +} |
| 410 | 492 | |
| 411 | 493 | /* |
| 412 | 494 | ** WEBPAGE: test-robotck |
| 413 | 495 | ** |
| 414 | 496 | ** Run the robot_restrict() function using the value of the "name=" |
| | @@ -416,21 +498,30 @@ |
| 416 | 498 | ** logic. |
| 417 | 499 | ** |
| 418 | 500 | ** Whenever this page is successfully rendered (when it doesn't go to |
| 419 | 501 | ** the captcha) it deletes the proof-of-work cookie. So reloading the |
| 420 | 502 | ** page will reset the cookie and restart the verification. |
| 503 | +** |
| 504 | +** If the zip=CHECKIN query parameter is provided, then also invoke |
| 505 | +** robot_restrict_zip() on the RID of CHECKIN. |
| 421 | 506 | */ |
| 422 | 507 | void robot_restrict_test_page(void){ |
| 423 | 508 | const char *zName = P("name"); |
| 509 | + const char *zZip = P("zip"); |
| 424 | 510 | const char *zP1 = P("proof"); |
| 425 | 511 | const char *zP2 = P(ROBOT_COOKIE); |
| 426 | 512 | const char *z; |
| 513 | + int rid = 0; |
| 427 | 514 | if( zName==0 || zName[0]==0 ) zName = g.zPath; |
| 428 | 515 | login_check_credentials(); |
| 429 | 516 | if( g.zLogin==0 ){ login_needed(1); return; } |
| 430 | 517 | g.zLogin = 0; |
| 431 | 518 | if( robot_restrict(zName) ) return; |
| 519 | + if( zZip && zZip[0] ){ |
| 520 | + rid = symbolic_name_to_rid(zZip, "ci"); |
| 521 | + if( rid && robot_restrict_zip(rid) ) return; |
| 522 | + } |
| 432 | 523 | style_set_current_feature("test"); |
| 433 | 524 | style_header("robot_restrict() test"); |
| 434 | 525 | @ <h1>Captcha passed</h1> |
| 435 | 526 | @ |
| 436 | 527 | @ <p> |
| | @@ -438,10 +529,14 @@ |
| 438 | 529 | @ proof=%h(zP1)<br> |
| 439 | 530 | } |
| 440 | 531 | if( zP2 && zP2[0] ){ |
| 441 | 532 | @ %h(ROBOT_COOKIE)=%h(zP2)<br> |
| 442 | 533 | cgi_set_cookie(ROBOT_COOKIE,"",0,-1); |
| 534 | + } |
| 535 | + if( zZip && zZip[0] ){ |
| 536 | + @ zip=%h(zZip)<br> |
| 537 | + @ rid=%d(rid)<br> |
| 443 | 538 | } |
| 444 | 539 | if( g.perm.Admin ){ |
| 445 | 540 | z = db_get("robot-restrict",robot_restrict_default()); |
| 446 | 541 | if( z && z[0] ){ |
| 447 | 542 | @ robot-restrict=%h(z)</br> |
| 448 | 543 | |