Fossil SCM
Updates to the Robot Defense Settings page to make it easier to configure the latest defensive options.
Commit
7f846263708c7766f9bff1c7f72c53e43491bf2c39423b23e31155fd91a18ad1
Parent
da43fd9f06275e6…
1 file changed
+24
-16
+24
-16
| --- src/setup.c | ||
| +++ src/setup.c | ||
| @@ -470,12 +470,12 @@ | ||
| 470 | 470 | @ <p>A Fossil website can have billions of pages in its tree, even for a |
| 471 | 471 | @ modest project. Many of those pages (examples: diffs and tarballs) |
| 472 | 472 | @ might be expensive to compute. A robot that tries to walk the entire |
| 473 | 473 | @ website can present a crippling CPU and bandwidth load. |
| 474 | 474 | @ |
| 475 | - @ <p>The settings on this page are intended to help site administrators | |
| 476 | - @ defend the site against robots. | |
| 475 | + @ <p>The settings on this page are intended to help administrators | |
| 476 | + @ defend against abusive robots. | |
| 477 | 477 | @ |
| 478 | 478 | @ <form action="%R/setup_robot" method="post"><div> |
| 479 | 479 | login_insert_csrf_secret(); |
| 480 | 480 | @ <input type="submit" name="submit" value="Apply Changes"></p> |
| 481 | 481 | @ <hr> |
| @@ -482,43 +482,51 @@ | ||
| 482 | 482 | @ <p><b>Do not allow robots access to these pages.</b><br> |
| 483 | 483 | @ If the page name matches the GLOB pattern of this setting, and the |
| 484 | 484 | @ user is "nobody", and the client has not previously passed a captcha |
| 485 | 485 | @ test to show that it is not a robot, then the page is not displayed. |
| 486 | 486 | @ A captcha test is rendered instead. |
| 487 | - @ The recommended value for this setting is: | |
| 487 | + @ The default value for this setting is: | |
| 488 | 488 | @ <p> |
| 489 | 489 | @    <tt>%h(robot_restrict_default())</tt> |
| 490 | 490 | @ <p> |
| 491 | 491 | @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and |
| 492 | 492 | @ /vpatch. The "annotate" tag covers /annotate and also /blame and |
| 493 | 493 | @ /praise. The "zip" covers itself and also /tarball and /sqlar. If a |
| 494 | 494 | @ tag has an "X" character appended, then it only applies if query |
| 495 | - @ parameters are such that the page is particularly difficult to compute. | |
| 495 | + @ parameters are such that the page is expensive and/or unusual. | |
| 496 | 496 | @ In all other cases, the tag should exactly match the page name. |
| 497 | 497 | @ |
| 498 | 498 | @ To disable robot restrictions, change this setting to "off". |
| 499 | 499 | @ (Property: robot-restrict) |
| 500 | 500 | @ <br> |
| 501 | 501 | textarea_attribute("", 2, 80, |
| 502 | 502 | "robot-restrict", "rbrestrict", robot_restrict_default(), 0); |
| 503 | 503 | |
| 504 | - @ <hr> | |
| 505 | - @ <p><b>Exceptions to anti-robot restrictions</b><br> | |
| 506 | - @ The entry below is a list of | |
| 507 | - @ <a href="%R/re_rules">regular expressions</a>, one per line. | |
| 508 | - @ If any of these regular expressions match the input URL, then the | |
| 509 | - @ request is exempt from anti-robot defenses. Use this, for example, | |
| 510 | - @ to allow scripts to download release tarballs using a pattern | |
| 511 | - @ like:</p> | |
| 512 | - @ <p> | |
| 513 | - @   <tt>^/tarball/(version-[0-9.]+|release)/</tt> | |
| 514 | - @ <p>The pattern should match against the REQUEST_URI with the | |
| 504 | + @ <p><b>Exception #1</b><br> | |
| 505 | + @ If "zipX" appears in the robot-restrict list above, then tarballs, | |
| 506 | + @ ZIP-archives, and SQL-archives may be downloaded by robots if | |
| 507 | + @ the check-in is a leaf (robot-zip-leaf):<br> | |
| 508 | + onoff_attribute("Allow tarballs for leaf check-ins", | |
| 509 | + "robot-zip-leaf", "rzleaf", 0, 0); | |
| 510 | + | |
| 511 | + @ <p><b>Exception #2</b><br> | |
| 512 | + @ If "zipX" appears in the robot-restrict list above, then tarballs, | |
| 513 | + @ ZIP-archives, and SQL-archives may be downloaded by robots if | |
| 514 | + @ the check-in has one or more tags that match the following | |
| 515 | + @ list of GLOB patterns: (robot-zip-tag)<br> | |
| 516 | + textarea_attribute("", 2, 80, | |
| 517 | + "robot-zip-tag", "rztag", "", 0); | |
| 518 | + | |
| 519 | + @ <p><b>Exception #3</b><br> | |
| 520 | + @ If the request URI matches any of the following | |
| 521 | + @ <a href="%R/re_rules">regular expressions</a> (one per line), then the | |
| 522 | + @ request is exempt from anti-robot defenses. | |
| 523 | + @ The regular expression is matched against the REQUEST_URI with the | |
| 515 | 524 | @ SCRIPT_NAME prefix removed, and with QUERY_STRING appended following |
| 516 | 525 | @ a "?" if QUERY_STRING exists. (Property: robot-exception)<br> |
| 517 | 526 | textarea_attribute("", 3, 80, |
| 518 | 527 | "robot-exception", "rbexcept", "", 0); |
| 519 | - | |
| 520 | 528 | @ <hr> |
| 521 | 529 | addAutoHyperlinkSettings(); |
| 522 | 530 | |
| 523 | 531 | @ <hr> |
| 524 | 532 | entry_attribute("Anonymous Login Validity", 11, "anon-cookie-lifespan", |
| 525 | 533 |
| --- src/setup.c | |
| +++ src/setup.c | |
| @@ -470,12 +470,12 @@ | |
| 470 | @ <p>A Fossil website can have billions of pages in its tree, even for a |
| 471 | @ modest project. Many of those pages (examples: diffs and tarballs) |
| 472 | @ might be expensive to compute. A robot that tries to walk the entire |
| 473 | @ website can present a crippling CPU and bandwidth load. |
| 474 | @ |
| 475 | @ <p>The settings on this page are intended to help site administrators |
| 476 | @ defend the site against robots. |
| 477 | @ |
| 478 | @ <form action="%R/setup_robot" method="post"><div> |
| 479 | login_insert_csrf_secret(); |
| 480 | @ <input type="submit" name="submit" value="Apply Changes"></p> |
| 481 | @ <hr> |
| @@ -482,43 +482,51 @@ | |
| 482 | @ <p><b>Do not allow robots access to these pages.</b><br> |
| 483 | @ If the page name matches the GLOB pattern of this setting, and the |
| 484 | @ user is "nobody", and the client has not previously passed a captcha |
| 485 | @ test to show that it is not a robot, then the page is not displayed. |
| 486 | @ A captcha test is rendered instead. |
| 487 | @ The recommended value for this setting is: |
| 488 | @ <p> |
| 489 | @    <tt>%h(robot_restrict_default())</tt> |
| 490 | @ <p> |
| 491 | @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and |
| 492 | @ /vpatch. The "annotate" tag covers /annotate and also /blame and |
| 493 | @ /praise. The "zip" covers itself and also /tarball and /sqlar. If a |
| 494 | @ tag has an "X" character appended, then it only applies if query |
| 495 | @ parameters are such that the page is particularly difficult to compute. |
| 496 | @ In all other cases, the tag should exactly match the page name. |
| 497 | @ |
| 498 | @ To disable robot restrictions, change this setting to "off". |
| 499 | @ (Property: robot-restrict) |
| 500 | @ <br> |
| 501 | textarea_attribute("", 2, 80, |
| 502 | "robot-restrict", "rbrestrict", robot_restrict_default(), 0); |
| 503 | |
| 504 | @ <hr> |
| 505 | @ <p><b>Exceptions to anti-robot restrictions</b><br> |
| 506 | @ The entry below is a list of |
| 507 | @ <a href="%R/re_rules">regular expressions</a>, one per line. |
| 508 | @ If any of these regular expressions match the input URL, then the |
| 509 | @ request is exempt from anti-robot defenses. Use this, for example, |
| 510 | @ to allow scripts to download release tarballs using a pattern |
| 511 | @ like:</p> |
| 512 | @ <p> |
| 513 | @   <tt>^/tarball/(version-[0-9.]+|release)/</tt> |
| 514 | @ <p>The pattern should match against the REQUEST_URI with the |
| 515 | @ SCRIPT_NAME prefix removed, and with QUERY_STRING appended following |
| 516 | @ a "?" if QUERY_STRING exists. (Property: robot-exception)<br> |
| 517 | textarea_attribute("", 3, 80, |
| 518 | "robot-exception", "rbexcept", "", 0); |
| 519 | |
| 520 | @ <hr> |
| 521 | addAutoHyperlinkSettings(); |
| 522 | |
| 523 | @ <hr> |
| 524 | entry_attribute("Anonymous Login Validity", 11, "anon-cookie-lifespan", |
| 525 |
| --- src/setup.c | |
| +++ src/setup.c | |
| @@ -470,12 +470,12 @@ | |
| 470 | @ <p>A Fossil website can have billions of pages in its tree, even for a |
| 471 | @ modest project. Many of those pages (examples: diffs and tarballs) |
| 472 | @ might be expensive to compute. A robot that tries to walk the entire |
| 473 | @ website can present a crippling CPU and bandwidth load. |
| 474 | @ |
| 475 | @ <p>The settings on this page are intended to help administrators |
| 476 | @ defend against abusive robots. |
| 477 | @ |
| 478 | @ <form action="%R/setup_robot" method="post"><div> |
| 479 | login_insert_csrf_secret(); |
| 480 | @ <input type="submit" name="submit" value="Apply Changes"></p> |
| 481 | @ <hr> |
| @@ -482,43 +482,51 @@ | |
| 482 | @ <p><b>Do not allow robots access to these pages.</b><br> |
| 483 | @ If the page name matches the GLOB pattern of this setting, and the |
| 484 | @ user is "nobody", and the client has not previously passed a captcha |
| 485 | @ test to show that it is not a robot, then the page is not displayed. |
| 486 | @ A captcha test is rendered instead. |
| 487 | @ The default value for this setting is: |
| 488 | @ <p> |
| 489 | @    <tt>%h(robot_restrict_default())</tt> |
| 490 | @ <p> |
| 491 | @ The "diff" tag covers all diffing pages such as /vdiff, /fdiff, and |
| 492 | @ /vpatch. The "annotate" tag covers /annotate and also /blame and |
| 493 | @ /praise. The "zip" covers itself and also /tarball and /sqlar. If a |
| 494 | @ tag has an "X" character appended, then it only applies if query |
| 495 | @ parameters are such that the page is expensive and/or unusual. |
| 496 | @ In all other cases, the tag should exactly match the page name. |
| 497 | @ |
| 498 | @ To disable robot restrictions, change this setting to "off". |
| 499 | @ (Property: robot-restrict) |
| 500 | @ <br> |
| 501 | textarea_attribute("", 2, 80, |
| 502 | "robot-restrict", "rbrestrict", robot_restrict_default(), 0); |
| 503 | |
| 504 | @ <p><b>Exception #1</b><br> |
| 505 | @ If "zipX" appears in the robot-restrict list above, then tarballs, |
| 506 | @ ZIP-archives, and SQL-archives may be downloaded by robots if |
| 507 | @ the check-in is a leaf (robot-zip-leaf):<br> |
| 508 | onoff_attribute("Allow tarballs for leaf check-ins", |
| 509 | "robot-zip-leaf", "rzleaf", 0, 0); |
| 510 | |
| 511 | @ <p><b>Exception #2</b><br> |
| 512 | @ If "zipX" appears in the robot-restrict list above, then tarballs, |
| 513 | @ ZIP-archives, and SQL-archives may be downloaded by robots if |
| 514 | @ the check-in has one or more tags that match the following |
| 515 | @ list of GLOB patterns: (robot-zip-tag)<br> |
| 516 | textarea_attribute("", 2, 80, |
| 517 | "robot-zip-tag", "rztag", "", 0); |
| 518 | |
| 519 | @ <p><b>Exception #3</b><br> |
| 520 | @ If the request URI matches any of the following |
| 521 | @ <a href="%R/re_rules">regular expressions</a> (one per line), then the |
| 522 | @ request is exempt from anti-robot defenses. |
| 523 | @ The regular expression is matched against the REQUEST_URI with the |
| 524 | @ SCRIPT_NAME prefix removed, and with QUERY_STRING appended following |
| 525 | @ a "?" if QUERY_STRING exists. (Property: robot-exception)<br> |
| 526 | textarea_attribute("", 3, 80, |
| 527 | "robot-exception", "rbexcept", "", 0); |
| 528 | @ <hr> |
| 529 | addAutoHyperlinkSettings(); |
| 530 | |
| 531 | @ <hr> |
| 532 | entry_attribute("Anonymous Login Validity", 11, "anon-cookie-lifespan", |
| 533 |