Fossil SCM
Add a /robots.txt page - useful only when Fossil is deployed as a stand-alone server instead of as a sub-component to a larger website.
Commit
cadfcba32c7e20dedf8bb58e00eb15c3ac6c79eedb9017e5170aa90ba152b7ec
Parent
398ea9e425aa60e…
1 file changed
+43
+43
| --- src/style.c | ||
| +++ src/style.c | ||
| @@ -1804,5 +1804,48 @@ | ||
| 1804 | 1804 | void style_emit_noscript_for_js_page(void){ |
| 1805 | 1805 | CX("<noscript><div class='error'>" |
| 1806 | 1806 | "This page requires JavaScript (ES2015, a.k.a. ES6, or newer)." |
| 1807 | 1807 | "</div></noscript>"); |
| 1808 | 1808 | } |
| 1809 | + | |
| 1810 | +/* | |
| 1811 | +** SETTING: robots-txt width=70 block-text keep-empty | |
| 1812 | +** | |
| 1813 | +** This setting is the override value for the /robots.txt file that | |
| 1814 | +** Fossil returns when run as a stand-alone server for a domain. As | |
| 1815 | +** Fossil is seldom run as a stand-alone server (and is more commonly | |
| 1816 | +** deployed as a CGI or SCGI or behind a reverse proxy) this setting | |
| 1817 | +** is rarely needed. A reasonable default robots.txt is sent if this | |
| 1818 | +** setting is empty. | |
| 1819 | +*/ | |
| 1820 | + | |
| 1821 | +/* | |
| 1822 | +** WEBPAGE: robots.txt | |
| 1823 | +** | |
| 1824 | +** Return text/plain which is the content of the "robots-txt" setting, if | |
| 1825 | +** such a setting exists and is non-empty. Or construct an RFC-9309 compliant | |
| 1826 | +** robots.txt file and return that if there is no "robots-txt" setting. | |
| 1827 | +** | |
| 1828 | +** This is useful for robot exclusion in cases where Fossil is run as a | |
| 1829 | +** stand-alone server in its own domain. For the more common case where | |
| 1830 | +** Fossil is run as a CGI, or SCGI, or a server that is responding to a reverse | |
| 1831 | +** proxy, the returned robots.txt file will not be at the top level of the | |
| 1832 | +** domain, and so it will be pointless. | |
| 1833 | +*/ | |
| 1834 | +void robotstxt_page(void){ | |
| 1835 | + const char *z; | |
| 1836 | + static const char *zDflt = | |
| 1837 | + "User-agent: *\n" | |
| 1838 | + "Allow: /doc\n" | |
| 1839 | + "Allow: /home\n" | |
| 1840 | + "Allow: /forum\n" | |
| 1841 | + "Allow: /technote\n" | |
| 1842 | + "Allow: /tktview\n" | |
| 1843 | + "Allow: /wiki\n" | |
| 1844 | + "Allow: /uv/\n" | |
| 1845 | + "Allow: /$\n" | |
| 1846 | + "Disallow: /*\n" | |
| 1847 | + ; | |
| 1848 | + z = db_get("robots-txt",zDflt); | |
| 1849 | + cgi_set_content_type("text/plain"); | |
| 1850 | + cgi_append_content(z, -1); | |
| 1851 | +} | |
| 1809 | 1852 |
| --- src/style.c | |
| +++ src/style.c | |
| @@ -1804,5 +1804,48 @@ | |
| 1804 | void style_emit_noscript_for_js_page(void){ |
| 1805 | CX("<noscript><div class='error'>" |
| 1806 | "This page requires JavaScript (ES2015, a.k.a. ES6, or newer)." |
| 1807 | "</div></noscript>"); |
| 1808 | } |
| 1809 |
| --- src/style.c | |
| +++ src/style.c | |
| @@ -1804,5 +1804,48 @@ | |
| 1804 | void style_emit_noscript_for_js_page(void){ |
| 1805 | CX("<noscript><div class='error'>" |
| 1806 | "This page requires JavaScript (ES2015, a.k.a. ES6, or newer)." |
| 1807 | "</div></noscript>"); |
| 1808 | } |
| 1809 | |
| 1810 | /* |
| 1811 | ** SETTING: robots-txt width=70 block-text keep-empty |
| 1812 | ** |
| 1813 | ** This setting is the override value for the /robots.txt file that |
| 1814 | ** Fossil returns when run as a stand-alone server for a domain. As |
| 1815 | ** Fossil is seldom run as a stand-alone server (and is more commonly |
| 1816 | ** deployed as a CGI or SCGI or behind a reverse proxy) this setting |
| 1817 | ** is rarely needed. A reasonable default robots.txt is sent if this |
| 1818 | ** setting is empty. |
| 1819 | */ |
| 1820 | |
| 1821 | /* |
| 1822 | ** WEBPAGE: robots.txt |
| 1823 | ** |
| 1824 | ** Return text/plain which is the content of the "robots-txt" setting, if |
| 1825 | ** such a setting exists and is non-empty. Or construct an RFC-9309 compliant |
| 1826 | ** robots.txt file and return that if there is no "robots-txt" setting. |
| 1827 | ** |
| 1828 | ** This is useful for robot exclusion in cases where Fossil is run as a |
| 1829 | ** stand-alone server in its own domain. For the more common case where |
| 1830 | ** Fossil is run as a CGI, or SCGI, or a server that is responding to a reverse |
| 1831 | ** proxy, the returned robots.txt file will not be at the top level of the |
| 1832 | ** domain, and so it will be pointless. |
| 1833 | */ |
| 1834 | void robotstxt_page(void){ |
| 1835 | const char *z; |
| 1836 | static const char *zDflt = |
| 1837 | "User-agent: *\n" |
| 1838 | "Allow: /doc\n" |
| 1839 | "Allow: /home\n" |
| 1840 | "Allow: /forum\n" |
| 1841 | "Allow: /technote\n" |
| 1842 | "Allow: /tktview\n" |
| 1843 | "Allow: /wiki\n" |
| 1844 | "Allow: /uv/\n" |
| 1845 | "Allow: /$\n" |
| 1846 | "Disallow: /*\n" |
| 1847 | ; |
| 1848 | z = db_get("robots-txt",zDflt); |
| 1849 | cgi_set_content_type("text/plain"); |
| 1850 | cgi_append_content(z, -1); |
| 1851 | } |
| 1852 |