Fossil SCM
Make use of the Accept-Encoding header value to help distinguish humans from robots.
Commit
0d41eb4790df995f0c58ce71d1e516950fd14d751ccb0669d61dd9e7ad4a44dc
Parent
6f1c7324b662f28…
1 file changed
+17
-1
+17
-1
| --- src/login.c | ||
| +++ src/login.c | ||
| @@ -1299,20 +1299,36 @@ | ||
| 1299 | 1299 | ** that should restrict robot access. No restrictions |
| 1300 | 1300 | ** are applied if this setting is undefined or is |
| 1301 | 1301 | ** an empty string. |
| 1302 | 1302 | */ |
| 1303 | 1303 | void login_restrict_robot_access(void){ |
| 1304 | - const char *zReferer; | |
| 1305 | 1304 | const char *zGlob; |
| 1306 | 1305 | int isMatch = 1; |
| 1307 | 1306 | int nQP; /* Number of query parameters other than name= */ |
| 1308 | 1307 | if( g.zLogin!=0 ) return; |
| 1309 | 1308 | zGlob = db_get("robot-restrict",0); |
| 1310 | 1309 | if( zGlob==0 || zGlob[0]==0 ) return; |
| 1311 | 1310 | if( g.isHuman ){ |
| 1311 | + const char *zReferer; | |
| 1312 | + const char *zAccept; | |
| 1313 | + const char *zBr; | |
| 1312 | 1314 | zReferer = P("HTTP_REFERER"); |
| 1313 | 1315 | if( zReferer && zReferer[0]!=0 ) return; |
| 1316 | + | |
| 1317 | + /* Robots typically do not accept the brotli encoding, at least not | |
| 1318 | + ** at the time of this writing (2025-04-01), but standard web browsers | |
| 1319 | + ** all generally do accept brotli. So if brotli is accepted, | |
| 1320 | + ** assume we are not talking to a robot. We might want to revisit this | |
| 1321 | + ** heuristic in the future... | |
| 1322 | + */ | |
| 1323 | + if( (zAccept = P("HTTP_ACCEPT_ENCODING"))!=0 | |
| 1324 | + && (zBr = strstr(zAccept,"br"))!=0 | |
| 1325 | + && !fossil_isalnum(zBr[2]) | |
| 1326 | + && (zBr==zAccept || !fossil_isalnum(zBr[-1])) | |
| 1327 | + ){ | |
| 1328 | + return; | |
| 1329 | + } | |
| 1314 | 1330 | } |
| 1315 | 1331 | nQP = cgi_qp_count(); |
| 1316 | 1332 | if( nQP<1 ) return; |
| 1317 | 1333 | isMatch = glob_multi_match(zGlob, g.zPath); |
| 1318 | 1334 | if( !isMatch ) return; |
| 1319 | 1335 |
| --- src/login.c | |
| +++ src/login.c | |
| @@ -1299,20 +1299,36 @@ | |
| 1299 | ** that should restrict robot access. No restrictions |
| 1300 | ** are applied if this setting is undefined or is |
| 1301 | ** an empty string. |
| 1302 | */ |
| 1303 | void login_restrict_robot_access(void){ |
| 1304 | const char *zReferer; |
| 1305 | const char *zGlob; |
| 1306 | int isMatch = 1; |
| 1307 | int nQP; /* Number of query parameters other than name= */ |
| 1308 | if( g.zLogin!=0 ) return; |
| 1309 | zGlob = db_get("robot-restrict",0); |
| 1310 | if( zGlob==0 || zGlob[0]==0 ) return; |
| 1311 | if( g.isHuman ){ |
| 1312 | zReferer = P("HTTP_REFERER"); |
| 1313 | if( zReferer && zReferer[0]!=0 ) return; |
| 1314 | } |
| 1315 | nQP = cgi_qp_count(); |
| 1316 | if( nQP<1 ) return; |
| 1317 | isMatch = glob_multi_match(zGlob, g.zPath); |
| 1318 | if( !isMatch ) return; |
| 1319 |
| --- src/login.c | |
| +++ src/login.c | |
| @@ -1299,20 +1299,36 @@ | |
| 1299 | ** that should restrict robot access. No restrictions |
| 1300 | ** are applied if this setting is undefined or is |
| 1301 | ** an empty string. |
| 1302 | */ |
| 1303 | void login_restrict_robot_access(void){ |
| 1304 | const char *zGlob; |
| 1305 | int isMatch = 1; |
| 1306 | int nQP; /* Number of query parameters other than name= */ |
| 1307 | if( g.zLogin!=0 ) return; |
| 1308 | zGlob = db_get("robot-restrict",0); |
| 1309 | if( zGlob==0 || zGlob[0]==0 ) return; |
| 1310 | if( g.isHuman ){ |
| 1311 | const char *zReferer; |
| 1312 | const char *zAccept; |
| 1313 | const char *zBr; |
| 1314 | zReferer = P("HTTP_REFERER"); |
| 1315 | if( zReferer && zReferer[0]!=0 ) return; |
| 1316 | |
| 1317 | /* Robots typically do not accept the brotli encoding, at least not |
| 1318 | ** at the time of this writing (2025-04-01), but standard web browsers |
| 1319 | ** all generally do accept brotli. So if brotli is accepted, |
| 1320 | ** assume we are not talking to a robot. We might want to revisit this |
| 1321 | ** heuristic in the future... |
| 1322 | */ |
| 1323 | if( (zAccept = P("HTTP_ACCEPT_ENCODING"))!=0 |
| 1324 | && (zBr = strstr(zAccept,"br"))!=0 |
| 1325 | && !fossil_isalnum(zBr[2]) |
| 1326 | && (zBr==zAccept || !fossil_isalnum(zBr[-1])) |
| 1327 | ){ |
| 1328 | return; |
| 1329 | } |
| 1330 | } |
| 1331 | nQP = cgi_qp_count(); |
| 1332 | if( nQP<1 ) return; |
| 1333 | isMatch = glob_multi_match(zGlob, g.zPath); |
| 1334 | if( !isMatch ) return; |
| 1335 |