Fossil SCM

Make use of the Accept-Encoding header value to help distinguish humans from robots.

drh 2025-04-01 13:33 trunk
Commit 0d41eb4790df995f0c58ce71d1e516950fd14d751ccb0669d61dd9e7ad4a44dc
1 file changed +17 -1
+17 -1
--- src/login.c
+++ src/login.c
@@ -1299,20 +1299,36 @@
12991299
** that should restrict robot access. No restrictions
13001300
** are applied if this setting is undefined or is
13011301
** an empty string.
13021302
*/
13031303
void login_restrict_robot_access(void){
1304
- const char *zReferer;
13051304
const char *zGlob;
13061305
int isMatch = 1;
13071306
int nQP; /* Number of query parameters other than name= */
13081307
if( g.zLogin!=0 ) return;
13091308
zGlob = db_get("robot-restrict",0);
13101309
if( zGlob==0 || zGlob[0]==0 ) return;
13111310
if( g.isHuman ){
1311
+ const char *zReferer;
1312
+ const char *zAccept;
1313
+ const char *zBr;
13121314
zReferer = P("HTTP_REFERER");
13131315
if( zReferer && zReferer[0]!=0 ) return;
1316
+
1317
+ /* Robots typically do not accept the brotli encoding, at least not
1318
+ ** at the time of this writing (2025-04-01), but standard web browsers
1319
+ ** all generally do accept brotli. So if brotli is accepted,
1320
+ ** assume we are not talking to a robot. We might want to revisit this
1321
+ ** heuristic in the future...
1322
+ */
1323
+ if( (zAccept = P("HTTP_ACCEPT_ENCODING"))!=0
1324
+ && (zBr = strstr(zAccept,"br"))!=0
1325
+ && !fossil_isalnum(zBr[2])
1326
+ && (zBr==zAccept || !fossil_isalnum(zBr[-1]))
1327
+ ){
1328
+ return;
1329
+ }
13141330
}
13151331
nQP = cgi_qp_count();
13161332
if( nQP<1 ) return;
13171333
isMatch = glob_multi_match(zGlob, g.zPath);
13181334
if( !isMatch ) return;
13191335
--- src/login.c
+++ src/login.c
@@ -1299,20 +1299,36 @@
1299 ** that should restrict robot access. No restrictions
1300 ** are applied if this setting is undefined or is
1301 ** an empty string.
1302 */
1303 void login_restrict_robot_access(void){
1304 const char *zReferer;
1305 const char *zGlob;
1306 int isMatch = 1;
1307 int nQP; /* Number of query parameters other than name= */
1308 if( g.zLogin!=0 ) return;
1309 zGlob = db_get("robot-restrict",0);
1310 if( zGlob==0 || zGlob[0]==0 ) return;
1311 if( g.isHuman ){
 
 
 
1312 zReferer = P("HTTP_REFERER");
1313 if( zReferer && zReferer[0]!=0 ) return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1314 }
1315 nQP = cgi_qp_count();
1316 if( nQP<1 ) return;
1317 isMatch = glob_multi_match(zGlob, g.zPath);
1318 if( !isMatch ) return;
1319
--- src/login.c
+++ src/login.c
@@ -1299,20 +1299,36 @@
1299 ** that should restrict robot access. No restrictions
1300 ** are applied if this setting is undefined or is
1301 ** an empty string.
1302 */
1303 void login_restrict_robot_access(void){
 
1304 const char *zGlob;
1305 int isMatch = 1;
1306 int nQP; /* Number of query parameters other than name= */
1307 if( g.zLogin!=0 ) return;
1308 zGlob = db_get("robot-restrict",0);
1309 if( zGlob==0 || zGlob[0]==0 ) return;
1310 if( g.isHuman ){
1311 const char *zReferer;
1312 const char *zAccept;
1313 const char *zBr;
1314 zReferer = P("HTTP_REFERER");
1315 if( zReferer && zReferer[0]!=0 ) return;
1316
1317 /* Robots typically do not accept the brotli encoding, at least not
1318 ** at the time of this writing (2025-04-01), but standard web browsers
1319 ** all generally do accept brotli. So if brotli is accepted,
1320 ** assume we are not talking to a robot. We might want to revisit this
1321 ** heuristic in the future...
1322 */
1323 if( (zAccept = P("HTTP_ACCEPT_ENCODING"))!=0
1324 && (zBr = strstr(zAccept,"br"))!=0
1325 && !fossil_isalnum(zBr[2])
1326 && (zBr==zAccept || !fossil_isalnum(zBr[-1]))
1327 ){
1328 return;
1329 }
1330 }
1331 nQP = cgi_qp_count();
1332 if( nQP<1 ) return;
1333 isMatch = glob_multi_match(zGlob, g.zPath);
1334 if( !isMatch ) return;
1335

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button