Fossil SCM

Improved robot detection logic.

drh 2024-04-06 19:01 trunk
Commit 61a8b0ecadbbdeaf40667fe69dddb84ad4558f30e393075c5e8d1636d15cb969
1 file changed +9 -26
+9 -26
--- src/login.c
+++ src/login.c
@@ -394,50 +394,33 @@
394 394   cgi_replace_parameter(cookie, NULL);
395 395   cgi_replace_parameter("anon", NULL);
396 396   }
397 397   }
398 398
399     - /*
400     - ** Return true if the prefix of zStr matches zPattern. Return false if
401     - ** they are different.
402     - **
403     - ** A lowercase character in zPattern will match either upper or lower
404     - ** case in zStr. But an uppercase in zPattern will only match an
405     - ** uppercase in zStr.
406     - */
407     - static int prefix_match(const char *zPattern, const char *zStr){
408     - int i;
409     - char c;
410     - for(i=0; (c = zPattern[i])!=0; i++){
411     - if( zStr[i]!=c && fossil_tolower(zStr[i])!=c ) return 0;
412     - }
413     - return 1;
414     - }
415     -
416 399   /*
417 400   ** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
418 401   ** is a manually operated browser or a bot. When in doubt, assume a bot.
419 402   ** Return true if we believe the agent is a real person.
420 403   */
421 404   static int isHuman(const char *zAgent){
422     - int i;
423 405   if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */
424     - for(i=0; zAgent[i]; i++){
425     - if( prefix_match("bot", zAgent+i) ) return 0;
426     - if( prefix_match("spider", zAgent+i) ) return 0;
427     - if( prefix_match("crawl", zAgent+i) ) return 0;
428     - /* If a URI appears in the User-Agent, it is probably a bot */
429     - if( strncmp("http", zAgent+i,4)==0 ) return 0;
430     - }
    406 + if( strstr(zAgent, "bot")!=0 ) return 0;
    407 + if( strstr(zAgent, "spider")!=0 ) return 0;
    408 + if( strstr(zAgent, "crawl")!=0 ) return 0;
    409 + /* If a URI appears in the User-Agent, it is probably a bot */
    410 + if( strstr(zAgent, "http")!=0 ) return 0;
431 411   if( strncmp(zAgent, "Mozilla/", 8)==0 ){
432 412   if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */
433 413
    414 + /* Google AI Robot, maybe? */
    415 + if( strstr(zAgent, "GoogleOther)")!=0 ) return 0;
    416 +
434 417   /* 2016-05-30: A pernicious spider that likes to walk Fossil timelines has
435 418   ** been detected on the SQLite website. The spider changes its user-agent
436 419   ** string frequently, but it always seems to include the following text:
437 420   */
438     - if( sqlite3_strglob("*Safari/537.36Mozilla/5.0*", zAgent)==0 ) return 0;
    421 + if( strstr(zAgent, "Safari/537.36Mozilla/5.0")!=0 ) return 0;
439 422
440 423   if( sqlite3_strglob("*Firefox/[1-9]*", zAgent)==0 ) return 1;
441 424   if( sqlite3_strglob("*Chrome/[1-9]*", zAgent)==0 ) return 1;
442 425   if( sqlite3_strglob("*(compatible;?MSIE?[1789]*", zAgent)==0 ) return 1;
443 426   if( sqlite3_strglob("*Trident/[1-9]*;?rv:[1-9]*", zAgent)==0 ){
444 427
--- src/login.c
+++ src/login.c
@@ -394,50 +394,33 @@
394 cgi_replace_parameter(cookie, NULL);
395 cgi_replace_parameter("anon", NULL);
396 }
397 }
398
399 /*
400 ** Return true if the prefix of zStr matches zPattern. Return false if
401 ** they are different.
402 **
403 ** A lowercase character in zPattern will match either upper or lower
404 ** case in zStr. But an uppercase in zPattern will only match an
405 ** uppercase in zStr.
406 */
407 static int prefix_match(const char *zPattern, const char *zStr){
408 int i;
409 char c;
410 for(i=0; (c = zPattern[i])!=0; i++){
411 if( zStr[i]!=c && fossil_tolower(zStr[i])!=c ) return 0;
412 }
413 return 1;
414 }
415
416 /*
417 ** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
418 ** is a manually operated browser or a bot. When in doubt, assume a bot.
419 ** Return true if we believe the agent is a real person.
420 */
421 static int isHuman(const char *zAgent){
422 int i;
423 if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */
424 for(i=0; zAgent[i]; i++){
425 if( prefix_match("bot", zAgent+i) ) return 0;
426 if( prefix_match("spider", zAgent+i) ) return 0;
427 if( prefix_match("crawl", zAgent+i) ) return 0;
428 /* If a URI appears in the User-Agent, it is probably a bot */
429 if( strncmp("http", zAgent+i,4)==0 ) return 0;
430 }
431 if( strncmp(zAgent, "Mozilla/", 8)==0 ){
432 if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */
433
 
 
 
434 /* 2016-05-30: A pernicious spider that likes to walk Fossil timelines has
435 ** been detected on the SQLite website. The spider changes its user-agent
436 ** string frequently, but it always seems to include the following text:
437 */
438 if( sqlite3_strglob("*Safari/537.36Mozilla/5.0*", zAgent)==0 ) return 0;
439
440 if( sqlite3_strglob("*Firefox/[1-9]*", zAgent)==0 ) return 1;
441 if( sqlite3_strglob("*Chrome/[1-9]*", zAgent)==0 ) return 1;
442 if( sqlite3_strglob("*(compatible;?MSIE?[1789]*", zAgent)==0 ) return 1;
443 if( sqlite3_strglob("*Trident/[1-9]*;?rv:[1-9]*", zAgent)==0 ){
444
--- src/login.c
+++ src/login.c
@@ -394,50 +394,33 @@
394 cgi_replace_parameter(cookie, NULL);
395 cgi_replace_parameter("anon", NULL);
396 }
397 }
398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399 /*
400 ** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
401 ** is a manually operated browser or a bot. When in doubt, assume a bot.
402 ** Return true if we believe the agent is a real person.
403 */
404 static int isHuman(const char *zAgent){
 
405 if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */
406 if( strstr(zAgent, "bot")!=0 ) return 0;
407 if( strstr(zAgent, "spider")!=0 ) return 0;
408 if( strstr(zAgent, "crawl")!=0 ) return 0;
409 /* If a URI appears in the User-Agent, it is probably a bot */
410 if( strstr(zAgent, "http")!=0 ) return 0;
 
 
411 if( strncmp(zAgent, "Mozilla/", 8)==0 ){
412 if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */
413
414 /* Google AI Robot, maybe? */
415 if( strstr(zAgent, "GoogleOther)")!=0 ) return 0;
416
417 /* 2016-05-30: A pernicious spider that likes to walk Fossil timelines has
418 ** been detected on the SQLite website. The spider changes its user-agent
419 ** string frequently, but it always seems to include the following text:
420 */
421 if( strstr(zAgent, "Safari/537.36Mozilla/5.0")!=0 ) return 0;
422
423 if( sqlite3_strglob("*Firefox/[1-9]*", zAgent)==0 ) return 1;
424 if( sqlite3_strglob("*Chrome/[1-9]*", zAgent)==0 ) return 1;
425 if( sqlite3_strglob("*(compatible;?MSIE?[1789]*", zAgent)==0 ) return 1;
426 if( sqlite3_strglob("*Trident/[1-9]*;?rv:[1-9]*", zAgent)==0 ){
427

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button