Fossil SCM
Still more refinements to the human/robot discriminator.
Commit
9e680d6375de0905274fd358f6aa9c088279e412
Parent
53db20530286ca7…
1 file changed
+11
-7
+11
-7
| --- src/login.c | ||
| +++ src/login.c | ||
| @@ -361,28 +361,32 @@ | ||
| 361 | 361 | ** Return true if we believe the agent is a real person. |
| 362 | 362 | */ |
| 363 | 363 | static int isHuman(const char *zAgent){ |
| 364 | 364 | int i; |
| 365 | 365 | int seenCompatible = 0; |
| 366 | - int seenIE = 0; | |
| 367 | 366 | if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */ |
| 368 | 367 | for(i=0; zAgent[i]; i++){ |
| 369 | 368 | char c = zAgent[i]; |
| 370 | 369 | if( c=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0; |
| 371 | - if( c=='s' && memcmp(&zAgent[i],"spider",6)==0 ) return 0; | |
| 372 | - if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */ | |
| 370 | + if( c=='p' && memcmp(&zAgent[i],"pider",5)==0 ) return 0; /* "spider" */ | |
| 371 | + if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */ | |
| 373 | 372 | /* Anything that puts a URL in the UserAgent string is probably a bot */ |
| 374 | 373 | if( c=='h' && memcmp(&zAgent[i],"http",4)==0 ) return 0; |
| 375 | - if( c=='c' && memcmp(&zAgent[i],"compatible",11)==0 ){ | |
| 376 | - seenCompatible = 1; | |
| 374 | + if( c=='c' && seenCompatible==0 && memcmp(&zAgent[i],"compatible",11)==0 ){ | |
| 375 | + seenCompatible = i; | |
| 377 | 376 | i+=10; |
| 378 | 377 | } |
| 379 | - if( c=='I' && zAgent[i+1]=='E' ) seenIE = 1; | |
| 380 | 378 | } |
| 381 | 379 | if( memcmp(zAgent, "Mozilla/", 8)==0 ){ |
| 382 | 380 | if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */ |
| 383 | - if( seenCompatible && !seenIE ) return 0; | |
| 381 | + if( seenCompatible | |
| 382 | + && memcmp(&zAgent[seenCompatible],"compatible;_MSIE_", 18)!=0 | |
| 383 | + ){ | |
| 384 | + /* If it claims to be Mozilla compatible and it isn't MSIE, then it | |
| 385 | + ** is probably a bot */ | |
| 386 | + return 0; | |
| 387 | + } | |
| 384 | 388 | return 1; |
| 385 | 389 | } |
| 386 | 390 | if( memcmp(zAgent, "Opera/", 6)==0 ) return 1; |
| 387 | 391 | if( memcmp(zAgent, "Safari/", 7)==0 ) return 1; |
| 388 | 392 | if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1; |
| 389 | 393 |
| --- src/login.c | |
| +++ src/login.c | |
| @@ -361,28 +361,32 @@ | |
| 361 | ** Return true if we believe the agent is a real person. |
| 362 | */ |
| 363 | static int isHuman(const char *zAgent){ |
| 364 | int i; |
| 365 | int seenCompatible = 0; |
| 366 | int seenIE = 0; |
| 367 | if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */ |
| 368 | for(i=0; zAgent[i]; i++){ |
| 369 | char c = zAgent[i]; |
| 370 | if( c=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0; |
| 371 | if( c=='s' && memcmp(&zAgent[i],"spider",6)==0 ) return 0; |
| 372 | if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */ |
| 373 | /* Anything that puts a URL in the UserAgent string is probably a bot */ |
| 374 | if( c=='h' && memcmp(&zAgent[i],"http",4)==0 ) return 0; |
| 375 | if( c=='c' && memcmp(&zAgent[i],"compatible",11)==0 ){ |
| 376 | seenCompatible = 1; |
| 377 | i+=10; |
| 378 | } |
| 379 | if( c=='I' && zAgent[i+1]=='E' ) seenIE = 1; |
| 380 | } |
| 381 | if( memcmp(zAgent, "Mozilla/", 8)==0 ){ |
| 382 | if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */ |
| 383 | if( seenCompatible && !seenIE ) return 0; |
| 384 | return 1; |
| 385 | } |
| 386 | if( memcmp(zAgent, "Opera/", 6)==0 ) return 1; |
| 387 | if( memcmp(zAgent, "Safari/", 7)==0 ) return 1; |
| 388 | if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1; |
| 389 |
| --- src/login.c | |
| +++ src/login.c | |
| @@ -361,28 +361,32 @@ | |
| 361 | ** Return true if we believe the agent is a real person. |
| 362 | */ |
| 363 | static int isHuman(const char *zAgent){ |
| 364 | int i; |
| 365 | int seenCompatible = 0; |
| 366 | if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */ |
| 367 | for(i=0; zAgent[i]; i++){ |
| 368 | char c = zAgent[i]; |
| 369 | if( c=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0; |
| 370 | if( c=='p' && memcmp(&zAgent[i],"pider",5)==0 ) return 0; /* "spider" */ |
| 371 | if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */ |
| 372 | /* Anything that puts a URL in the UserAgent string is probably a bot */ |
| 373 | if( c=='h' && memcmp(&zAgent[i],"http",4)==0 ) return 0; |
| 374 | if( c=='c' && seenCompatible==0 && memcmp(&zAgent[i],"compatible",11)==0 ){ |
| 375 | seenCompatible = i; |
| 376 | i+=10; |
| 377 | } |
| 378 | } |
| 379 | if( memcmp(zAgent, "Mozilla/", 8)==0 ){ |
| 380 | if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */ |
| 381 | if( seenCompatible |
| 382 | && memcmp(&zAgent[seenCompatible],"compatible;_MSIE_", 18)!=0 |
| 383 | ){ |
| 384 | /* If it claims to be Mozilla compatible and it isn't MSIE, then it |
| 385 | ** is probably a bot */ |
| 386 | return 0; |
| 387 | } |
| 388 | return 1; |
| 389 | } |
| 390 | if( memcmp(zAgent, "Opera/", 6)==0 ) return 1; |
| 391 | if( memcmp(zAgent, "Safari/", 7)==0 ) return 1; |
| 392 | if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1; |
| 393 |