Fossil SCM

Still more refinements to the human/robot discriminator.

drh 2011-11-25 21:44 trunk
Commit 9e680d6375de0905274fd358f6aa9c088279e412
1 file changed +11 -7
+11 -7
--- src/login.c
+++ src/login.c
@@ -361,28 +361,32 @@
361361
** Return true if we believe the agent is a real person.
362362
*/
363363
static int isHuman(const char *zAgent){
364364
int i;
365365
int seenCompatible = 0;
366
- int seenIE = 0;
367366
if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */
368367
for(i=0; zAgent[i]; i++){
369368
char c = zAgent[i];
370369
if( c=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0;
371
- if( c=='s' && memcmp(&zAgent[i],"spider",6)==0 ) return 0;
372
- if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */
370
+ if( c=='p' && memcmp(&zAgent[i],"pider",5)==0 ) return 0; /* "spider" */
371
+ if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */
373372
/* Anything that puts a URL in the UserAgent string is probably a bot */
374373
if( c=='h' && memcmp(&zAgent[i],"http",4)==0 ) return 0;
375
- if( c=='c' && memcmp(&zAgent[i],"compatible",11)==0 ){
376
- seenCompatible = 1;
374
+ if( c=='c' && seenCompatible==0 && memcmp(&zAgent[i],"compatible",11)==0 ){
375
+ seenCompatible = i;
377376
i+=10;
378377
}
379
- if( c=='I' && zAgent[i+1]=='E' ) seenIE = 1;
380378
}
381379
if( memcmp(zAgent, "Mozilla/", 8)==0 ){
382380
if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */
383
- if( seenCompatible && !seenIE ) return 0;
381
+ if( seenCompatible
382
+ && memcmp(&zAgent[seenCompatible],"compatible;_MSIE_", 18)!=0
383
+ ){
384
+ /* If it claims to be Mozilla compatible and it isn't MSIE, then it
385
+ ** is probably a bot */
386
+ return 0;
387
+ }
384388
return 1;
385389
}
386390
if( memcmp(zAgent, "Opera/", 6)==0 ) return 1;
387391
if( memcmp(zAgent, "Safari/", 7)==0 ) return 1;
388392
if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1;
389393
--- src/login.c
+++ src/login.c
@@ -361,28 +361,32 @@
361 ** Return true if we believe the agent is a real person.
362 */
363 static int isHuman(const char *zAgent){
364 int i;
365 int seenCompatible = 0;
366 int seenIE = 0;
367 if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */
368 for(i=0; zAgent[i]; i++){
369 char c = zAgent[i];
370 if( c=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0;
371 if( c=='s' && memcmp(&zAgent[i],"spider",6)==0 ) return 0;
372 if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */
373 /* Anything that puts a URL in the UserAgent string is probably a bot */
374 if( c=='h' && memcmp(&zAgent[i],"http",4)==0 ) return 0;
375 if( c=='c' && memcmp(&zAgent[i],"compatible",11)==0 ){
376 seenCompatible = 1;
377 i+=10;
378 }
379 if( c=='I' && zAgent[i+1]=='E' ) seenIE = 1;
380 }
381 if( memcmp(zAgent, "Mozilla/", 8)==0 ){
382 if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */
383 if( seenCompatible && !seenIE ) return 0;
 
 
 
 
 
 
384 return 1;
385 }
386 if( memcmp(zAgent, "Opera/", 6)==0 ) return 1;
387 if( memcmp(zAgent, "Safari/", 7)==0 ) return 1;
388 if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1;
389
--- src/login.c
+++ src/login.c
@@ -361,28 +361,32 @@
361 ** Return true if we believe the agent is a real person.
362 */
363 static int isHuman(const char *zAgent){
364 int i;
365 int seenCompatible = 0;
 
366 if( zAgent==0 ) return 0; /* If no UserAgent, then probably a bot */
367 for(i=0; zAgent[i]; i++){
368 char c = zAgent[i];
369 if( c=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0;
370 if( c=='p' && memcmp(&zAgent[i],"pider",5)==0 ) return 0; /* "spider" */
371 if( c=='r' && memcmp(&zAgent[i],"rawl",4)==0 ) return 0; /* "crawler" */
372 /* Anything that puts a URL in the UserAgent string is probably a bot */
373 if( c=='h' && memcmp(&zAgent[i],"http",4)==0 ) return 0;
374 if( c=='c' && seenCompatible==0 && memcmp(&zAgent[i],"compatible",11)==0 ){
375 seenCompatible = i;
376 i+=10;
377 }
 
378 }
379 if( memcmp(zAgent, "Mozilla/", 8)==0 ){
380 if( atoi(&zAgent[8])<4 ) return 0; /* Many bots advertise as Mozilla/3 */
381 if( seenCompatible
382 && memcmp(&zAgent[seenCompatible],"compatible;_MSIE_", 18)!=0
383 ){
384 /* If it claims to be Mozilla compatible and it isn't MSIE, then it
385 ** is probably a bot */
386 return 0;
387 }
388 return 1;
389 }
390 if( memcmp(zAgent, "Opera/", 6)==0 ) return 1;
391 if( memcmp(zAgent, "Safari/", 7)==0 ) return 1;
392 if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1;
393

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button