Fossil SCM

Improvements to the User-Agent bot recognizer. Add the test-ishuman command for testing the bot recognizer.

drh 2011-10-26 15:41 trunk
Commit 06e0cb70054d3c3e303a808563ca6c1ab2b86c2b
1 file changed +20 -4
+20 -4
--- src/login.c
+++ src/login.c
@@ -203,23 +203,39 @@
203203
/*
204204
** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
205205
** is a manually operated browser or a bot. When in doubt, assume a bot. Return
206206
** true if we believe the agent is a real person.
207207
*/
208
-static int isHuman(void){
209
- const char *zAgent = P("HTTP_USER_AGENT");
208
+static int isHuman(const char *zAgent){
210209
int i;
211210
if( zAgent==0 ) return 0;
212211
for(i=0; zAgent[i]; i++){
213212
if( zAgent[i]=='b' && memcmp(&zAgent[i],"bot",3)==0 ) return 0;
214213
if( zAgent[i]=='s' && memcmp(&zAgent[i],"spider",6)==0 ) return 0;
215214
}
216
- if( memcmp(zAgent, "Mozilla/", 8)==0 ) return 1;
215
+ if( memcmp(zAgent, "Mozilla/", 8)==0 ){
216
+ return atoi(&zAgent[8])>=4;
217
+ }
217218
if( memcmp(zAgent, "Opera/", 6)==0 ) return 1;
218219
if( memcmp(zAgent, "Safari/", 7)==0 ) return 1;
220
+ if( memcmp(zAgent, "Lynx/", 5)==0 ) return 1;
219221
return 0;
220222
}
223
+
224
+/*
225
+** COMMAND: test-ishuman
226
+**
227
+** Read lines of text from standard input. Interpret each line of text
228
+** as a User-Agent string from an HTTP header. Label each line as HUMAN
229
+** or ROBOT.
230
+*/
231
+void test_ishuman(void){
232
+ char zLine[3000];
233
+ while( fgets(zLine, sizeof(zLine), stdin) ){
234
+ fossil_print("%s %s", isHuman(zLine) ? "HUMAN" : "ROBOT", zLine);
235
+ }
236
+}
221237
222238
/*
223239
** SQL function for constant time comparison of two values.
224240
** Sets result to 0 if two values are equal.
225241
*/
@@ -747,11 +763,11 @@
747763
748764
/* Set the capabilities */
749765
login_set_capabilities(zCap, 0);
750766
login_set_anon_nobody_capabilities();
751767
if( zCap[0] && !g.perm.History && db_get_boolean("auto-enable-hyperlinks",1)
752
- && isHuman() ){
768
+ && isHuman(P("HTTP_USER_AGENT")) ){
753769
g.perm.History = 1;
754770
}
755771
}
756772
757773
/*
758774
--- src/login.c
+++ src/login.c
@@ -203,23 +203,39 @@
/*
** Look at the HTTP_USER_AGENT parameter and try to determine if the user agent
** is a manually operated browser or a bot.  When in doubt, assume a bot.
**
** Return true if we believe the agent is a real person.  Return false if
** the parameter lookup yields a NULL pointer.
**
** Heuristic:
**   *  An agent string containing "bot" or "spider" anywhere
**      (case-sensitive) is a bot.
**   *  Otherwise a string starting with "Mozilla/", "Opera/" or "Safari/"
**      is a human.
**   *  Everything else is a bot.
*/
static int isHuman(void){
  const char *zAgent = P("HTTP_USER_AGENT");
  int i;
  if( zAgent==0 ) return 0;
  for(i=0; zAgent[i]; i++){
    /* strncmp() stops at the NUL terminator, so a 'b' or 's' close to the
    ** end of the string cannot cause a read past the end of zAgent. */
    if( zAgent[i]=='b' && strncmp(&zAgent[i],"bot",3)==0 ) return 0;
    if( zAgent[i]=='s' && strncmp(&zAgent[i],"spider",6)==0 ) return 0;
  }
  /* Prefix tests also use strncmp() so that agent strings shorter than
  ** the prefix are handled without reading beyond the terminator. */
  if( strncmp(zAgent, "Mozilla/", 8)==0 ) return 1;
  if( strncmp(zAgent, "Opera/", 6)==0 ) return 1;
  if( strncmp(zAgent, "Safari/", 7)==0 ) return 1;
  return 0;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
222 /*
223 ** SQL function for constant time comparison of two values.
224 ** Sets result to 0 if two values are equal.
225 */
@@ -747,11 +763,11 @@
747
748 /* Set the capabilities */
749 login_set_capabilities(zCap, 0);
750 login_set_anon_nobody_capabilities();
751 if( zCap[0] && !g.perm.History && db_get_boolean("auto-enable-hyperlinks",1)
752 && isHuman() ){
753 g.perm.History = 1;
754 }
755 }
756
757 /*
758
--- src/login.c
+++ src/login.c
@@ -203,23 +203,39 @@
/*
** Look at the User-Agent string zAgent and try to determine if the user
** agent is a manually operated browser or a bot.  When in doubt, assume a
** bot.
**
** Return true if we believe the agent is a real person.  A NULL zAgent
** is treated as a bot.
**
** Heuristic:
**   *  An agent string containing "bot" or "spider" anywhere
**      (case-sensitive) is a bot.
**   *  "Mozilla/N..." is a human only when the integer N is 4 or more.
**   *  A string starting with "Opera/", "Safari/" or "Lynx/" is a human.
**   *  Everything else is a bot.
*/
static int isHuman(const char *zAgent){
  int i;
  if( zAgent==0 ) return 0;
  for(i=0; zAgent[i]; i++){
    /* strncmp() stops at the NUL terminator, so a 'b' or 's' close to the
    ** end of the string cannot cause a read past the end of zAgent. */
    if( zAgent[i]=='b' && strncmp(&zAgent[i],"bot",3)==0 ) return 0;
    if( zAgent[i]=='s' && strncmp(&zAgent[i],"spider",6)==0 ) return 0;
  }
  /* Prefix tests also use strncmp() so that agent strings shorter than
  ** the prefix are handled without reading beyond the terminator. */
  if( strncmp(zAgent, "Mozilla/", 8)==0 ){
    /* strtol() has defined behavior on out-of-range input, unlike atoi().
    ** A missing or non-numeric version parses as 0 and is rejected. */
    return strtol(&zAgent[8], 0, 10)>=4;
  }
  if( strncmp(zAgent, "Opera/", 6)==0 ) return 1;
  if( strncmp(zAgent, "Safari/", 7)==0 ) return 1;
  if( strncmp(zAgent, "Lynx/", 5)==0 ) return 1;
  return 0;
}
223
224 /*
225 ** COMMAND: test-ishuman
226 **
227 ** Read lines of text from standard input. Interpret each line of text
228 ** as a User-Agent string from an HTTP header. Label each line as HUMAN
229 ** or ROBOT.
230 */
void test_ishuman(void){
  char zLine[3000];          /* Holds one chunk of input read by fgets() */
  for(;;){
    const char *zLabel;      /* "HUMAN" or "ROBOT" verdict for zLine */
    if( fgets(zLine, sizeof(zLine), stdin)==0 ) break;
    zLabel = isHuman(zLine) ? "HUMAN" : "ROBOT";
    /* fgets() retains the newline it read, so the format string adds
    ** no line terminator of its own. */
    fossil_print("%s %s", zLabel, zLine);
  }
}
237
238 /*
239 ** SQL function for constant time comparison of two values.
240 ** Sets result to 0 if two values are equal.
241 */
@@ -747,11 +763,11 @@
763
764 /* Set the capabilities */
765 login_set_capabilities(zCap, 0);
766 login_set_anon_nobody_capabilities();
767 if( zCap[0] && !g.perm.History && db_get_boolean("auto-enable-hyperlinks",1)
768 && isHuman(P("HTTP_USER_AGENT")) ){
769 g.perm.History = 1;
770 }
771 }
772
773 /*
774

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button