Fossil SCM

Add the manifest_is_well_formed() routine which can quickly check to see if what we believe to be a data file is actually a control artifact of some kind. Add options to the "test-parse-all-blobs" command to verify this interface.

drh 2020-09-11 14:15 trunk
Commit 0f7eff9eebc00cc84b1d0faf60b8677d7ee1721385898663ef07a40f4e2d499d
1 file changed +84 -12
+84 -12
--- src/manifest.c
+++ src/manifest.c
@@ -289,12 +289,12 @@
289289
}
290290
291291
/*
292292
** Remove the PGP signature from the artifact, if there is one.
293293
*/
294
-static void remove_pgp_signature(char **pz, int *pn){
295
- char *z = *pz;
294
+static void remove_pgp_signature(const char **pz, int *pn){
295
+ const char *z = *pz;
296296
int n = *pn;
297297
int i;
298298
if( strncmp(z, "-----BEGIN PGP SIGNED MESSAGE-----", 34)!=0 ) return;
299299
for(i=34; i<n && !after_blank_line(z+i); i++){}
300300
if( i>=n ) return;
@@ -469,11 +469,11 @@
469469
return 0;
470470
}
471471
472472
/* Strip off the PGP signature if there is one.
473473
*/
474
- remove_pgp_signature(&z, &n);
474
+ remove_pgp_signature((const char**)&z, &n);
475475
476476
/* Verify that the first few characters of the artifact look like
477477
** a control artifact.
478478
*/
479479
if( n<10 || z[0]<'A' || z[0]>'Z' || z[1]!=' ' ){
@@ -1142,10 +1142,51 @@
11421142
if( p==0 ){
11431143
fossil_fatal("cannot parse manifest for check-in: %s", zName);
11441144
}
11451145
return p;
11461146
}
1147
+
1148
+/*
1149
+** The input blob is text that may or may not be a valid Fossil
1150
+** control artifact of some kind. This routine returns true if
1151
+** the input is a well-formed control artifact and false if it
1152
+** is not.
1153
+**
1154
+** This routine is optimized to return false quickly and with minimal
1155
+** work in the common case where the input is some random file.
1156
+*/
1157
+int manifest_is_well_formed(const char *zIn, int nIn){
1158
+ int i;
1159
+ int iRes;
1160
+ Manifest *pManifest;
1161
+ Blob copy, errmsg;
1162
+ remove_pgp_signature(&zIn, &nIn);
1163
+
1164
+ /* Check to see that the file begins with a "card" */
1165
+ if( nIn<3 ) return 0;
1166
+ if( zIn[0]<'A' || zIn[0]>'M' || zIn[1]!=' ' ) return 0;
1167
+
1168
+ /* Check to see that the first card is followed by one more card */
1169
+ for(i=2; i<nIn && zIn[i]!='\n'; i++){}
1170
+ if( i>=nIn-3 ) return 0;
1171
+ i++;
1172
+ if( !fossil_isupper(zIn[i]) || zIn[i]<zIn[0] || zIn[i+1]!=' ' ) return 0;
1173
+
1174
+ /* The checks above will eliminate most random inputs. If these
1175
+ ** quick checks pass, then we could be dealing with a well-formed
1176
+ ** control artifact. Make a copy, and run it through the official
1177
+ ** artifact parser. This is the slow path, but it is rarely taken.
1178
+ */
1179
+ blob_init(&copy, 0, 0);
1180
+ blob_init(&errmsg, 0, 0);
1181
+ blob_append(&copy, zIn, nIn);
1182
+ pManifest = manifest_parse(&copy, 0, &errmsg);
1183
+ iRes = pManifest!=0;
1184
+ manifest_destroy(pManifest);
1185
+ blob_reset(&errmsg);
1186
+ return iRes;
1187
+}
11471188
11481189
/*
11491190
** COMMAND: test-parse-manifest
11501191
**
11511192
** Usage: %fossil test-parse-manifest FILENAME ?N?
@@ -1156,11 +1197,11 @@
11561197
void manifest_test_parse_cmd(void){
11571198
Manifest *p;
11581199
Blob b;
11591200
int i;
11601201
int n = 1;
1161
- db_find_and_open_repository(0,0);
1202
+ db_find_and_open_repository(OPEN_SUBSTITUTE|OPEN_OK_NOT_FOUND,0);
11621203
verify_all_options();
11631204
if( g.argc!=3 && g.argc!=4 ){
11641205
usage("FILENAME");
11651206
}
11661207
blob_read_from_file(&b, g.argv[2], ExtFILE);
@@ -1179,42 +1220,73 @@
11791220
}
11801221
11811222
/*
11821223
** COMMAND: test-parse-all-blobs
11831224
**
1184
-** Usage: %fossil test-parse-all-blobs [--limit N]
1225
+** Usage: %fossil test-parse-all-blobs ?OPTIONS?
11851226
**
11861227
** Parse all entries in the BLOB table that are believed to be non-data
11871228
** artifacts and report any errors. Run this test command on historical
11881229
** repositories after making any changes to the manifest_parse()
11891230
** implementation to confirm that the changes did not break anything.
11901231
**
1191
-** If the --limit N argument is given, parse no more than N blobs
1232
+** Options:
1233
+**
1234
+** --limit N Parse no more than N artifacts before stopping.
1235
+** --wellformed Use all BLOB table entries as input, not just
1236
+** those entries that are believed to be valid
1237
+** artifacts, and verify that the result of
1238
+** manifest_is_well_formed() agrees with the
1239
+** result of manifest_parse().
11921240
*/
11931241
void manifest_test_parse_all_blobs_cmd(void){
11941242
Manifest *p;
11951243
Blob err;
11961244
Stmt q;
11971245
int nTest = 0;
11981246
int nErr = 0;
11991247
int N = 1000000000;
1248
+ int bWellFormed;
12001249
const char *z;
12011250
db_find_and_open_repository(0, 0);
12021251
z = find_option("limit", 0, 1);
12031252
if( z ) N = atoi(z);
1253
+ bWellFormed = find_option("wellformed",0,0)!=0;
12041254
verify_all_options();
1205
- db_prepare(&q, "SELECT DISTINCT objid FROM EVENT");
1255
+ if( bWellFormed ){
1256
+ db_prepare(&q, "SELECT rid FROM blob ORDER BY rid");
1257
+ }else{
1258
+ db_prepare(&q, "SELECT DISTINCT objid FROM EVENT ORDER BY objid");
1259
+ }
12061260
while( (N--)>0 && db_step(&q)==SQLITE_ROW ){
12071261
int id = db_column_int(&q,0);
12081262
fossil_print("Checking %d \r", id);
12091263
nTest++;
12101264
fflush(stdout);
12111265
blob_init(&err, 0, 0);
1212
- p = manifest_get(id, CFTYPE_ANY, &err);
1213
- if( p==0 ){
1214
- fossil_print("%d ERROR: %s\n", id, blob_str(&err));
1215
- nErr++;
1266
+ if( bWellFormed ){
1267
+ Blob content;
1268
+ int isWF;
1269
+ content_get(id, &content);
1270
+ isWF = manifest_is_well_formed(blob_buffer(&content),blob_size(&content));
1271
+ p = manifest_parse(&content, id, &err);
1272
+ if( isWF && p==0 ){
1273
+ fossil_print("%d ERROR: manifest_is_well_formed() reported true "
1274
+ "but manifest_parse() reports an error: %s\n",
1275
+ id, blob_str(&err));
1276
+ nErr++;
1277
+ }else if( !isWF && p!=0 ){
1278
+ fossil_print("%d ERROR: manifest_is_well_formed() reported false "
1279
+ "but manifest_parse() found nothing wrong.\n", id);
1280
+ nErr++;
1281
+ }
1282
+ }else{
1283
+ p = manifest_get(id, CFTYPE_ANY, &err);
1284
+ if( p==0 ){
1285
+ fossil_print("%d ERROR: %s\n", id, blob_str(&err));
1286
+ nErr++;
1287
+ }
12161288
}
12171289
blob_reset(&err);
12181290
manifest_destroy(p);
12191291
}
12201292
db_finalize(&q);
@@ -2060,11 +2132,11 @@
20602132
static const char zExtraLine[] =
20612133
"# Remove this line to create a well-formed manifest.\n";
20622134
20632135
z = zOrig = blob_materialize(p);
20642136
n = nOrig = blob_size(p);
2065
- remove_pgp_signature(&z, &n);
2137
+ remove_pgp_signature((const char **)&z, &n);
20662138
if( z==zOrig ){
20672139
blob_append(p, zExtraLine, -1);
20682140
}else{
20692141
int iEnd;
20702142
Blob copy;
20712143
--- src/manifest.c
+++ src/manifest.c
@@ -289,12 +289,12 @@
289 }
290
291 /*
292 ** Remove the PGP signature from the artifact, if there is one.
293 */
294 static void remove_pgp_signature(char **pz, int *pn){
295 char *z = *pz;
296 int n = *pn;
297 int i;
298 if( strncmp(z, "-----BEGIN PGP SIGNED MESSAGE-----", 34)!=0 ) return;
299 for(i=34; i<n && !after_blank_line(z+i); i++){}
300 if( i>=n ) return;
@@ -469,11 +469,11 @@
469 return 0;
470 }
471
472 /* Strip off the PGP signature if there is one.
473 */
474 remove_pgp_signature(&z, &n);
475
476 /* Verify that the first few characters of the artifact look like
477 ** a control artifact.
478 */
479 if( n<10 || z[0]<'A' || z[0]>'Z' || z[1]!=' ' ){
@@ -1142,10 +1142,51 @@
1142 if( p==0 ){
1143 fossil_fatal("cannot parse manifest for check-in: %s", zName);
1144 }
1145 return p;
1146 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1147
1148 /*
1149 ** COMMAND: test-parse-manifest
1150 **
1151 ** Usage: %fossil test-parse-manifest FILENAME ?N?
@@ -1156,11 +1197,11 @@
1156 void manifest_test_parse_cmd(void){
1157 Manifest *p;
1158 Blob b;
1159 int i;
1160 int n = 1;
1161 db_find_and_open_repository(0,0);
1162 verify_all_options();
1163 if( g.argc!=3 && g.argc!=4 ){
1164 usage("FILENAME");
1165 }
1166 blob_read_from_file(&b, g.argv[2], ExtFILE);
@@ -1179,42 +1220,73 @@
1179 }
1180
1181 /*
1182 ** COMMAND: test-parse-all-blobs
1183 **
1184 ** Usage: %fossil test-parse-all-blobs [--limit N]
1185 **
1186 ** Parse all entries in the BLOB table that are believed to be non-data
1187 ** artifacts and report any errors. Run this test command on historical
1188 ** repositories after making any changes to the manifest_parse()
1189 ** implementation to confirm that the changes did not break anything.
1190 **
1191 ** If the --limit N argument is given, parse no more than N blobs
 
 
 
 
 
 
 
1192 */
1193 void manifest_test_parse_all_blobs_cmd(void){
1194 Manifest *p;
1195 Blob err;
1196 Stmt q;
1197 int nTest = 0;
1198 int nErr = 0;
1199 int N = 1000000000;
 
1200 const char *z;
1201 db_find_and_open_repository(0, 0);
1202 z = find_option("limit", 0, 1);
1203 if( z ) N = atoi(z);
 
1204 verify_all_options();
1205 db_prepare(&q, "SELECT DISTINCT objid FROM EVENT");
 
 
 
 
1206 while( (N--)>0 && db_step(&q)==SQLITE_ROW ){
1207 int id = db_column_int(&q,0);
1208 fossil_print("Checking %d \r", id);
1209 nTest++;
1210 fflush(stdout);
1211 blob_init(&err, 0, 0);
1212 p = manifest_get(id, CFTYPE_ANY, &err);
1213 if( p==0 ){
1214 fossil_print("%d ERROR: %s\n", id, blob_str(&err));
1215 nErr++;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1216 }
1217 blob_reset(&err);
1218 manifest_destroy(p);
1219 }
1220 db_finalize(&q);
@@ -2060,11 +2132,11 @@
2060 static const char zExtraLine[] =
2061 "# Remove this line to create a well-formed manifest.\n";
2062
2063 z = zOrig = blob_materialize(p);
2064 n = nOrig = blob_size(p);
2065 remove_pgp_signature(&z, &n);
2066 if( z==zOrig ){
2067 blob_append(p, zExtraLine, -1);
2068 }else{
2069 int iEnd;
2070 Blob copy;
2071
--- src/manifest.c
+++ src/manifest.c
@@ -289,12 +289,12 @@
289 }
290
291 /*
292 ** Remove the PGP signature from the artifact, if there is one.
293 */
294 static void remove_pgp_signature(const char **pz, int *pn){
295 const char *z = *pz;
296 int n = *pn;
297 int i;
298 if( strncmp(z, "-----BEGIN PGP SIGNED MESSAGE-----", 34)!=0 ) return;
299 for(i=34; i<n && !after_blank_line(z+i); i++){}
300 if( i>=n ) return;
@@ -469,11 +469,11 @@
469 return 0;
470 }
471
472 /* Strip off the PGP signature if there is one.
473 */
474 remove_pgp_signature((const char**)&z, &n);
475
476 /* Verify that the first few characters of the artifact look like
477 ** a control artifact.
478 */
479 if( n<10 || z[0]<'A' || z[0]>'Z' || z[1]!=' ' ){
@@ -1142,10 +1142,51 @@
1142 if( p==0 ){
1143 fossil_fatal("cannot parse manifest for check-in: %s", zName);
1144 }
1145 return p;
1146 }
1147
1148 /*
1149 ** The input blob is text that may or may not be a valid Fossil
1150 ** control artifact of some kind. This routine returns true if
1151 ** the input is a well-formed control artifact and false if it
1152 ** is not.
1153 **
1154 ** This routine is optimized to return false quickly and with minimal
1155 ** work in the common case where the input is some random file.
1156 */
1157 int manifest_is_well_formed(const char *zIn, int nIn){
1158 int i;
1159 int iRes;
1160 Manifest *pManifest;
1161 Blob copy, errmsg;
1162 remove_pgp_signature(&zIn, &nIn);
1163
1164 /* Check to see that the file begins with a "card" */
1165 if( nIn<3 ) return 0;
1166 if( zIn[0]<'A' || zIn[0]>'M' || zIn[1]!=' ' ) return 0;
1167
1168 /* Check to see that the first card is followed by one more card */
1169 for(i=2; i<nIn && zIn[i]!='\n'; i++){}
1170 if( i>=nIn-3 ) return 0;
1171 i++;
1172 if( !fossil_isupper(zIn[i]) || zIn[i]<zIn[0] || zIn[i+1]!=' ' ) return 0;
1173
1174 /* The checks above will eliminate most random inputs. If these
1175 ** quick checks pass, then we could be dealing with a well-formed
1176 ** control artifact. Make a copy, and run it through the official
1177 ** artifact parser. This is the slow path, but it is rarely taken.
1178 */
1179 blob_init(&copy, 0, 0);
1180 blob_init(&errmsg, 0, 0);
1181 blob_append(&copy, zIn, nIn);
1182 pManifest = manifest_parse(&copy, 0, &errmsg);
1183 iRes = pManifest!=0;
1184 manifest_destroy(pManifest);
1185 blob_reset(&errmsg);
1186 return iRes;
1187 }
1188
1189 /*
1190 ** COMMAND: test-parse-manifest
1191 **
1192 ** Usage: %fossil test-parse-manifest FILENAME ?N?
@@ -1156,11 +1197,11 @@
1197 void manifest_test_parse_cmd(void){
1198 Manifest *p;
1199 Blob b;
1200 int i;
1201 int n = 1;
1202 db_find_and_open_repository(OPEN_SUBSTITUTE|OPEN_OK_NOT_FOUND,0);
1203 verify_all_options();
1204 if( g.argc!=3 && g.argc!=4 ){
1205 usage("FILENAME");
1206 }
1207 blob_read_from_file(&b, g.argv[2], ExtFILE);
@@ -1179,42 +1220,73 @@
1220 }
1221
1222 /*
1223 ** COMMAND: test-parse-all-blobs
1224 **
1225 ** Usage: %fossil test-parse-all-blobs ?OPTIONS?
1226 **
1227 ** Parse all entries in the BLOB table that are believed to be non-data
1228 ** artifacts and report any errors. Run this test command on historical
1229 ** repositories after making any changes to the manifest_parse()
1230 ** implementation to confirm that the changes did not break anything.
1231 **
1232 ** Options:
1233 **
1234 ** --limit N Parse no more than N artifacts before stopping.
1235 ** --wellformed Use all BLOB table entries as input, not just
1236 ** those entries that are believed to be valid
1237 ** artifacts, and verify that the result of
1238 ** manifest_is_well_formed() agrees with the
1239 ** result of manifest_parse().
1240 */
1241 void manifest_test_parse_all_blobs_cmd(void){
1242 Manifest *p;
1243 Blob err;
1244 Stmt q;
1245 int nTest = 0;
1246 int nErr = 0;
1247 int N = 1000000000;
1248 int bWellFormed;
1249 const char *z;
1250 db_find_and_open_repository(0, 0);
1251 z = find_option("limit", 0, 1);
1252 if( z ) N = atoi(z);
1253 bWellFormed = find_option("wellformed",0,0)!=0;
1254 verify_all_options();
1255 if( bWellFormed ){
1256 db_prepare(&q, "SELECT rid FROM blob ORDER BY rid");
1257 }else{
1258 db_prepare(&q, "SELECT DISTINCT objid FROM EVENT ORDER BY objid");
1259 }
1260 while( (N--)>0 && db_step(&q)==SQLITE_ROW ){
1261 int id = db_column_int(&q,0);
1262 fossil_print("Checking %d \r", id);
1263 nTest++;
1264 fflush(stdout);
1265 blob_init(&err, 0, 0);
1266 if( bWellFormed ){
1267 Blob content;
1268 int isWF;
1269 content_get(id, &content);
1270 isWF = manifest_is_well_formed(blob_buffer(&content),blob_size(&content));
1271 p = manifest_parse(&content, id, &err);
1272 if( isWF && p==0 ){
1273 fossil_print("%d ERROR: manifest_is_well_formed() reported true "
1274 "but manifest_parse() reports an error: %s\n",
1275 id, blob_str(&err));
1276 nErr++;
1277 }else if( !isWF && p!=0 ){
1278 fossil_print("%d ERROR: manifest_is_well_formed() reported false "
1279 "but manifest_parse() found nothing wrong.\n", id);
1280 nErr++;
1281 }
1282 }else{
1283 p = manifest_get(id, CFTYPE_ANY, &err);
1284 if( p==0 ){
1285 fossil_print("%d ERROR: %s\n", id, blob_str(&err));
1286 nErr++;
1287 }
1288 }
1289 blob_reset(&err);
1290 manifest_destroy(p);
1291 }
1292 db_finalize(&q);
@@ -2060,11 +2132,11 @@
2132 static const char zExtraLine[] =
2133 "# Remove this line to create a well-formed manifest.\n";
2134
2135 z = zOrig = blob_materialize(p);
2136 n = nOrig = blob_size(p);
2137 remove_pgp_signature((const char **)&z, &n);
2138 if( z==zOrig ){
2139 blob_append(p, zExtraLine, -1);
2140 }else{
2141 int iEnd;
2142 Blob copy;
2143

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button