Fossil SCM
Another incremental check-in on the email format decoder.
Commit
ec3fccd3b236135262ab9dcfb04a59e4cf95c8a638e84a1652d893d483152457
Parent
11c82d816969039…
1 file changed
+92
-58
+92
-58
| --- src/webmail.c | ||
| +++ src/webmail.c | ||
| @@ -39,21 +39,21 @@ | ||
| 39 | 39 | */ |
| 40 | 40 | struct EmailBody { |
| 41 | 41 | char zMimetype[32]; /* Mimetype */ |
| 42 | 42 | u8 encoding; /* Type of encoding */ |
| 43 | 43 | char *zFilename; /* From content-disposition: */ |
| 44 | - Blob content; /* Encoded content for this segment */ | |
| 44 | + char *zContent; /* Content. \0 terminator inserted */ | |
| 45 | 45 | }; |
| 46 | 46 | |
| 47 | 47 | /* |
| 48 | 48 | ** An instance of the following object describes the structure of |
| 49 | 49 | ** an rfc-2822 email message. |
| 50 | 50 | */ |
| 51 | 51 | struct EmailToc { |
| 52 | 52 | int nHdr; /* Number of header lines */ |
| 53 | 53 | int nHdrAlloc; /* Number of header lines allocated */ |
| 54 | - int *aHdr; /* Two integers for each hdr line, offset and length */ | |
| 54 | + char **azHdr; /* Pointer to header line. \0 terminator inserted */ | |
| 55 | 55 | int nBody; /* Number of body segments */ |
| 56 | 56 | int nBodyAlloc; /* Number of body segments allocated */ |
| 57 | 57 | EmailBody *aBody; /* Location of body information */ |
| 58 | 58 | }; |
| 59 | 59 | #endif |
| @@ -61,14 +61,13 @@ | ||
| 61 | 61 | /* |
| 62 | 62 | ** Free An EmailToc object |
| 63 | 63 | */ |
| 64 | 64 | void emailtoc_free(EmailToc *p){ |
| 65 | 65 | int i; |
| 66 | - fossil_free(p->aHdr); | |
| 66 | + fossil_free(p->azHdr); | |
| 67 | 67 | for(i=0; i<p->nBody; i++){ |
| 68 | 68 | fossil_free(p->aBody[i].zFilename); |
| 69 | - blob_reset(&p->aBody[i].content); | |
| 70 | 69 | } |
| 71 | 70 | fossil_free(p->aBody); |
| 72 | 71 | fossil_free(p); |
| 73 | 72 | } |
| 74 | 73 | |
| @@ -91,25 +90,23 @@ | ||
| 91 | 90 | p->nBodyAlloc = (p->nBodyAlloc+1)*2; |
| 92 | 91 | p->aBody = fossil_realloc(p->aBody, sizeof(p->aBody[0])*p->nBodyAlloc); |
| 93 | 92 | } |
| 94 | 93 | pNew = &p->aBody[p->nBody-1]; |
| 95 | 94 | memset(pNew, 0, sizeof(*pNew)); |
| 96 | - pNew->content = empty_blob; | |
| 97 | 95 | return pNew; |
| 98 | 96 | } |
| 99 | 97 | |
| 100 | 98 | /* |
| 101 | 99 | ** Add a new header line to the EmailToc. |
| 102 | 100 | */ |
| 103 | -void emailtoc_new_header_line(EmailToc *p, int iOfst, int nAmt){ | |
| 101 | +void emailtoc_new_header_line(EmailToc *p, char *z){ | |
| 104 | 102 | p->nHdr++; |
| 105 | 103 | if( p->nHdr>p->nHdrAlloc ){ |
| 106 | 104 | p->nHdrAlloc = (p->nHdrAlloc+1)*2; |
| 107 | - p->aHdr = fossil_realloc(p->aHdr, sizeof(int)*2*p->nHdrAlloc); | |
| 105 | + p->azHdr = fossil_realloc(p->azHdr, sizeof(p->azHdr[0])*p->nHdrAlloc); | |
| 108 | 106 | } |
| 109 | - p->aHdr[p->nHdr*2-2] = iOfst; | |
| 110 | - p->aHdr[p->nHdr*2-1] = nAmt; | |
| 107 | + p->azHdr[p->nHdr-1] = z; | |
| 111 | 108 | } |
| 112 | 109 | |
| 113 | 110 | /* |
| 114 | 111 | ** Return the length of a line in an email header. Continuation lines |
| 115 | 112 | ** are included. Hence, this routine returns the number of bytes up to |
| @@ -124,58 +121,45 @@ | ||
| 124 | 121 | } |
| 125 | 122 | |
| 126 | 123 | /* |
| 127 | 124 | ** Return a pointer to the first non-whitespace character in z |
| 128 | 125 | */ |
| 129 | -static const char *firstToken(const char *z, int n){ | |
| 130 | - while( n>0 && fossil_isspace(*z) ){ | |
| 131 | - n--; | |
| 132 | - z++; | |
| 133 | - } | |
| 134 | - return n>0 ? z : 0; | |
| 135 | -} | |
| 136 | - | |
| 137 | -/* | |
| 138 | -** The n-bytes of content in z are a multipart/ body component for | |
| 139 | -** an email message. Decode this into its individual segments. | |
| 140 | -** | |
| 141 | -** The component should start and end with a boundary line. There | |
| 142 | -** may be additional boundary lines in the middle. | |
| 143 | -*/ | |
| 144 | -static void emailtoc_add_multipart( | |
| 145 | - EmailToc *p, /* Append the segments here */ | |
| 146 | - Blob *pEmail, /* The original full email raw text */ | |
| 147 | - const char *z, /* The body component */ | |
| 148 | - int n /* Bytes of content in z[] */ | |
| 149 | -){ | |
| 150 | - return; | |
| 151 | -} | |
| 152 | - | |
| 153 | - | |
| 154 | -/* | |
| 155 | -** Compute a table-of-contents (EmailToc) for the email message | |
| 156 | -** provided on the input. | |
| 157 | -*/ | |
| 158 | -EmailToc *emailtoc_from_email(Blob *pEmail){ | |
| 159 | - const char *z; | |
| 160 | - int i; | |
| 161 | - int n; | |
| 162 | - int multipartBody = 0; | |
| 163 | - EmailToc *p = emailtoc_alloc(); | |
| 164 | - EmailBody *pBody = emailtoc_new_body(p); | |
| 165 | - blob_terminate(pEmail); | |
| 166 | - z = blob_buffer(pEmail); | |
| 167 | - i = 0; | |
| 126 | +static const char *firstToken(const char *z){ | |
| 127 | + while( fossil_isspace(*z) ){ | |
| 128 | + z++; | |
| 129 | + } | |
| 130 | + return z; | |
| 131 | +} | |
| 132 | + | |
| 133 | +/* | |
| 134 | +** The n-bytes of content in z is a single multipart mime segment | |
| 135 | +** with its own header and body. Decode this one segment and add it to p; | |
| 136 | +** | |
| 137 | +** Rows of the header of the segment are added to p if bAddHeader is | |
| 138 | +** true. | |
| 139 | +*/ | |
| 140 | +LOCAL void emailtoc_add_multipart_segment( | |
| 141 | + EmailToc *p, /* Append the segments here */ | |
| 142 | + char *z, /* The body component */ | |
| 143 | + int bAddHeader /* True to add header lines to p */ | |
| 144 | +){ | |
| 145 | + int i, j; | |
| 146 | + int n; | |
| 147 | + int multipartBody = 0; | |
| 148 | + EmailBody *pBody = emailtoc_new_body(p); | |
| 149 | + i = 0; | |
| 168 | 150 | while( z[i] ){ |
| 169 | 151 | n = email_line_length(&z[i]); |
| 170 | 152 | if( (n==2 && z[i]=='\r' && z[i+1]=='\n') || z[i]=='\n' || n==0 ){ |
| 171 | 153 | /* This is the blank line at the end of the header */ |
| 172 | 154 | i += n; |
| 173 | 155 | break; |
| 174 | 156 | } |
| 157 | + for(j=i+n; j>i && fossil_isspace(z[j-1]); j--){} | |
| 158 | + z[j] = 0; | |
| 175 | 159 | if( sqlite3_strnicmp(z+i, "Content-Type:", 13)==0 ){ |
| 176 | - const char *z2 = firstToken(z+i+13, n-13); | |
| 160 | + const char *z2 = firstToken(z+i+13); | |
| 177 | 161 | if( z2 && strncmp(z2, "multipart/", 10)==0 ){ |
| 178 | 162 | multipartBody = 1; |
| 179 | 163 | }else{ |
| 180 | 164 | int j; |
| 181 | 165 | for(j=0; z2[j]=='/' || fossil_isalnum(z2[j]); j++){} |
| @@ -184,30 +168,82 @@ | ||
| 184 | 168 | pBody->zMimetype[j] = 0; |
| 185 | 169 | } |
| 186 | 170 | } |
| 187 | 171 | /* 123456789 123456789 123456 */ |
| 188 | 172 | if( sqlite3_strnicmp(z+i, "Content-Transfer-Encoding:", 26)==0 ){ |
| 189 | - const char *z2 = firstToken(z+(i+26), n-26); | |
| 173 | + const char *z2 = firstToken(z+(i+26)); | |
| 190 | 174 | if( z2 && sqlite3_strnicmp(z2, "base64", 6)==0 ){ |
| 191 | 175 | pBody->encoding = EMAILENC_B64; |
| 192 | 176 | /* 123456789 123456 */ |
| 193 | 177 | }else if( sqlite3_strnicmp(z2, "quoted-printable", 16)==0 ){ |
| 194 | 178 | pBody->encoding = EMAILENC_QUOTED; |
| 195 | 179 | }else{ |
| 196 | 180 | pBody->encoding = EMAILENC_NONE; |
| 197 | 181 | } |
| 198 | 182 | } |
| 199 | - emailtoc_new_header_line(p, i, n); | |
| 183 | + if( bAddHeader ) emailtoc_new_header_line(p, z+i); | |
| 200 | 184 | i += n; |
| 201 | 185 | } |
| 202 | - n = blob_size(pEmail) - i; | |
| 203 | 186 | if( multipartBody ){ |
| 204 | 187 | p->nBody--; |
| 205 | - emailtoc_add_multipart(p, pEmail, z+i, n); | |
| 188 | + emailtoc_add_multipart(p, z+i); | |
| 206 | 189 | }else{ |
| 207 | - blob_init(&pBody->content, z+i, n); | |
| 190 | + pBody->zContent = z+i; | |
| 191 | + } | |
| 192 | +} | |
| 193 | + | |
| 194 | +/* | |
| 195 | +** The n-bytes of content in z are a multipart/ body component for | |
| 196 | +** an email message. Decode this into its individual segments. | |
| 197 | +** | |
| 198 | +** The component should start and end with a boundary line. There | |
| 199 | +** may be additional boundary lines in the middle. | |
| 200 | +*/ | |
| 201 | +LOCAL void emailtoc_add_multipart( | |
| 202 | + EmailToc *p, /* Append the segments here */ | |
| 203 | + char *z /* The body component. zero-terminated */ | |
| 204 | +){ | |
| 205 | + int nB; /* Size of the boundary string */ | |
| 206 | + int iStart; /* Start of the coding region past boundary mark */ | |
| 207 | + int i; /* Loop index */ | |
| 208 | + char *zBoundary = 0; /* Boundary marker */ | |
| 209 | + | |
| 210 | + /* Find the length of the boundary mark. */ | |
| 211 | + while( fossil_isspace(z[0]) ) z++; | |
| 212 | + zBoundary = z; | |
| 213 | + for(nB=0; z[nB] && !fossil_isspace(z[nB]); nB++){} | |
| 214 | + if( nB==0 ) return; | |
| 215 | + z += nB; | |
| 216 | + while( fossil_isspace(z[0]) ) z++; | |
| 217 | + zBoundary[nB] = 0; | |
| 218 | + for(i=iStart=0; z[i]; i++){ | |
| 219 | + if( z[i]=='\n' && strncmp(z+i+1, zBoundary, nB)==0 ){ | |
| 220 | + z[i+1] = 0; | |
| 221 | + emailtoc_add_multipart_segment(p, z+iStart, 0); | |
| 222 | + iStart = i+nB; | |
| 223 | + if( z[iStart]=='-' && z[iStart+1]=='-' ) return; | |
| 224 | + while( fossil_isspace(z[iStart]) ) iStart++; | |
| 225 | + i = iStart; | |
| 226 | + } | |
| 208 | 227 | } |
| 228 | +} | |
| 229 | + | |
| 230 | + | |
| 231 | +/* | |
| 232 | +** Compute a table-of-contents (EmailToc) for the email message | |
| 233 | +** provided on the input. | |
| 234 | +** | |
| 235 | +** This routine will cause pEmail to become zero-terminated if it is | |
| 236 | +** not already. It will also insert zero characters into parts of | |
| 237 | +** the message, to delimit the various components. | |
| 238 | +*/ | |
| 239 | +EmailToc *emailtoc_from_email(Blob *pEmail){ | |
| 240 | + char *z; | |
| 241 | + EmailToc *p = emailtoc_alloc(); | |
| 242 | + blob_terminate(pEmail); | |
| 243 | + z = blob_buffer(pEmail); | |
| 244 | + emailtoc_add_multipart_segment(p, z, 1); | |
| 209 | 245 | return p; |
| 210 | 246 | } |
| 211 | 247 | |
| 212 | 248 | /* |
| 213 | 249 | ** COMMAND: test-decode-email |
| @@ -219,25 +255,23 @@ | ||
| 219 | 255 | */ |
| 220 | 256 | void test_email_decode_cmd(void){ |
| 221 | 257 | Blob email; |
| 222 | 258 | EmailToc *p; |
| 223 | 259 | int i; |
| 224 | - const char *z; | |
| 225 | 260 | verify_all_options(); |
| 226 | 261 | if( g.argc!=3 ) usage("FILE"); |
| 227 | 262 | blob_read_from_file(&email, g.argv[2], ExtFILE); |
| 228 | 263 | p = emailtoc_from_email(&email); |
| 229 | - z = blob_buffer(&email); | |
| 230 | 264 | fossil_print("%d header line and %d content segments\n", |
| 231 | 265 | p->nHdr, p->nBody); |
| 232 | 266 | for(i=0; i<p->nHdr; i++){ |
| 233 | - fossil_print("%3d: %.*s", i, p->aHdr[i*2+1], z+p->aHdr[i*2]); | |
| 267 | + fossil_print("%3d: %s\n", i, p->azHdr[i]); | |
| 234 | 268 | } |
| 235 | 269 | for(i=0; i<p->nBody; i++){ |
| 236 | 270 | fossil_print("\nBODY %d mime \"%s\" encoding %d:\n", |
| 237 | 271 | i, p->aBody[i].zMimetype, p->aBody[i].encoding); |
| 238 | - fossil_print("%s\n", blob_str(&p->aBody[i].content)); | |
| 272 | + fossil_print("%s\n", p->aBody[i].zContent); | |
| 239 | 273 | } |
| 240 | 274 | emailtoc_free(p); |
| 241 | 275 | blob_reset(&email); |
| 242 | 276 | } |
| 243 | 277 | |
| 244 | 278 |
| --- src/webmail.c | |
| +++ src/webmail.c | |
| @@ -39,21 +39,21 @@ | |
| 39 | */ |
| 40 | struct EmailBody { |
| 41 | char zMimetype[32]; /* Mimetype */ |
| 42 | u8 encoding; /* Type of encoding */ |
| 43 | char *zFilename; /* From content-disposition: */ |
| 44 | Blob content; /* Encoded content for this segment */ |
| 45 | }; |
| 46 | |
| 47 | /* |
| 48 | ** An instance of the following object describes the structure of |
| 49 | ** an rfc-2822 email message. |
| 50 | */ |
| 51 | struct EmailToc { |
| 52 | int nHdr; /* Number of header lines */ |
| 53 | int nHdrAlloc; /* Number of header lines allocated */ |
| 54 | int *aHdr; /* Two integers for each hdr line, offset and length */ |
| 55 | int nBody; /* Number of body segments */ |
| 56 | int nBodyAlloc; /* Number of body segments allocated */ |
| 57 | EmailBody *aBody; /* Location of body information */ |
| 58 | }; |
| 59 | #endif |
| @@ -61,14 +61,13 @@ | |
| 61 | /* |
| 62 | ** Free An EmailToc object |
| 63 | */ |
| 64 | void emailtoc_free(EmailToc *p){ |
| 65 | int i; |
| 66 | fossil_free(p->aHdr); |
| 67 | for(i=0; i<p->nBody; i++){ |
| 68 | fossil_free(p->aBody[i].zFilename); |
| 69 | blob_reset(&p->aBody[i].content); |
| 70 | } |
| 71 | fossil_free(p->aBody); |
| 72 | fossil_free(p); |
| 73 | } |
| 74 | |
| @@ -91,25 +90,23 @@ | |
| 91 | p->nBodyAlloc = (p->nBodyAlloc+1)*2; |
| 92 | p->aBody = fossil_realloc(p->aBody, sizeof(p->aBody[0])*p->nBodyAlloc); |
| 93 | } |
| 94 | pNew = &p->aBody[p->nBody-1]; |
| 95 | memset(pNew, 0, sizeof(*pNew)); |
| 96 | pNew->content = empty_blob; |
| 97 | return pNew; |
| 98 | } |
| 99 | |
| 100 | /* |
| 101 | ** Add a new header line to the EmailToc. |
| 102 | */ |
| 103 | void emailtoc_new_header_line(EmailToc *p, int iOfst, int nAmt){ |
| 104 | p->nHdr++; |
| 105 | if( p->nHdr>p->nHdrAlloc ){ |
| 106 | p->nHdrAlloc = (p->nHdrAlloc+1)*2; |
| 107 | p->aHdr = fossil_realloc(p->aHdr, sizeof(int)*2*p->nHdrAlloc); |
| 108 | } |
| 109 | p->aHdr[p->nHdr*2-2] = iOfst; |
| 110 | p->aHdr[p->nHdr*2-1] = nAmt; |
| 111 | } |
| 112 | |
| 113 | /* |
| 114 | ** Return the length of a line in an email header. Continuation lines |
| 115 | ** are included. Hence, this routine returns the number of bytes up to |
| @@ -124,58 +121,45 @@ | |
| 124 | } |
| 125 | |
| 126 | /* |
| 127 | ** Return a pointer to the first non-whitespace character in z |
| 128 | */ |
| 129 | static const char *firstToken(const char *z, int n){ |
| 130 | while( n>0 && fossil_isspace(*z) ){ |
| 131 | n--; |
| 132 | z++; |
| 133 | } |
| 134 | return n>0 ? z : 0; |
| 135 | } |
| 136 | |
| 137 | /* |
| 138 | ** The n-bytes of content in z are a multipart/ body component for |
| 139 | ** an email message. Decode this into its individual segments. |
| 140 | ** |
| 141 | ** The component should start and end with a boundary line. There |
| 142 | ** may be additional boundary lines in the middle. |
| 143 | */ |
| 144 | static void emailtoc_add_multipart( |
| 145 | EmailToc *p, /* Append the segments here */ |
| 146 | Blob *pEmail, /* The original full email raw text */ |
| 147 | const char *z, /* The body component */ |
| 148 | int n /* Bytes of content in z[] */ |
| 149 | ){ |
| 150 | return; |
| 151 | } |
| 152 | |
| 153 | |
| 154 | /* |
| 155 | ** Compute a table-of-contents (EmailToc) for the email message |
| 156 | ** provided on the input. |
| 157 | */ |
| 158 | EmailToc *emailtoc_from_email(Blob *pEmail){ |
| 159 | const char *z; |
| 160 | int i; |
| 161 | int n; |
| 162 | int multipartBody = 0; |
| 163 | EmailToc *p = emailtoc_alloc(); |
| 164 | EmailBody *pBody = emailtoc_new_body(p); |
| 165 | blob_terminate(pEmail); |
| 166 | z = blob_buffer(pEmail); |
| 167 | i = 0; |
| 168 | while( z[i] ){ |
| 169 | n = email_line_length(&z[i]); |
| 170 | if( (n==2 && z[i]=='\r' && z[i+1]=='\n') || z[i]=='\n' || n==0 ){ |
| 171 | /* This is the blank line at the end of the header */ |
| 172 | i += n; |
| 173 | break; |
| 174 | } |
| 175 | if( sqlite3_strnicmp(z+i, "Content-Type:", 13)==0 ){ |
| 176 | const char *z2 = firstToken(z+i+13, n-13); |
| 177 | if( z2 && strncmp(z2, "multipart/", 10)==0 ){ |
| 178 | multipartBody = 1; |
| 179 | }else{ |
| 180 | int j; |
| 181 | for(j=0; z2[j]=='/' || fossil_isalnum(z2[j]); j++){} |
| @@ -184,30 +168,82 @@ | |
| 184 | pBody->zMimetype[j] = 0; |
| 185 | } |
| 186 | } |
| 187 | /* 123456789 123456789 123456 */ |
| 188 | if( sqlite3_strnicmp(z+i, "Content-Transfer-Encoding:", 26)==0 ){ |
| 189 | const char *z2 = firstToken(z+(i+26), n-26); |
| 190 | if( z2 && sqlite3_strnicmp(z2, "base64", 6)==0 ){ |
| 191 | pBody->encoding = EMAILENC_B64; |
| 192 | /* 123456789 123456 */ |
| 193 | }else if( sqlite3_strnicmp(z2, "quoted-printable", 16)==0 ){ |
| 194 | pBody->encoding = EMAILENC_QUOTED; |
| 195 | }else{ |
| 196 | pBody->encoding = EMAILENC_NONE; |
| 197 | } |
| 198 | } |
| 199 | emailtoc_new_header_line(p, i, n); |
| 200 | i += n; |
| 201 | } |
| 202 | n = blob_size(pEmail) - i; |
| 203 | if( multipartBody ){ |
| 204 | p->nBody--; |
| 205 | emailtoc_add_multipart(p, pEmail, z+i, n); |
| 206 | }else{ |
| 207 | blob_init(&pBody->content, z+i, n); |
| 208 | } |
| 209 | return p; |
| 210 | } |
| 211 | |
| 212 | /* |
| 213 | ** COMMAND: test-decode-email |
| @@ -219,25 +255,23 @@ | |
| 219 | */ |
| 220 | void test_email_decode_cmd(void){ |
| 221 | Blob email; |
| 222 | EmailToc *p; |
| 223 | int i; |
| 224 | const char *z; |
| 225 | verify_all_options(); |
| 226 | if( g.argc!=3 ) usage("FILE"); |
| 227 | blob_read_from_file(&email, g.argv[2], ExtFILE); |
| 228 | p = emailtoc_from_email(&email); |
| 229 | z = blob_buffer(&email); |
| 230 | fossil_print("%d header line and %d content segments\n", |
| 231 | p->nHdr, p->nBody); |
| 232 | for(i=0; i<p->nHdr; i++){ |
| 233 | fossil_print("%3d: %.*s", i, p->aHdr[i*2+1], z+p->aHdr[i*2]); |
| 234 | } |
| 235 | for(i=0; i<p->nBody; i++){ |
| 236 | fossil_print("\nBODY %d mime \"%s\" encoding %d:\n", |
| 237 | i, p->aBody[i].zMimetype, p->aBody[i].encoding); |
| 238 | fossil_print("%s\n", blob_str(&p->aBody[i].content)); |
| 239 | } |
| 240 | emailtoc_free(p); |
| 241 | blob_reset(&email); |
| 242 | } |
| 243 | |
| 244 |
| --- src/webmail.c | |
| +++ src/webmail.c | |
| @@ -39,21 +39,21 @@ | |
| 39 | */ |
| 40 | struct EmailBody { |
| 41 | char zMimetype[32]; /* Mimetype */ |
| 42 | u8 encoding; /* Type of encoding */ |
| 43 | char *zFilename; /* From content-disposition: */ |
| 44 | char *zContent; /* Content. \0 terminator inserted */ |
| 45 | }; |
| 46 | |
| 47 | /* |
| 48 | ** An instance of the following object describes the structure of |
| 49 | ** an rfc-2822 email message. |
| 50 | */ |
| 51 | struct EmailToc { |
| 52 | int nHdr; /* Number of header lines */ |
| 53 | int nHdrAlloc; /* Number of header lines allocated */ |
| 54 | char **azHdr; /* Pointer to header line. \0 terminator inserted */ |
| 55 | int nBody; /* Number of body segments */ |
| 56 | int nBodyAlloc; /* Number of body segments allocated */ |
| 57 | EmailBody *aBody; /* Location of body information */ |
| 58 | }; |
| 59 | #endif |
| @@ -61,14 +61,13 @@ | |
| 61 | /* |
| 62 | ** Free An EmailToc object |
| 63 | */ |
| 64 | void emailtoc_free(EmailToc *p){ |
| 65 | int i; |
| 66 | fossil_free(p->azHdr); |
| 67 | for(i=0; i<p->nBody; i++){ |
| 68 | fossil_free(p->aBody[i].zFilename); |
| 69 | } |
| 70 | fossil_free(p->aBody); |
| 71 | fossil_free(p); |
| 72 | } |
| 73 | |
| @@ -91,25 +90,23 @@ | |
| 90 | p->nBodyAlloc = (p->nBodyAlloc+1)*2; |
| 91 | p->aBody = fossil_realloc(p->aBody, sizeof(p->aBody[0])*p->nBodyAlloc); |
| 92 | } |
| 93 | pNew = &p->aBody[p->nBody-1]; |
| 94 | memset(pNew, 0, sizeof(*pNew)); |
| 95 | return pNew; |
| 96 | } |
| 97 | |
| 98 | /* |
| 99 | ** Add a new header line to the EmailToc. |
| 100 | */ |
| 101 | void emailtoc_new_header_line(EmailToc *p, char *z){ |
| 102 | p->nHdr++; |
| 103 | if( p->nHdr>p->nHdrAlloc ){ |
| 104 | p->nHdrAlloc = (p->nHdrAlloc+1)*2; |
| 105 | p->azHdr = fossil_realloc(p->azHdr, sizeof(p->azHdr[0])*p->nHdrAlloc); |
| 106 | } |
| 107 | p->azHdr[p->nHdr-1] = z; |
| 108 | } |
| 109 | |
| 110 | /* |
| 111 | ** Return the length of a line in an email header. Continuation lines |
| 112 | ** are included. Hence, this routine returns the number of bytes up to |
| @@ -124,58 +121,45 @@ | |
| 121 | } |
| 122 | |
| 123 | /* |
| 124 | ** Return a pointer to the first non-whitespace character in z |
| 125 | */ |
| 126 | static const char *firstToken(const char *z){ |
| 127 | while( fossil_isspace(*z) ){ |
| 128 | z++; |
| 129 | } |
| 130 | return z; |
| 131 | } |
| 132 | |
| 133 | /* |
| 134 | ** The n-bytes of content in z is a single multipart mime segment |
| 135 | ** with its own header and body. Decode this one segment and add it to p; |
| 136 | ** |
| 137 | ** Rows of the header of the segment are added to p if bAddHeader is |
| 138 | ** true. |
| 139 | */ |
| 140 | LOCAL void emailtoc_add_multipart_segment( |
| 141 | EmailToc *p, /* Append the segments here */ |
| 142 | char *z, /* The body component */ |
| 143 | int bAddHeader /* True to add header lines to p */ |
| 144 | ){ |
| 145 | int i, j; |
| 146 | int n; |
| 147 | int multipartBody = 0; |
| 148 | EmailBody *pBody = emailtoc_new_body(p); |
| 149 | i = 0; |
| 150 | while( z[i] ){ |
| 151 | n = email_line_length(&z[i]); |
| 152 | if( (n==2 && z[i]=='\r' && z[i+1]=='\n') || z[i]=='\n' || n==0 ){ |
| 153 | /* This is the blank line at the end of the header */ |
| 154 | i += n; |
| 155 | break; |
| 156 | } |
| 157 | for(j=i+n; j>i && fossil_isspace(z[j-1]); j--){} |
| 158 | z[j] = 0; |
| 159 | if( sqlite3_strnicmp(z+i, "Content-Type:", 13)==0 ){ |
| 160 | const char *z2 = firstToken(z+i+13); |
| 161 | if( z2 && strncmp(z2, "multipart/", 10)==0 ){ |
| 162 | multipartBody = 1; |
| 163 | }else{ |
| 164 | int j; |
| 165 | for(j=0; z2[j]=='/' || fossil_isalnum(z2[j]); j++){} |
| @@ -184,30 +168,82 @@ | |
| 168 | pBody->zMimetype[j] = 0; |
| 169 | } |
| 170 | } |
| 171 | /* 123456789 123456789 123456 */ |
| 172 | if( sqlite3_strnicmp(z+i, "Content-Transfer-Encoding:", 26)==0 ){ |
| 173 | const char *z2 = firstToken(z+(i+26)); |
| 174 | if( z2 && sqlite3_strnicmp(z2, "base64", 6)==0 ){ |
| 175 | pBody->encoding = EMAILENC_B64; |
| 176 | /* 123456789 123456 */ |
| 177 | }else if( sqlite3_strnicmp(z2, "quoted-printable", 16)==0 ){ |
| 178 | pBody->encoding = EMAILENC_QUOTED; |
| 179 | }else{ |
| 180 | pBody->encoding = EMAILENC_NONE; |
| 181 | } |
| 182 | } |
| 183 | if( bAddHeader ) emailtoc_new_header_line(p, z+i); |
| 184 | i += n; |
| 185 | } |
| 186 | if( multipartBody ){ |
| 187 | p->nBody--; |
| 188 | emailtoc_add_multipart(p, z+i); |
| 189 | }else{ |
| 190 | pBody->zContent = z+i; |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | /* |
| 195 | ** The n-bytes of content in z are a multipart/ body component for |
| 196 | ** an email message. Decode this into its individual segments. |
| 197 | ** |
| 198 | ** The component should start and end with a boundary line. There |
| 199 | ** may be additional boundary lines in the middle. |
| 200 | */ |
| 201 | LOCAL void emailtoc_add_multipart( |
| 202 | EmailToc *p, /* Append the segments here */ |
| 203 | char *z /* The body component. zero-terminated */ |
| 204 | ){ |
| 205 | int nB; /* Size of the boundary string */ |
| 206 | int iStart; /* Start of the coding region past boundary mark */ |
| 207 | int i; /* Loop index */ |
| 208 | char *zBoundary = 0; /* Boundary marker */ |
| 209 | |
| 210 | /* Find the length of the boundary mark. */ |
| 211 | while( fossil_isspace(z[0]) ) z++; |
| 212 | zBoundary = z; |
| 213 | for(nB=0; z[nB] && !fossil_isspace(z[nB]); nB++){} |
| 214 | if( nB==0 ) return; |
| 215 | z += nB; |
| 216 | while( fossil_isspace(z[0]) ) z++; |
| 217 | zBoundary[nB] = 0; |
| 218 | for(i=iStart=0; z[i]; i++){ |
| 219 | if( z[i]=='\n' && strncmp(z+i+1, zBoundary, nB)==0 ){ |
| 220 | z[i+1] = 0; |
| 221 | emailtoc_add_multipart_segment(p, z+iStart, 0); |
| 222 | iStart = i+nB; |
| 223 | if( z[iStart]=='-' && z[iStart+1]=='-' ) return; |
| 224 | while( fossil_isspace(z[iStart]) ) iStart++; |
| 225 | i = iStart; |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | |
| 231 | /* |
| 232 | ** Compute a table-of-contents (EmailToc) for the email message |
| 233 | ** provided on the input. |
| 234 | ** |
| 235 | ** This routine will cause pEmail to become zero-terminated if it is |
| 236 | ** not already. It will also insert zero characters into parts of |
| 237 | ** the message, to delimit the various components. |
| 238 | */ |
| 239 | EmailToc *emailtoc_from_email(Blob *pEmail){ |
| 240 | char *z; |
| 241 | EmailToc *p = emailtoc_alloc(); |
| 242 | blob_terminate(pEmail); |
| 243 | z = blob_buffer(pEmail); |
| 244 | emailtoc_add_multipart_segment(p, z, 1); |
| 245 | return p; |
| 246 | } |
| 247 | |
| 248 | /* |
| 249 | ** COMMAND: test-decode-email |
| @@ -219,25 +255,23 @@ | |
| 255 | */ |
| 256 | void test_email_decode_cmd(void){ |
| 257 | Blob email; |
| 258 | EmailToc *p; |
| 259 | int i; |
| 260 | verify_all_options(); |
| 261 | if( g.argc!=3 ) usage("FILE"); |
| 262 | blob_read_from_file(&email, g.argv[2], ExtFILE); |
| 263 | p = emailtoc_from_email(&email); |
| 264 | fossil_print("%d header line and %d content segments\n", |
| 265 | p->nHdr, p->nBody); |
| 266 | for(i=0; i<p->nHdr; i++){ |
| 267 | fossil_print("%3d: %s\n", i, p->azHdr[i]); |
| 268 | } |
| 269 | for(i=0; i<p->nBody; i++){ |
| 270 | fossil_print("\nBODY %d mime \"%s\" encoding %d:\n", |
| 271 | i, p->aBody[i].zMimetype, p->aBody[i].encoding); |
| 272 | fossil_print("%s\n", p->aBody[i].zContent); |
| 273 | } |
| 274 | emailtoc_free(p); |
| 275 | blob_reset(&email); |
| 276 | } |
| 277 | |
| 278 |