Fossil SCM

Work toward an email format decoder. This is an incremental check-in.

drh 2018-07-12 20:36 trunk
Commit 11c82d816969039bb371e0a44c03ffe876146e6086247686a2d592620d989c80
1 file changed +215
+215
--- src/webmail.c
+++ src/webmail.c
@@ -24,10 +24,225 @@
2424
*/
2525
#include "config.h"
2626
#include "webmail.h"
2727
#include <assert.h>
2828
29
+
30
+#if INTERFACE
31
+
32
+/* Recognized content encodings */
33
+#define EMAILENC_NONE 0 /* No encoding */
34
+#define EMAILENC_B64 1 /* Base64 encoded */
35
+#define EMAILENC_QUOTED 2 /* Quoted printable */
36
+
37
+/* An instance of the following object records the location of important
38
+** attributes on a single element in a multipart email message body.
39
+*/
40
+struct EmailBody {
41
+ char zMimetype[32]; /* Mimetype */
42
+ u8 encoding; /* Type of encoding */
43
+ char *zFilename; /* From content-disposition: */
44
+ Blob content; /* Encoded content for this segment */
45
+};
46
+
47
+/*
48
+** An instance of the following object describes the structure of
49
+** an rfc-2822 email message.
50
+*/
51
+struct EmailToc {
52
+ int nHdr; /* Number of header lines */
53
+ int nHdrAlloc; /* Number of header lines allocated */
54
+ int *aHdr; /* Two integers for each hdr line, offset and length */
55
+ int nBody; /* Number of body segments */
56
+ int nBodyAlloc; /* Number of body segments allocated */
57
+ EmailBody *aBody; /* Location of body information */
58
+};
59
+#endif
60
+
61
+/*
62
+** Free an EmailToc object
63
+*/
64
+void emailtoc_free(EmailToc *p){
65
+ int i;
66
+ fossil_free(p->aHdr);
67
+ for(i=0; i<p->nBody; i++){
68
+ fossil_free(p->aBody[i].zFilename);
69
+ blob_reset(&p->aBody[i].content);
70
+ }
71
+ fossil_free(p->aBody);
72
+ fossil_free(p);
73
+}
74
+
75
+/*
76
+** Allocate a new EmailToc object
77
+*/
78
+EmailToc *emailtoc_alloc(void){
79
+ EmailToc *p = fossil_malloc( sizeof(*p) );
80
+ memset(p, 0, sizeof(*p));
81
+ return p;
82
+}
83
+
84
+/*
85
+** Add a new body element to an EmailToc.
86
+*/
87
+EmailBody *emailtoc_new_body(EmailToc *p){
88
+ EmailBody *pNew;
89
+ p->nBody++;
90
+ if( p->nBody>p->nBodyAlloc ){
91
+ p->nBodyAlloc = (p->nBodyAlloc+1)*2;
92
+ p->aBody = fossil_realloc(p->aBody, sizeof(p->aBody[0])*p->nBodyAlloc);
93
+ }
94
+ pNew = &p->aBody[p->nBody-1];
95
+ memset(pNew, 0, sizeof(*pNew));
96
+ pNew->content = empty_blob;
97
+ return pNew;
98
+}
99
+
100
+/*
101
+** Add a new header line to the EmailToc.
102
+*/
103
+void emailtoc_new_header_line(EmailToc *p, int iOfst, int nAmt){
104
+ p->nHdr++;
105
+ if( p->nHdr>p->nHdrAlloc ){
106
+ p->nHdrAlloc = (p->nHdrAlloc+1)*2;
107
+ p->aHdr = fossil_realloc(p->aHdr, sizeof(int)*2*p->nHdrAlloc);
108
+ }
109
+ p->aHdr[p->nHdr*2-2] = iOfst;
110
+ p->aHdr[p->nHdr*2-1] = nAmt;
111
+}
112
+
113
+/*
114
+** Return the length of a line in an email header. Continuation lines
115
+** are included. Hence, this routine returns the number of bytes up to
116
+** and including the first \n character that is followed by something
117
+** other than whitespace.
118
+*/
119
+static int email_line_length(const char *z){
120
+ int i;
121
+ for(i=0; z[i] && (z[i]!='\n' || z[i+1]==' ' || z[i+1]=='\t'); i++){}
122
+ if( z[i]=='\n' ) i++;
123
+ return i;
124
+}
125
+
126
+/*
127
+** Return a pointer to the first non-whitespace character in z
128
+*/
129
+static const char *firstToken(const char *z, int n){
130
+ while( n>0 && fossil_isspace(*z) ){
131
+ n--;
132
+ z++;
133
+ }
134
+ return n>0 ? z : 0;
135
+}
136
+
137
+/*
138
+** The n-bytes of content in z are a multipart/ body component for
139
+** an email message. Decode this into its individual segments.
140
+**
141
+** The component should start and end with a boundary line. There
142
+** may be additional boundary lines in the middle.
143
+*/
144
+static void emailtoc_add_multipart(
145
+ EmailToc *p, /* Append the segments here */
146
+ Blob *pEmail, /* The original full email raw text */
147
+ const char *z, /* The body component */
148
+ int n /* Bytes of content in z[] */
149
+){
150
+ return;
151
+}
152
+
153
+
154
+/*
155
+** Compute a table-of-contents (EmailToc) for the email message
156
+** provided on the input.
157
+*/
158
+EmailToc *emailtoc_from_email(Blob *pEmail){
159
+ const char *z;
160
+ int i;
161
+ int n;
162
+ int multipartBody = 0;
163
+ EmailToc *p = emailtoc_alloc();
164
+ EmailBody *pBody = emailtoc_new_body(p);
165
+ blob_terminate(pEmail);
166
+ z = blob_buffer(pEmail);
167
+ i = 0;
168
+ while( z[i] ){
169
+ n = email_line_length(&z[i]);
170
+ if( (n==2 && z[i]=='\r' && z[i+1]=='\n') || z[i]=='\n' || n==0 ){
171
+ /* This is the blank line at the end of the header */
172
+ i += n;
173
+ break;
174
+ }
175
+ if( sqlite3_strnicmp(z+i, "Content-Type:", 13)==0 ){
176
+ const char *z2 = firstToken(z+i+13, n-13);
177
+ if( z2 && strncmp(z2, "multipart/", 10)==0 ){
178
+ multipartBody = 1;
179
+ }else{
180
+ int j;
181
+ for(j=0; z2[j]=='/' || fossil_isalnum(z2[j]); j++){}
182
+ if( j>=sizeof(pBody->zMimetype) ) j = sizeof(pBody->zMimetype);
183
+ memcpy(pBody->zMimetype, z2, j);
184
+ pBody->zMimetype[j] = 0;
185
+ }
186
+ }
187
+ /* 123456789 123456789 123456 */
188
+ if( sqlite3_strnicmp(z+i, "Content-Transfer-Encoding:", 26)==0 ){
189
+ const char *z2 = firstToken(z+(i+26), n-26);
190
+ if( z2 && sqlite3_strnicmp(z2, "base64", 6)==0 ){
191
+ pBody->encoding = EMAILENC_B64;
192
+ /* 123456789 123456 */
193
+ }else if( sqlite3_strnicmp(z2, "quoted-printable", 16)==0 ){
194
+ pBody->encoding = EMAILENC_QUOTED;
195
+ }else{
196
+ pBody->encoding = EMAILENC_NONE;
197
+ }
198
+ }
199
+ emailtoc_new_header_line(p, i, n);
200
+ i += n;
201
+ }
202
+ n = blob_size(pEmail) - i;
203
+ if( multipartBody ){
204
+ p->nBody--;
205
+ emailtoc_add_multipart(p, pEmail, z+i, n);
206
+ }else{
207
+ blob_init(&pBody->content, z+i, n);
208
+ }
209
+ return p;
210
+}
211
+
212
+/*
213
+** COMMAND: test-decode-email
214
+**
215
+** Usage: %fossil test-decode-email FILE
216
+**
217
+** Read an rfc-2822 formatted email out of FILE, then write a decoding
218
+** to stdout. Use for testing and validating the email decoder.
219
+*/
220
+void test_email_decode_cmd(void){
221
+ Blob email;
222
+ EmailToc *p;
223
+ int i;
224
+ const char *z;
225
+ verify_all_options();
226
+ if( g.argc!=3 ) usage("FILE");
227
+ blob_read_from_file(&email, g.argv[2], ExtFILE);
228
+ p = emailtoc_from_email(&email);
229
+ z = blob_buffer(&email);
230
+ fossil_print("%d header line and %d content segments\n",
231
+ p->nHdr, p->nBody);
232
+ for(i=0; i<p->nHdr; i++){
233
+ fossil_print("%3d: %.*s", i, p->aHdr[i*2+1], z+p->aHdr[i*2]);
234
+ }
235
+ for(i=0; i<p->nBody; i++){
236
+ fossil_print("\nBODY %d mime \"%s\" encoding %d:\n",
237
+ i, p->aBody[i].zMimetype, p->aBody[i].encoding);
238
+ fossil_print("%s\n", blob_str(&p->aBody[i].content));
239
+ }
240
+ emailtoc_free(p);
241
+ blob_reset(&email);
242
+}
243
+
29244
/*
30245
** WEBPAGE: webmail
31246
**
32247
** This page can be used to read content from the EMAILBOX table
33248
** that contains email received by the "fossil smtpd" command.
34249
--- src/webmail.c
+++ src/webmail.c
@@ -24,10 +24,225 @@
24 */
25 #include "config.h"
26 #include "webmail.h"
27 #include <assert.h>
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29 /*
30 ** WEBPAGE: webmail
31 **
32 ** This page can be used to read content from the EMAILBOX table
33 ** that contains email received by the "fossil smtpd" command.
34
--- src/webmail.c
+++ src/webmail.c
@@ -24,10 +24,225 @@
24 */
25 #include "config.h"
26 #include "webmail.h"
27 #include <assert.h>
28
29
30 #if INTERFACE
31
32 /* Recognized content encodings (values of EmailBody.encoding) */
33 #define EMAILENC_NONE 0 /* No encoding; also used for unrecognized encodings */
34 #define EMAILENC_B64 1 /* Base64 encoded */
35 #define EMAILENC_QUOTED 2 /* Quoted printable */
36
37 /* An instance of the following object records the important attributes
38 ** of a single element (segment) of an email message body.
39 */
40 struct EmailBody {
41 char zMimetype[32]; /* Mimetype copied from Content-Type:, NUL-terminated */
42 u8 encoding; /* Type of encoding: one of the EMAILENC_* values */
43 char *zFilename; /* From content-disposition:  Freed by emailtoc_free() */
44 Blob content; /* Encoded content for this segment */
45 };
46
47 /*
48 ** An instance of the following object describes the structure of
49 ** an rfc-2822 email message.
50 */
51 struct EmailToc {
52 int nHdr; /* Number of header lines */
53 int nHdrAlloc; /* Number of header lines allocated */
54 int *aHdr; /* Two integers per hdr line: offset into the raw text, length */
55 int nBody; /* Number of body segments */
56 int nBodyAlloc; /* Number of body segments allocated */
57 EmailBody *aBody; /* Array of body segments; the first nBody are in use */
58 };
59 #endif
60
61 /*
62 ** Free An EmailToc object
63 */
64 void emailtoc_free(EmailToc *p){
65 int i;
66 fossil_free(p->aHdr);
67 for(i=0; i<p->nBody; i++){
68 fossil_free(p->aBody[i].zFilename);
69 blob_reset(&p->aBody[i].content);
70 }
71 fossil_free(p->aBody);
72 fossil_free(p);
73 }
74
75 /*
76 ** Allocate a new EmailToc object
77 */
78 EmailToc *emailtoc_alloc(void){
79 EmailToc *p = fossil_malloc( sizeof(*p) );
80 memset(p, 0, sizeof(*p));
81 return p;
82 }
83
84 /*
85 ** Add a new body element to an EmailToc.
86 */
87 EmailBody *emailtoc_new_body(EmailToc *p){
88 EmailBody *pNew;
89 p->nBody++;
90 if( p->nBody>p->nBodyAlloc ){
91 p->nBodyAlloc = (p->nBodyAlloc+1)*2;
92 p->aBody = fossil_realloc(p->aBody, sizeof(p->aBody[0])*p->nBodyAlloc);
93 }
94 pNew = &p->aBody[p->nBody-1];
95 memset(pNew, 0, sizeof(*pNew));
96 pNew->content = empty_blob;
97 return pNew;
98 }
99
100 /*
101 ** Add a new header line to the EmailToc.
102 */
103 void emailtoc_new_header_line(EmailToc *p, int iOfst, int nAmt){
104 p->nHdr++;
105 if( p->nHdr>p->nHdrAlloc ){
106 p->nHdrAlloc = (p->nHdrAlloc+1)*2;
107 p->aHdr = fossil_realloc(p->aHdr, sizeof(int)*2*p->nHdrAlloc);
108 }
109 p->aHdr[p->nHdr*2-2] = iOfst;
110 p->aHdr[p->nHdr*2-1] = nAmt;
111 }
112
/*
** Return the length of a line in an email header.  Continuation lines
** are included.  Hence, this routine returns the number of bytes up to
** and including the first \n character that is followed by something
** other than a space or tab.
**
** A blank line ("\n" or "\r\n") is never continued, even if the text
** after it begins with whitespace: the blank line ends the header and
** whatever follows is body text.  The return value is 1 or 2 in that
** case.
**
** If z[] ends before any such \n is found, the number of bytes up to
** the NUL terminator is returned.  An empty string gives 0.
*/
static int email_line_length(const char *z){
  int i;
  if( z[0]=='\n' ) return 1;
  if( z[0]=='\r' && z[1]=='\n' ) return 2;
  for(i=0; z[i] && (z[i]!='\n' || z[i+1]==' ' || z[i+1]=='\t'); i++){}
  if( z[i]=='\n' ) i++;
  return i;
}
125
/*
** Return a pointer to the first non-whitespace character among the
** first n bytes of z.  Return NULL if n<=0 or if all n bytes are
** whitespace.
*/
static const char *firstToken(const char *z, int n){
  int k;
  for(k=0; k<n; k++){
    if( !fossil_isspace(z[k]) ) return &z[k];
  }
  return 0;
}
136
137 /*
138 ** The n-bytes of content in z are a multipart/ body component for
139 ** an email message. Decode this into its individual segments.
140 **
141 ** The component should start and end with a boundary line. There
142 ** may be additional boundary lines in the middle.
143 */
144 static void emailtoc_add_multipart(
145 EmailToc *p, /* Append the segments here */
146 Blob *pEmail, /* The original full email raw text */
147 const char *z, /* The body component */
148 int n /* Bytes of content in z[] */
149 ){
150 return;
151 }
152
153
154 /*
155 ** Compute a table-of-contents (EmailToc) for the email message
156 ** provided on the input.
157 */
158 EmailToc *emailtoc_from_email(Blob *pEmail){
159 const char *z;
160 int i;
161 int n;
162 int multipartBody = 0;
163 EmailToc *p = emailtoc_alloc();
164 EmailBody *pBody = emailtoc_new_body(p);
165 blob_terminate(pEmail);
166 z = blob_buffer(pEmail);
167 i = 0;
168 while( z[i] ){
169 n = email_line_length(&z[i]);
170 if( (n==2 && z[i]=='\r' && z[i+1]=='\n') || z[i]=='\n' || n==0 ){
171 /* This is the blank line at the end of the header */
172 i += n;
173 break;
174 }
175 if( sqlite3_strnicmp(z+i, "Content-Type:", 13)==0 ){
176 const char *z2 = firstToken(z+i+13, n-13);
177 if( z2 && strncmp(z2, "multipart/", 10)==0 ){
178 multipartBody = 1;
179 }else{
180 int j;
181 for(j=0; z2[j]=='/' || fossil_isalnum(z2[j]); j++){}
182 if( j>=sizeof(pBody->zMimetype) ) j = sizeof(pBody->zMimetype);
183 memcpy(pBody->zMimetype, z2, j);
184 pBody->zMimetype[j] = 0;
185 }
186 }
187 /* 123456789 123456789 123456 */
188 if( sqlite3_strnicmp(z+i, "Content-Transfer-Encoding:", 26)==0 ){
189 const char *z2 = firstToken(z+(i+26), n-26);
190 if( z2 && sqlite3_strnicmp(z2, "base64", 6)==0 ){
191 pBody->encoding = EMAILENC_B64;
192 /* 123456789 123456 */
193 }else if( sqlite3_strnicmp(z2, "quoted-printable", 16)==0 ){
194 pBody->encoding = EMAILENC_QUOTED;
195 }else{
196 pBody->encoding = EMAILENC_NONE;
197 }
198 }
199 emailtoc_new_header_line(p, i, n);
200 i += n;
201 }
202 n = blob_size(pEmail) - i;
203 if( multipartBody ){
204 p->nBody--;
205 emailtoc_add_multipart(p, pEmail, z+i, n);
206 }else{
207 blob_init(&pBody->content, z+i, n);
208 }
209 return p;
210 }
211
212 /*
213 ** COMMAND: test-decode-email
214 **
215 ** Usage: %fossil test-decode-email FILE
216 **
217 ** Read an rfc-2822 formatted email out of FILE, then write a decoding
218 ** to stdout. Use for testing and validating the email decoder.
219 */
220 void test_email_decode_cmd(void){
221 Blob email;
222 EmailToc *p;
223 int i;
224 const char *z;
225 verify_all_options();
226 if( g.argc!=3 ) usage("FILE");
227 blob_read_from_file(&email, g.argv[2], ExtFILE);
228 p = emailtoc_from_email(&email);
229 z = blob_buffer(&email);
230 fossil_print("%d header line and %d content segments\n",
231 p->nHdr, p->nBody);
232 for(i=0; i<p->nHdr; i++){
233 fossil_print("%3d: %.*s", i, p->aHdr[i*2+1], z+p->aHdr[i*2]);
234 }
235 for(i=0; i<p->nBody; i++){
236 fossil_print("\nBODY %d mime \"%s\" encoding %d:\n",
237 i, p->aBody[i].zMimetype, p->aBody[i].encoding);
238 fossil_print("%s\n", blob_str(&p->aBody[i].content));
239 }
240 emailtoc_free(p);
241 blob_reset(&email);
242 }
243
244 /*
245 ** WEBPAGE: webmail
246 **
247 ** This page can be used to read content from the EMAILBOX table
248 ** that contains email received by the "fossil smtpd" command.
249

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button