improved message parsing

This commit is contained in:
SJ 2012-08-21 21:57:39 +02:00
parent d763892f3e
commit 7e8c7f12b6
9 changed files with 162 additions and 33 deletions

View File

@ -13,7 +13,7 @@
#define VERSION "0.1.20"
#define BUILD 684
#define BUILD 687
#define HOSTID "mailarchiver"

View File

@ -130,6 +130,8 @@ struct _state {
int content_type_is_set;
int pushed_pointer;
int saved_size;
int writebufpos;
int abufpos;
char attachedfile[RND_STR_LEN+SMALLBUFSIZE];
char message_id[SMALLBUFSIZE];
char miscbuf[MAX_TOKEN_LEN];
@ -152,6 +154,9 @@ struct _state {
char reference[SMALLBUFSIZE];
char b_from[SMALLBUFSIZE], b_from_domain[SMALLBUFSIZE], b_to[MAXBUFSIZE], b_to_domain[SMALLBUFSIZE], b_subject[MAXBUFSIZE], b_body[BIGBUFSIZE];
int bodylen;
int tolen;
};
@ -220,6 +225,8 @@ struct memcached_server {
struct __data {
char *folder;
#ifdef HAVE_TRE
struct rule *archiving_rules;
struct rule *retention_rules;

View File

@ -97,7 +97,7 @@ ENDE:
switch(rc) {
case OK:
printf("imported: %s\n", filename);
//printf("imported: %s\n", filename);
bzero(&counters, sizeof(counters));
counters.c_size += sdata->tot_len;
@ -106,7 +106,7 @@ ENDE:
break;
case ERR_EXISTS:
printf("discarding duplicate message: %s\n", filename);
//printf("discarding duplicate message: %s\n", filename);
rc = OK;
break;

View File

@ -20,6 +20,7 @@ struct _state parse_message(struct session_data *sdata, int take_into_pieces, st
FILE *f;
int i, len;
char *p, buf[MAXBUFSIZE], puf[SMALLBUFSIZE];
char writebuffer[MAXBUFSIZE], abuffer[MAXBUFSIZE];
struct _state state;
init_state(&state);
@ -77,7 +78,13 @@ struct _state parse_message(struct session_data *sdata, int take_into_pieces, st
}
while(fgets(buf, sizeof(buf)-1, f)){
parse_line(buf, &state, sdata, take_into_pieces, cfg);
parse_line(buf, &state, sdata, take_into_pieces, &writebuffer[0], sizeof(writebuffer), &abuffer[0], sizeof(abuffer), cfg);
}
if(take_into_pieces == 1 && state.writebufpos > 0){
len = write(state.mfd, writebuffer, state.writebufpos);
memset(writebuffer, 0, sizeof(writebuffer));
state.writebufpos = 0;
}
if(take_into_pieces == 1){
@ -138,9 +145,9 @@ void post_parse(struct session_data *sdata, struct _state *state, struct __confi
}
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, struct __config *cfg){
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __config *cfg){
char *p, *q, puf[SMALLBUFSIZE];
int x, n, len, b64_len, boundary_line=0;
int x, n, len, writelen, b64_len, boundary_line=0;
if(cfg->debug == 1) printf("line: %s", buf);
@ -167,14 +174,21 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
if(take_into_pieces == 1){
if(state->message_state == MSG_BODY && state->fd != -1 && is_item_on_string(state->boundaries, buf) == 0){
//printf("dumping: %s", buf);
n = write(state->fd, buf, len);
//n = write(state->fd, buf, len); // WRITE
if(len + state->abufpos > abuffersize-1){
n = write(state->fd, abuffer, state->abufpos); state->abufpos = 0; memset(abuffer, 0, abuffersize);
}
memcpy(abuffer+state->abufpos, buf, len); state->abufpos += len;
state->attachments[state->n_attachments].size += len;
}
else {
state->saved_size += len;
//printf("%s", buf);
n = write(state->mfd, buf, len);
//n = write(state->mfd, buf, len); // WRITE
if(len + state->writebufpos > writebuffersize-1){
n = write(state->mfd, writebuffer, state->writebufpos); state->writebufpos = 0; memset(writebuffer, 0, writebuffersize);
}
memcpy(writebuffer+state->writebufpos, buf, len); state->writebufpos += len;
}
}
@ -213,8 +227,12 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
}
else {
snprintf(puf, sizeof(puf)-1, "ATTACHMENT_POINTER_%s.a%d_XXX_PILER", sdata->ttmpfile, state->n_attachments);
n = write(state->mfd, puf, strlen(puf));
//printf("%s", puf);
//n = write(state->mfd, puf, strlen(puf)); // WRITE
writelen = strlen(puf);
if(writelen + state->writebufpos > writebuffersize-1){
n = write(state->mfd, writebuffer, state->writebufpos); state->writebufpos = 0; memset(writebuffer, 0, writebuffersize);
}
memcpy(writebuffer+state->writebufpos, puf, writelen); state->writebufpos += writelen;
}
}
@ -377,7 +395,12 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
state->content_type_is_set = 0;
if(state->has_to_dump == 1){
if(take_into_pieces == 1 && state->fd != -1) close(state->fd);
if(take_into_pieces == 1 && state->fd != -1){
if(state->abufpos > 0){
n = write(state->fd, abuffer, state->abufpos); state->abufpos = 0; memset(abuffer, 0, abuffersize);
}
close(state->fd);
}
state->fd = -1;
}
@ -483,11 +506,12 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
}
}
}
else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && strlen(state->b_to) < MAXBUFSIZE-len-1){
else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && state->tolen < MAXBUFSIZE-len-1){
if(is_string_on_list(state->rcpt, puf) == 0){
append_list(&(state->rcpt), puf);
memcpy(&(state->b_to[strlen(state->b_to)]), puf, len);
memcpy(&(state->b_to[state->tolen]), puf, len);
state->tolen += len;
if(does_it_seem_like_an_email_address(puf) == 1){
if(is_email_address_on_my_domains(puf, cfg) == 1) sdata->internal_recipient = 1;
@ -501,16 +525,19 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
}
}
if(strlen(state->b_to) < MAXBUFSIZE-len-1){
if(state->tolen < MAXBUFSIZE-len-1){
split_email_address(puf);
memcpy(&(state->b_to[strlen(state->b_to)]), puf, len);
memcpy(&(state->b_to[state->tolen]), puf, len);
state->tolen += len;
}
}
}
}
else if(state->message_state == MSG_BODY && strlen(state->b_body) < BIGBUFSIZE-len-1)
memcpy(&(state->b_body[strlen(state->b_body)]), puf, len);
else if(state->message_state == MSG_BODY && state->bodylen < BIGBUFSIZE-len-1){
memcpy(&(state->b_body[state->bodylen]), puf, len);
state->bodylen += len;
}
} while(p);

View File

@ -11,7 +11,8 @@
struct _state parse_message(struct session_data *sdata, int take_into_pieces, struct __config *cfg);
void post_parse(struct session_data *sdata, struct _state *state, struct __config *cfg);
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, struct __config *cfg);
//int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, struct __config *cfg);
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __config *cfg);
void init_state(struct _state *state);
unsigned long parse_date_header(char *s);

View File

@ -62,6 +62,9 @@ void init_state(struct _state *state){
state->pushed_pointer = 0;
state->saved_size = 0;
state->writebufpos = 0;
state->abufpos = 0;
state->boundaries = NULL;
state->rcpt = NULL;
state->rcpt_domain = NULL;
@ -84,6 +87,9 @@ void init_state(struct _state *state){
memset(state->b_to_domain, 0, SMALLBUFSIZE);
memset(state->b_subject, 0, MAXBUFSIZE);
memset(state->b_body, 0, BIGBUFSIZE);
state->tolen = 0;
state->bodylen = 0;
}

View File

@ -288,6 +288,7 @@ void initialise_configuration(){
free_rule(data.archiving_rules);
free_rule(data.retention_rules);
data.folder = NULL;
data.archiving_rules = NULL;
data.retention_rules = NULL;
@ -341,6 +342,7 @@ int main(int argc, char **argv){
(void) openlog(PROGNAME, LOG_PID, LOG_MAIL);
data.folder = NULL;
data.archiving_rules = NULL;
data.retention_rules = NULL;

View File

@ -16,6 +16,7 @@
#include <unistd.h>
#include <time.h>
#include <locale.h>
#include <getopt.h>
#include <syslog.h>
#include <piler.h>
@ -80,7 +81,54 @@ int import_from_mailbox(char *mailbox, struct session_data *sdata, struct __data
fclose(F);
if(quiet == 0) printf("\n");
return ret;
}
/*
 * Recursively walk 'directory' and import the files found in it as mailboxes.
 *
 * Every directory entry except "." and ".." is stat()ed:
 *   - a subdirectory is processed by a recursive call,
 *   - a regular file is passed to import_from_mailbox(),
 *   - anything else is reported on stdout and skipped.
 *
 * directory       path of the directory to scan
 * sdata, data,
 * cfg             passed through unchanged to import_from_mailbox()
 * tot_msgs        incremented once for each file whose import_from_mailbox()
 *                 call returned OK
 *
 * Returns ERR if 'directory' cannot be opened, if a nested call returned ERR,
 * or if import_from_mailbox() returned anything other than OK for some file;
 * returns OK otherwise. Entries that cannot be stat()ed, that are neither a
 * file nor a directory, or whose full path does not fit into the path buffer
 * are reported on stdout and skipped without changing the return value.
 */
int import_mbox_from_dir(char *directory, struct session_data *sdata, struct __data *data, int *tot_msgs, struct __config *cfg){
   DIR *dir;
   struct dirent *de;
   int rc=ERR, ret=OK, len;
   char fname[SMALLBUFSIZE];
   struct stat st;

   dir = opendir(directory);
   if(!dir){
      printf("cannot open directory: %s\n", directory);
      return ERR;
   }

   while((de = readdir(dir))){
      if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue;

      len = snprintf(fname, sizeof(fname)-1, "%s/%s", directory, de->d_name);

      /*
       * snprintf() returns the length the full path would have had. If that
       * does not fit, fname holds a truncated path that may name a different
       * file or directory, so never act on it.
       */
      if(len < 0 || len >= (int)sizeof(fname)-1){
         printf("path too long, skipping: %s/%s\n", directory, de->d_name);
         continue;
      }

      if(stat(fname, &st) == 0){
         if(S_ISDIR(st.st_mode)){
            rc = import_mbox_from_dir(fname, sdata, data, tot_msgs, cfg);
            if(rc == ERR) ret = ERR;
         }
         else {
            if(S_ISREG(st.st_mode)){
               rc = import_from_mailbox(fname, sdata, data, cfg);
               if(rc == OK) (*tot_msgs)++;
               else ret = ERR;
            }
            else {
               printf("%s is not a file\n", fname);
            }
         }
      }
      else {
         printf("cannot stat() %s\n", fname);
      }
   }

   closedir(dir);

   return ret;
}
@ -132,8 +180,6 @@ int import_from_maildir(char *directory, struct session_data *sdata, struct __da
}
closedir(dir);
if(quiet == 0) printf("\n");
return ret;
}
@ -194,8 +240,6 @@ int import_from_imap_server(char *imapserver, char *username, char *password, st
close(sd);
if(quiet == 0) printf("\n");
return ret;
}
@ -207,8 +251,8 @@ void usage(){
int main(int argc, char **argv){
int i, rc=0, n_mbox=0, tot_msgs=0;
char *configfile=CONFIG_FILE, *emlfile=NULL, *mbox[MBOX_ARGS], *directory=NULL;
int i, c, rc=0, n_mbox=0, tot_msgs=0;
char *configfile=CONFIG_FILE, *emlfile=NULL, *mboxdir=NULL, *mbox[MBOX_ARGS], *directory=NULL;
char *imapserver=NULL, *username=NULL, *password=NULL, *skiplist=SKIPLIST;
struct session_data sdata;
struct __config cfg;
@ -216,8 +260,41 @@ int main(int argc, char **argv){
for(i=0; i<MBOX_ARGS; i++) mbox[i] = NULL;
while((i = getopt(argc, argv, "c:m:e:d:i:u:p:x:h?")) > 0){
switch(i){
data.folder = NULL;
data.archiving_rules = NULL;
data.retention_rules = NULL;
while(1){
#ifdef _GNU_SOURCE
static struct option long_options[] =
{
{"config", required_argument, 0, 'c' },
{"eml", required_argument, 0, 'e' },
{"dir", required_argument, 0, 'd' },
{"mbox", required_argument, 0, 'm' },
{"mboxdir", required_argument, 0, 'M' },
{"imapserver", required_argument, 0, 'i' },
{"username", required_argument, 0, 'u' },
{"password", required_argument, 0, 'p' },
{"skiplist", required_argument, 0, 'x' },
{"folder", required_argument, 0, 'f' },
{"help", no_argument, 0, 'h' },
{0,0,0,0}
};
int option_index = 0;
c = getopt_long(argc, argv, "c:m:M:e:d:i:u:p:x:f:h?", long_options, &option_index);
#else
c = getopt(argc, argv, "c:m:M:e:d:i:u:p:x:f:h?");
#endif
if(c == -1) break;
switch(c){
case 'c' :
configfile = optarg;
@ -240,6 +317,10 @@ int main(int argc, char **argv){
break;
case 'M' :
mboxdir = optarg;
break;
case 'i' :
imapserver = optarg;
break;
@ -256,6 +337,10 @@ int main(int argc, char **argv){
skiplist = optarg;
break;
case 'f' :
data.folder = optarg;
break;
case 'h' :
case '?' :
usage();
@ -269,7 +354,7 @@ int main(int argc, char **argv){
if(!mbox[0] && !emlfile && !directory && !imapserver) usage();
if(!mbox[0] && !mboxdir && !emlfile && !directory && !imapserver) usage();
cfg = read_config(configfile);
@ -291,9 +376,6 @@ int main(int argc, char **argv){
setlocale(LC_CTYPE, cfg.locale);
data.archiving_rules = NULL;
data.retention_rules = NULL;
(void) openlog("pilerimport", LOG_PID, LOG_MAIL);
load_rules(&sdata, &(data.archiving_rules), SQL_ARCHIVING_RULE_TABLE);
@ -305,6 +387,7 @@ int main(int argc, char **argv){
rc = import_from_mailbox(mbox[i], &sdata, &data, &cfg);
}
}
if(mboxdir) rc = import_mbox_from_dir(mboxdir, &sdata, &data, &tot_msgs, &cfg);
if(directory) rc = import_from_maildir(directory, &sdata, &data, &tot_msgs, &cfg);
if(imapserver && username && password) rc = import_from_imap_server(imapserver, username, password, &sdata, &data, skiplist, &cfg);
@ -315,6 +398,8 @@ int main(int argc, char **argv){
mysql_close(&(sdata.mysql));
if(quiet == 0) printf("\n");
return rc;
}

View File

@ -53,6 +53,7 @@ int main(int argc, char **argv){
printf("build: %d\n", get_build());
data.folder = NULL;
data.archiving_rules = NULL;
data.retention_rules = NULL;