From 93696c29854cc04bbd4bb8877a70aed2f5f883d4 Mon Sep 17 00:00:00 2001 From: SJ Date: Sun, 24 Mar 2013 01:20:12 +0100 Subject: [PATCH] parser fixes --- src/config.h | 2 +- src/extract.c | 11 ++++++----- src/parser.c | 24 +++++++++++++----------- src/test.c | 3 +++ 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/config.h b/src/config.h index 717709a8..9f0c3f78 100644 --- a/src/config.h +++ b/src/config.h @@ -13,7 +13,7 @@ #define VERSION "0.1.23-master-branch" -#define BUILD 777 +#define BUILD 782 #define HOSTID "mailarchiver" diff --git a/src/extract.c b/src/extract.c index 12f37f0c..0b630c73 100644 --- a/src/extract.c +++ b/src/extract.c @@ -45,6 +45,7 @@ void remove_xml(char *buf, int *html){ #ifdef HAVE_ZIP int extract_opendocument(struct session_data *sdata, struct _state *state, char *filename, char *prefix){ int errorp, i=0, len=0, html=0; + int len2; char buf[MAXBUFSIZE]; struct zip *z; struct zip_stat sb; @@ -60,14 +61,14 @@ int extract_opendocument(struct session_data *sdata, struct _state *state, char zf = zip_fopen_index(z, i, 0); if(zf){ - while((len = zip_fread(zf, buf, sizeof(buf))) > 0){ + while((len = zip_fread(zf, buf, sizeof(buf)-2)) > 0){ remove_xml(buf, &html); - len = strlen(buf); + len2 = strlen(buf); - if(state->bodylen < BIGBUFSIZE-len-1){ - memcpy(&(state->b_body[state->bodylen]), buf, len); - state->bodylen += len; + if(len2 > 0 && state->bodylen < BIGBUFSIZE-len2-1){ + memcpy(&(state->b_body[state->bodylen]), buf, len2); + state->bodylen += len2; } memset(buf, 0, sizeof(buf)); diff --git a/src/parser.c b/src/parser.c index 27056580..431c65d9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -320,19 +320,17 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int if(state->is_header == 0 && buf[0] != ' ' && buf[0] != '\t') state->message_state = MSG_BODY; - if((state->content_type_is_set == 0 || state->is_header == 1) && strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0) state->message_state = MSG_CONTENT_TYPE; - else if(strncasecmp(buf, "Content-Transfer-Encoding:", strlen("Content-Transfer-Encoding:")) == 0) state->message_state = MSG_CONTENT_TRANSFER_ENCODING; - else if(strncasecmp(buf, "Content-Disposition:", strlen("Content-Disposition:")) == 0) state->message_state = MSG_CONTENT_DISPOSITION; - - - if(state->message_state == MSG_CONTENT_TYPE || state->message_state == MSG_CONTENT_TRANSFER_ENCODING) state->is_header = 1; - /* header checks */ if(state->is_header == 1){ if(strncasecmp(buf, "From:", strlen("From:")) == 0) state->message_state = MSG_FROM; + + else if(strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0) state->message_state = MSG_CONTENT_TYPE; + else if(strncasecmp(buf, "Content-Transfer-Encoding:", strlen("Content-Transfer-Encoding:")) == 0) state->message_state = MSG_CONTENT_TRANSFER_ENCODING; + else if(strncasecmp(buf, "Content-Disposition:", strlen("Content-Disposition:")) == 0) state->message_state = MSG_CONTENT_DISPOSITION; + else if(strncasecmp(buf, "To:", 3) == 0) state->message_state = MSG_TO; else if(strncasecmp(buf, "Cc:", 3) == 0) state->message_state = MSG_CC; else if(strncasecmp(buf, "Bcc:", 4) == 0) state->message_state = MSG_CC; @@ -358,8 +356,10 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int } - if((p = strcasestr(buf, "boundary"))){ - extract_boundary(p, state); + if(state->message_state == MSG_CONTENT_TYPE){ + if((p = strcasestr(buf, "boundary"))){ + extract_boundary(p, state); + } } @@ -426,7 +426,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int p++; if(*p == ' ' || *p == '\t') p++; snprintf(state->type, TINYBUFSIZE-1, "%s", p); - state->content_type_is_set = 1; + //state->content_type_is_set = 1; p = strchr(state->type, ';'); if(p) *p = '\0'; } @@ -476,7 +476,9 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int boundary_line = is_item_on_string(state->boundaries, buf); if(!strstr(buf, "boundary=") && !strstr(buf, "boundary =") && boundary_line == 1){ - state->content_type_is_set = 0; + state->is_header = 1; + + //state->content_type_is_set = 0; if(state->has_to_dump == 1){ if(take_into_pieces == 1 && state->fd != -1){ diff --git a/src/test.c b/src/test.c index 08a60226..5cb99968 100644 --- a/src/test.c +++ b/src/test.c @@ -72,7 +72,10 @@ int main(int argc, char **argv){ snprintf(sdata.filename, SMALLBUFSIZE-1, "%s", argv[1]); snprintf(sdata.tmpframe, SMALLBUFSIZE-1, "%s.m", argv[1]); + printf("parsing...\n"); state = parse_message(&sdata, 1, &data, &cfg); + + printf("post parsing...\n"); post_parse(&sdata, &state, &cfg); printf("message-id: %s\n", state.message_id);