From 723f0b5c5f6554f7bb83123918a0d4328fcf266d Mon Sep 17 00:00:00 2001 From: Janos SUTO Date: Thu, 11 Jan 2018 09:59:58 +0100 Subject: [PATCH] additional parser fix Signed-off-by: Janos SUTO --- src/parser.c | 68 ++++++++++++++++++--------------------- unit_tests/check_parser.c | 21 ++++-------- unit_tests/test.conf | 1 + 3 files changed, 40 insertions(+), 50 deletions(-) diff --git a/src/parser.c b/src/parser.c index c028e9e7..39af6145 100644 --- a/src/parser.c +++ b/src/parser.c @@ -109,6 +109,7 @@ void post_parse(struct session_data *sdata, struct parser_state *state, struct c clearhash(state->rcpt_domain); clearhash(state->journal_recipient); + fixupEncodedHeaderLine(state->b_subject, MAXBUFSIZE); trimBuffer(state->b_subject); if(sdata->internal_sender == 0) sdata->direction = DIRECTION_INCOMING; @@ -359,8 +360,10 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata sdata->spam_message = 1; } - if(strncasecmp(buf, "From:", strlen("From:")) == 0) state->message_state = MSG_FROM; - + if(strncasecmp(buf, "From:", strlen("From:")) == 0){ + state->message_state = MSG_FROM; + buf += strlen("From:"); + } else if(strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0){ state->message_state = MSG_CONTENT_TYPE; @@ -382,14 +385,28 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata } } - else if(strncasecmp(buf, "To:", 3) == 0) state->message_state = MSG_TO; - else if(strncasecmp(buf, "Cc:", 3) == 0) state->message_state = MSG_CC; - else if(strncasecmp(buf, "Bcc:", 4) == 0) state->message_state = MSG_CC; + else if(strncasecmp(buf, "To:", 3) == 0){ + state->message_state = MSG_TO; + buf += strlen("To:"); + } + else if(strncasecmp(buf, "Cc:", 3) == 0){ + state->message_state = MSG_CC; + buf += strlen("Cc:"); + } + else if(strncasecmp(buf, "Bcc:", 4) == 0){ + state->message_state = MSG_CC; + buf += strlen("Bcc:"); + } else if(strncasecmp(buf, "Message-Id:", 11) == 0) state->message_state = MSG_MESSAGE_ID; else if(strncasecmp(buf, "References:", 11) == 0) state->message_state = MSG_REFERENCES; - else if(strncasecmp(buf, "Subject:", strlen("Subject:")) == 0) state->message_state = MSG_SUBJECT; - else if(strncasecmp(buf, "Recipient:", strlen("Recipient:")) == 0) state->message_state = MSG_RECIPIENT; - + else if(strncasecmp(buf, "Subject:", strlen("Subject:")) == 0){ + state->message_state = MSG_SUBJECT; + buf += strlen("Subject:"); + } + else if(strncasecmp(buf, "Recipient:", strlen("Recipient:")) == 0){ + state->message_state = MSG_RECIPIENT; + buf += strlen("Recipient:"); + } if(sdata->ms_journal == 1 && (state->message_state == MSG_TO || state->message_state == MSG_RECIPIENT) ){ p = strstr(buf, "Expanded:"); if(p) *p = '\0'; @@ -413,7 +430,10 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata else if(strncasecmp(buf, "Delivery-date:", strlen("Delivery-date:")) == 0 && sdata->delivered == 0) sdata->delivered = parse_date_header(buf); else if(strncasecmp(buf, "Received:", strlen("Received:")) == 0) state->message_state = MSG_RECEIVED; - else if(cfg->extra_to_field[0] != '\0' && strncasecmp(buf, cfg->extra_to_field, strlen(cfg->extra_to_field)) == 0) state->message_state = MSG_TO; + else if(cfg->extra_to_field[0] != '\0' && strncasecmp(buf, cfg->extra_to_field, strlen(cfg->extra_to_field)) == 0){ + state->message_state = MSG_TO; + buf += strlen(cfg->extra_to_field); + } if(state->message_state == MSG_MESSAGE_ID && state->message_id[0] == 0){ p = strchr(buf+11, ' '); @@ -475,31 +495,8 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata if(state->is_1st_header == 1){ if(state->message_state == MSG_SUBJECT && strlen(state->b_subject) + strlen(buf) < MAXBUFSIZE-1){ - - if(state->b_subject[0] == '\0'){ - p = &buf[0]; - if(strncmp(buf, "Subject:", strlen("Subject:")) == 0) p += strlen("Subject:"); - if(*p == ' ') p++; - - fixupEncodedHeaderLine(p, MAXBUFSIZE); - strncat(state->b_subject, p, MAXBUFSIZE-strlen(state->b_subject)-1); - } - else { - - /* - * if the next subject line is encoded, then strip the whitespace characters at the beginning of the line - */ - - p = buf; - - if(strcasestr(buf, "?Q?") || strcasestr(buf, "?B?")){ - while(isspace(*p)) p++; - } - - fixupEncodedHeaderLine(p, MAXBUFSIZE); - - strncat(state->b_subject, p, MAXBUFSIZE-strlen(state->b_subject)-1); - } + // buffer the subject lines, and decode it later + strncat(state->b_subject, buf, MAXBUFSIZE-strlen(state->b_subject)-1); } else { fixupEncodedHeaderLine(buf, MAXBUFSIZE); } } @@ -692,8 +689,7 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata reassembleToken(buf); - if(state->is_header == 1) p = strchr(buf, ' '); - else p = buf; + p = buf; //printf("a: %d/%d/%d/%d/j=%d %s\n", state->is_1st_header, state->is_header, state->message_rfc822, state->message_state, sdata->ms_journal, buf); diff --git a/unit_tests/check_parser.c b/unit_tests/check_parser.c index f4606cae..e0833b41 100644 --- a/unit_tests/check_parser.c +++ b/unit_tests/check_parser.c @@ -2,14 +2,7 @@ * check_parser.c, SJ */ -#include -#include -#include -#include -#include -#include -#include -#include "../src/piler.h" +#include "test.h" struct parser_test { @@ -42,17 +35,19 @@ static void test_parser(struct config *cfg){ {"15-image-only-spam.eml", "", "kriegel paff sketches@pnmarketing.com sketches pnmarketing com ", "pnmarketing.com", "holmon knobel aaaaa@acts.hu aaaaa acts hu ", "acts.hu ", "", "Lack of concentration, backed up by a vocabulary of tremendous scope, a", 1}, {"16-rfc822-attachment-1.eml", "", "martonagnes martonagnes@lajt.hu martonagnes lajt hu erős istván eistvan@marosheviz.info ", "lajt.hu", "martonagnes@lajt.hu martonagnes lajt hu ", "lajt.hu ", "", "Féláras akció! 31000Ft/2fő/3nap húsvétkor is a Park Inn****-ben!", 2 }, {"17-attached-text-bogus-mime.eml", "", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "usuarios-no-listados ", "", "", "Please read my attached letter", 1}, - {"18-spam-html-encoding.eml", "", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster@aaa.fu postmaster aaa fu ", "aaa.fu ", "", "TÁJÉKOZTATÁS Vargay Péter", 0}, + {"18-spam-html-encoding.eml", "", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster@aaa.fu postmaster aaa fu ", "aaa.fu ", "", "TÁJÉKOZTATÁSVargay Péter", 0}, {"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, {"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, - {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, - {"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com Aaagágyi és kia ttt webstat hiba", 0}, + {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, + {"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0}, {"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0}, {"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", " www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0}, {"33-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "[JIRA] Commented: (AAAA-151) A aaa-nek kerek egy XXX-et, ZH74617282, ACC27363484944", 0}, }; + TEST_HEADER(); + if(open_database(&sdata, cfg) == ERR){ printf("cannot open database\n"); return; @@ -78,8 +73,6 @@ static void test_parser(struct config *cfg){ state = parse_message(&sdata, 1, &data, cfg); post_parse(&sdata, &state, cfg); - //printf("%s, %s/%s %d / %d\n", tests[i].s, tests[i].message_id, state.message_id, tests[i].n_attachments, state.n_attachments); - for(j=1; j<=state.n_attachments; j++){ unlink(state.attachments[j].internalname); } @@ -97,7 +90,7 @@ static void test_parser(struct config *cfg){ close_database(&sdata); - printf("test_parser() OK\n"); + TEST_FOOTER(); } diff --git a/unit_tests/test.conf b/unit_tests/test.conf index 995d28ca..652991c6 100644 --- a/unit_tests/test.conf +++ b/unit_tests/test.conf @@ -30,6 +30,7 @@ pemfile=/usr/local/etc/piler.pem pidfile=/var/run/piler/piler.pid piler_header_field=X-piler-id: process_rcpt_to_addresses=0 +queuedir=./store server_id=0 spam_header_line= syslog_recipients=0