From d9d5f7db66a58d52362b37186269cbbd2626e8a9 Mon Sep 17 00:00:00 2001 From: Janos SUTO Date: Sat, 12 Dec 2020 21:43:16 +0100 Subject: [PATCH] Handle the Sender: header line Signed-off-by: Janos SUTO --- src/defs.h | 23 ++++++++++++----------- src/import.c | 2 +- src/message.c | 14 +++++++------- src/parser.c | 23 +++++++++++++++++++++-- src/parser.h | 2 +- src/parser_utils.c | 2 ++ src/piler.c | 2 +- src/reindex.c | 2 +- src/test.c | 5 +++-- src/tokenizer.c | 12 ++++++++++++ unit_tests/check_attachments.c | 2 +- unit_tests/check_rules.c | 6 +++--- unit_tests/common.c | 4 ++-- 13 files changed, 67 insertions(+), 32 deletions(-) diff --git a/src/defs.h b/src/defs.h index 7361b453..d6c653b4 100644 --- a/src/defs.h +++ b/src/defs.h @@ -27,16 +27,17 @@ #define MSG_BODY 0 #define MSG_RECEIVED 1 #define MSG_FROM 2 -#define MSG_TO 3 -#define MSG_CC 4 -#define MSG_SUBJECT 5 -#define MSG_CONTENT_TYPE 6 -#define MSG_CONTENT_TRANSFER_ENCODING 7 -#define MSG_CONTENT_DISPOSITION 8 -#define MSG_MESSAGE_ID 9 -#define MSG_REFERENCES 10 -#define MSG_RECIPIENT 11 -#define MSG_ENVELOPE_TO 12 +#define MSG_SENDER 3 +#define MSG_TO 4 +#define MSG_CC 5 +#define MSG_SUBJECT 6 +#define MSG_CONTENT_TYPE 7 +#define MSG_CONTENT_TRANSFER_ENCODING 8 +#define MSG_CONTENT_DISPOSITION 9 +#define MSG_MESSAGE_ID 10 +#define MSG_REFERENCES 11 +#define MSG_RECIPIENT 12 +#define MSG_ENVELOPE_TO 13 #define MAXHASH 277 @@ -203,7 +204,7 @@ struct parser_state { char reference[SMALLBUFSIZE]; - char b_from[SMALLBUFSIZE], b_from_domain[SMALLBUFSIZE], b_to[MAXBUFSIZE], b_to_domain[SMALLBUFSIZE], b_subject[MAXBUFSIZE], b_body[BIGBUFSIZE]; + char b_from[SMALLBUFSIZE], b_from_domain[SMALLBUFSIZE], b_sender[SMALLBUFSIZE], b_sender_domain[SMALLBUFSIZE], b_to[MAXBUFSIZE], b_to_domain[SMALLBUFSIZE], b_subject[MAXBUFSIZE], b_body[BIGBUFSIZE]; char b_journal_to[MAXBUFSIZE]; unsigned int bodylen; diff --git a/src/import.c b/src/import.c index 601c9b84..c3decf9f 100644 --- a/src/import.c +++ b/src/import.c @@ -71,7 +71,7 @@ int import_message(struct session_data *sdata, struct data *data, struct config sdata->import = 1; state = parse_message(sdata, 1, data, cfg); - post_parse(sdata, &state, cfg); + post_parse(sdata, data, &state, cfg); rule = check_against_ruleset(data->archiving_rules, &state, sdata->tot_len, sdata->spam_message); if(rule){ diff --git a/src/message.c b/src/message.c index 7700fcdb..5c755839 100644 --- a/src/message.c +++ b/src/message.c @@ -34,18 +34,18 @@ int store_index_data(struct session_data *sdata, struct parser_state *state, str if(prepare_sql_statement(sdata, &sql, SQL_PREPARED_STMT_INSERT_INTO_SPHINX_TABLE) == ERR) return rc; - fix_email_address_for_sphinx(state->b_from); + fix_email_address_for_sphinx(state->b_sender); fix_email_address_for_sphinx(state->b_to); - fix_email_address_for_sphinx(state->b_from_domain); + fix_email_address_for_sphinx(state->b_sender_domain); fix_email_address_for_sphinx(state->b_to_domain); p_bind_init(&sql); sql.sql[sql.pos] = (char *)&id; sql.type[sql.pos] = TYPE_LONGLONG; sql.pos++; - sql.sql[sql.pos] = state->b_from; sql.type[sql.pos] = TYPE_STRING; sql.pos++; + sql.sql[sql.pos] = state->b_sender; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = state->b_to; sql.type[sql.pos] = TYPE_STRING; sql.pos++; - sql.sql[sql.pos] = state->b_from_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; + sql.sql[sql.pos] = state->b_sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = state->b_to_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = subj; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = state->b_body; sql.type[sql.pos] = TYPE_STRING; sql.pos++; @@ -188,7 +188,7 @@ int store_meta_data(struct session_data *sdata, struct parser_state *state, stru subj = state->b_subject; if(*subj == ' ') subj++; - snprintf(s, sizeof(s)-1, "%llu+%s%s%s%ld%ld%ld%d%d%d%d%s%s%s", id, subj, state->b_from, state->message_id, sdata->now, sdata->sent, sdata->retained, sdata->tot_len, sdata->hdr_len, sdata->direction, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest); + snprintf(s, sizeof(s)-1, "%llu+%s%s%s%ld%ld%ld%d%d%d%d%s%s%s", id, subj, state->b_sender, state->message_id, sdata->now, sdata->sent, sdata->retained, sdata->tot_len, sdata->hdr_len, sdata->direction, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest); digest_string(s, &vcode[0]); @@ -203,7 +203,7 @@ int store_meta_data(struct session_data *sdata, struct parser_state *state, stru memset(s2, 0, sizeof(s2)); - p = state->b_from; + p = state->b_sender; do { memset(s2, 0, sizeof(s2)); p = split(p, ' ', s2, sizeof(s2)-1, &result); @@ -222,7 +222,7 @@ int store_meta_data(struct session_data *sdata, struct parser_state *state, stru p_bind_init(&sql); sql.sql[sql.pos] = &s2[0]; sql.type[sql.pos] = TYPE_STRING; sql.pos++; - sql.sql[sql.pos] = state->b_from_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; + sql.sql[sql.pos] = state->b_sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = subj; sql.type[sql.pos] = TYPE_STRING; sql.pos++; sql.sql[sql.pos] = (char *)&sdata->spam_message; sql.type[sql.pos] = TYPE_LONG; sql.pos++; sql.sql[sql.pos] = (char *)&sdata->now; sql.type[sql.pos] = TYPE_LONG; sql.pos++; diff --git a/src/parser.c b/src/parser.c index e3c36528..bcbba16d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -72,7 +72,7 @@ struct parser_state parse_message(struct session_data *sdata, int take_into_piec } -void post_parse(struct session_data *sdata, struct parser_state *state, struct config *cfg){ +void post_parse(struct session_data *sdata, struct data *data, struct parser_state *state, struct config *cfg){ int i; clearhash(state->boundaries); @@ -84,6 +84,19 @@ void post_parse(struct session_data *sdata, struct parser_state *state, struct c if(strlen(state->b_from) > 255) state->b_from[255] = '\0'; if(strlen(state->b_from_domain) > 255) state->b_from_domain[255] = '\0'; + if(strlen(state->b_sender) > 255) state->b_sender[255] = '\0'; + if(strlen(state->b_sender_domain) > 255) state->b_sender_domain[255] = '\0'; + + // If Sender: header doesn't exist, then copy the From: header value to it + // Otherwise append the From: address to the recipients list + + if(state->b_sender[0] == '\0'){ + strcpy(state->b_sender, state->b_from); + strcpy(state->b_sender_domain, state->b_from_domain); + } else { + add_recipient(state->b_from, strlen(state->b_from), sdata, state, data, cfg); + } + // Truncate the message_id if it's >255 characters if(strlen(state->message_id) > 255) state->message_id[255] = '\0'; @@ -358,6 +371,10 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata state->message_state = MSG_FROM; buf += strlen("From:"); } + else if(strncasecmp(buf, "Sender:", strlen("Sender:")) == 0){ + state->message_state = MSG_SENDER; + buf += strlen("Sender:"); + } else if(strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0){ state->message_state = MSG_CONTENT_TYPE; } @@ -545,6 +562,8 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata memset(state->b_body, 0, BIGBUFSIZE); memset(state->b_from, 0, SMALLBUFSIZE); memset(state->b_from_domain, 0, SMALLBUFSIZE); + memset(state->b_sender, 0, SMALLBUFSIZE); + memset(state->b_sender_domain, 0, SMALLBUFSIZE); memset(state->message_id, 0, SMALLBUFSIZE); sdata->ms_journal = 0; @@ -616,7 +635,7 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata /* skip irrelevant headers */ - if(state->is_header == 1 && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC && state->message_state != MSG_RECIPIENT && state->message_state != MSG_ENVELOPE_TO) return 0; + if(state->is_header == 1 && state->message_state != MSG_FROM && state->message_state != MSG_SENDER && state->message_state != MSG_TO && state->message_state != MSG_CC && state->message_state != MSG_RECIPIENT && state->message_state != MSG_ENVELOPE_TO) return 0; /* don't process body if it's not a text or html part */ diff --git a/src/parser.h b/src/parser.h index 5525af8a..ca81e3bc 100644 --- a/src/parser.h +++ b/src/parser.h @@ -10,7 +10,7 @@ #include "defs.h" struct parser_state parse_message(struct session_data *sdata, int take_into_pieces, struct data *data, struct config *cfg); -void post_parse(struct session_data *sdata, struct parser_state *state, struct config *cfg); +void post_parse(struct session_data *sdata, struct data *data, struct parser_state *state, struct config *cfg); int parse_line(char *buf, struct parser_state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, unsigned int writebuffersize, char *abuffer, unsigned int abuffersize, struct data *data, struct config *cfg); void init_state(struct parser_state *state); diff --git a/src/parser_utils.c b/src/parser_utils.c index 87e91cb8..6cd2bbf9 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -90,6 +90,8 @@ void init_state(struct parser_state *state){ memset(state->b_from, 0, SMALLBUFSIZE); memset(state->b_from_domain, 0, SMALLBUFSIZE); + memset(state->b_sender, 0, SMALLBUFSIZE); + memset(state->b_sender_domain, 0, SMALLBUFSIZE); memset(state->b_to, 0, MAXBUFSIZE); memset(state->b_to_domain, 0, SMALLBUFSIZE); memset(state->b_subject, 0, MAXBUFSIZE); diff --git a/src/piler.c b/src/piler.c index 684d90de..fa49a074 100644 --- a/src/piler.c +++ b/src/piler.c @@ -163,7 +163,7 @@ int process_email(char *filename, struct session_data *sdata, struct data *data, parser_state = parse_message(sdata, 1, data, cfg); - post_parse(sdata, &parser_state, cfg); + post_parse(sdata, data, &parser_state, cfg); if(cfg->syslog_recipients == 1){ char *rcpt = parser_state.b_to; diff --git a/src/reindex.c b/src/reindex.c index b2b137b4..812fbe4b 100644 --- a/src/reindex.c +++ b/src/reindex.c @@ -124,7 +124,7 @@ uint64 retrieve_email_by_metadata_id(struct session_data *sdata, struct data *da snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", filename); state = parse_message(sdata, 1, data, cfg); - post_parse(sdata, &state, cfg); + post_parse(sdata, data, &state, cfg); rc = store_index_data(sdata, &state, data, stored_id, cfg); diff --git a/src/test.c b/src/test.c index 91e2bcfc..4d7a8a3f 100644 --- a/src/test.c +++ b/src/test.c @@ -134,7 +134,7 @@ int main(int argc, char **argv){ init_session_data(&sdata, &cfg); - + sdata.delivered = 0; sdata.tot_len = st.st_size; sdata.import = 1; @@ -147,10 +147,11 @@ int main(int argc, char **argv){ state = parse_message(&sdata, 1, &data, &cfg); printf("post parsing...\n"); - post_parse(&sdata, &state, &cfg); + post_parse(&sdata, &data, &state, &cfg); printf("message-id: %s / %s\n", state.message_id, state.message_id_hash); printf("from: *%s (%s)*\n", state.b_from, state.b_from_domain); + printf("sender: *%s (%s)*\n", state.b_sender, state.b_sender_domain); printf("to: *%s (%s)*\n", state.b_to, state.b_to_domain); printf("reference: *%s*\n", state.reference); printf("subject: *%s*\n", state.b_subject); diff --git a/src/tokenizer.c b/src/tokenizer.c index 52edbe59..ebadbcff 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -75,6 +75,18 @@ void tokenize(char *buf, struct parser_state *state, struct session_data *sdata, } } } + else if(state->message_state == MSG_SENDER && state->is_1st_header == 1 && strlen(state->b_sender) < SMALLBUFSIZE-len-1){ + strtolower(puf); + + q = strchr(puf, '@'); + if(q) fix_plus_sign_in_email_address(puf, &q, &len); + + memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len); + if(strlen(state->b_sender) < SMALLBUFSIZE-len-1){ + split_email_address(puf); + memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len); + } + } else if((state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT || state->message_state == MSG_ENVELOPE_TO) && state->is_1st_header == 1 && state->tolen < MAXBUFSIZE-len-1){ strtolower(puf); diff --git a/unit_tests/check_attachments.c b/unit_tests/check_attachments.c index e051ea40..f6ee2b1c 100644 --- a/unit_tests/check_attachments.c +++ b/unit_tests/check_attachments.c @@ -43,7 +43,7 @@ static void test_attachments(struct config *cfg){ snprintf(sdata.tmpframe, SMALLBUFSIZE-1, "%s.m", tests[i].s); state = parse_message(&sdata, 1, &data, cfg); - post_parse(&sdata, &state, cfg); + post_parse(&sdata, &data, &state, cfg); for(j=1; j<=state.n_attachments; j++){ unlink(state.attachments[j].internalname); diff --git a/unit_tests/check_rules.c b/unit_tests/check_rules.c index 596a8c41..47d18ca8 100644 --- a/unit_tests/check_rules.c +++ b/unit_tests/check_rules.c @@ -41,7 +41,7 @@ static void fill_rule_table(struct config *cfg){ for(i=0; idelivered = 0; sdata->tot_len = st.st_size; @@ -30,7 +30,7 @@ int setup_and_parse_message(struct session_data *sdata, struct parser_state *sta snprintf(sdata->tmpframe, SMALLBUFSIZE-1, "%s.m", filename); *state = parse_message(sdata, 1, data, cfg); - post_parse(sdata, state, cfg); + post_parse(sdata, data, state, cfg); return 0; }