Fixed from/sender processing

Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
Janos SUTO 2020-12-13 07:50:03 +01:00
parent d9d5f7db66
commit 4b37b4fc1a
6 changed files with 79 additions and 51 deletions

View File

@ -20,7 +20,7 @@
int store_index_data(struct session_data *sdata, struct parser_state *state, struct data *data, uint64 id, struct config *cfg){
int rc=ERR;
char *subj;
char *subj, *sender=state->b_from, *sender_domain=state->b_from_domain;
struct sql sql;
if(data->folder == 0){
@ -34,18 +34,24 @@ int store_index_data(struct session_data *sdata, struct parser_state *state, str
if(prepare_sql_statement(sdata, &sql, SQL_PREPARED_STMT_INSERT_INTO_SPHINX_TABLE) == ERR) return rc;
fix_email_address_for_sphinx(state->b_from);
fix_email_address_for_sphinx(state->b_sender);
fix_email_address_for_sphinx(state->b_to);
fix_email_address_for_sphinx(state->b_from_domain);
fix_email_address_for_sphinx(state->b_sender_domain);
fix_email_address_for_sphinx(state->b_to_domain);
if(state->b_sender_domain){
sender = state->b_sender;
sender_domain = state->b_sender_domain;
}
p_bind_init(&sql);
sql.sql[sql.pos] = (char *)&id; sql.type[sql.pos] = TYPE_LONGLONG; sql.pos++;
sql.sql[sql.pos] = state->b_sender; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = sender; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = state->b_to; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = state->b_sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = state->b_to_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = subj; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = state->b_body; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
@ -180,15 +186,25 @@ int update_metadata_reference(struct session_data *sdata, struct parser_state *s
int store_meta_data(struct session_data *sdata, struct parser_state *state, struct data *data, struct config *cfg){
int rc=ERR, result;
char *subj, *p, s[MAXBUFSIZE], s2[SMALLBUFSIZE], vcode[2*DIGEST_LENGTH+1], ref[2*DIGEST_LENGTH+1];
int rc=ERR;
char *subj, *sender, *sender_domain, s[MAXBUFSIZE], s2[SMALLBUFSIZE], vcode[2*DIGEST_LENGTH+1], ref[2*DIGEST_LENGTH+1];
uint64 id=0;
struct sql sql;
subj = state->b_subject;
if(*subj == ' ') subj++;
snprintf(s, sizeof(s)-1, "%llu+%s%s%s%ld%ld%ld%d%d%d%d%s%s%s", id, subj, state->b_sender, state->message_id, sdata->now, sdata->sent, sdata->retained, sdata->tot_len, sdata->hdr_len, sdata->direction, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest);
if(state->b_sender_domain){
sender = state->b_sender;
sender_domain = state->b_sender_domain;
get_first_email_address_from_string(state->b_sender, s2, sizeof(s2));
} else {
sender = state->b_from;
sender_domain = state->b_from_domain;
get_first_email_address_from_string(state->b_from, s2, sizeof(s2));
}
snprintf(s, sizeof(s)-1, "%llu+%s%s%s%ld%ld%ld%d%d%d%d%s%s%s", id, subj, sender, state->message_id, sdata->now, sdata->sent, sdata->retained, sdata->tot_len, sdata->hdr_len, sdata->direction, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest);
digest_string(s, &vcode[0]);
@ -201,19 +217,6 @@ int store_meta_data(struct session_data *sdata, struct parser_state *state, stru
if(prepare_sql_statement(sdata, &sql, SQL_PREPARED_STMT_INSERT_INTO_META_TABLE) == ERR) return ERR;
memset(s2, 0, sizeof(s2));
p = state->b_sender;
do {
memset(s2, 0, sizeof(s2));
p = split(p, ' ', s2, sizeof(s2)-1, &result);
if(s2[0] == '\0') continue;
if(does_it_seem_like_an_email_address(s2) == 1){ break; }
} while(p);
if(strlen(state->b_to) < 5){
snprintf(state->b_to, SMALLBUFSIZE-1, "undisclosed-recipients@no.domain");
}
@ -222,7 +225,7 @@ int store_meta_data(struct session_data *sdata, struct parser_state *state, stru
p_bind_init(&sql);
sql.sql[sql.pos] = &s2[0]; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = state->b_sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = sender_domain; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = subj; sql.type[sql.pos] = TYPE_STRING; sql.pos++;
sql.sql[sql.pos] = (char *)&sdata->spam_message; sql.type[sql.pos] = TYPE_LONG; sql.pos++;
sql.sql[sql.pos] = (char *)&sdata->now; sql.type[sql.pos] = TYPE_LONG; sql.pos++;

View File

@ -80,22 +80,18 @@ void post_parse(struct session_data *sdata, struct data *data, struct parser_sta
clearhash(state->rcpt_domain);
clearhash(state->journal_recipient);
// Fix From: line if it's too long
// Fix From: and Sender: lines if they are too long
if(strlen(state->b_from) > 255) state->b_from[255] = '\0';
if(strlen(state->b_from_domain) > 255) state->b_from_domain[255] = '\0';
if(strlen(state->b_sender) > 255) state->b_sender[255] = '\0';
if(strlen(state->b_sender_domain) > 255) state->b_sender_domain[255] = '\0';
// If Sender: header doesn't exist, then copy the From: header value to it
// Otherwise append the From: address to the recipients list
// TODO: If both Sender: and From: headers exist, and they are different, then
// append the From: address to recipients list to give him access to this email
// as well
if(state->b_sender[0] == '\0'){
strcpy(state->b_sender, state->b_from);
strcpy(state->b_sender_domain, state->b_from_domain);
} else {
add_recipient(state->b_from, strlen(state->b_from), sdata, state, data, cfg);
}
// Truncate the message_id if it's >255 characters
if(strlen(state->message_id) > 255) state->message_id[255] = '\0';

View File

@ -38,5 +38,6 @@ void fix_plus_sign_in_email_address(char *puf, char **at_sign, unsigned int *len
void tokenize(char *buf, struct parser_state *state, struct session_data *sdata, struct data *data, struct config *cfg);
void flush_attachment_buffer(struct parser_state *state, char *abuffer, unsigned int abuffersize);
void fill_attachment_name_buf(struct parser_state *state, char *buf);
int get_first_email_address_from_string(char *str, char *buf, int buflen);
#endif /* _PARSER_H */

View File

@ -624,9 +624,9 @@ void translateLine(unsigned char *p, struct parser_state *state){
for(; *p; p++){
if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT) && *p == '@'){ continue; }
if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_SENDER || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT) && *p == '@'){ continue; }
if(state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT){
if(state->message_state == MSG_FROM || state->message_state == MSG_SENDER || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT){
/* To fix some unusual addresses, eg.
* "'user@domain'" -> user@domain
@ -1077,3 +1077,20 @@ void fill_attachment_name_buf(struct parser_state *state, char *buf){
state->anamepos++;
}
}
int get_first_email_address_from_string(char *str, char *buf, int buflen){
int result;
char *p = str;
do {
memset(buf, 0, buflen);
p = split(p, ' ', buf, buflen-1, &result);
if(*buf == '\0') continue;
if(does_it_seem_like_an_email_address(buf) == 1){ return 1; }
} while(p);
return 0;
}

View File

@ -82,9 +82,16 @@ void tokenize(char *buf, struct parser_state *state, struct session_data *sdata,
if(q) fix_plus_sign_in_email_address(puf, &q, &len);
memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len);
if(strlen(state->b_sender) < SMALLBUFSIZE-len-1){
split_email_address(puf);
memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len);
if(len >= MIN_EMAIL_ADDRESS_LEN && does_it_seem_like_an_email_address(puf) == 1 && state->b_sender_domain[0] == '\0'){
if(q && strlen(q) > 5){
memcpy(&(state->b_sender_domain), q+1, strlen(q+1)-1);
}
if(strlen(state->b_sender) < SMALLBUFSIZE-len-1){
split_email_address(puf);
memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len);
}
}
}
else if((state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_RECIPIENT || state->message_state == MSG_ENVELOPE_TO) && state->is_1st_header == 1 && state->tolen < MAXBUFSIZE-len-1){

View File

@ -10,6 +10,8 @@ struct parser_test {
char message_id[SMALLBUFSIZE];
char from[SMALLBUFSIZE];
char from_domain[SMALLBUFSIZE];
char sender[SMALLBUFSIZE];
char sender_domain[SMALLBUFSIZE];
char to[SMALLBUFSIZE];
char to_domain[SMALLBUFSIZE];
char reference[SMALLBUFSIZE];
@ -26,22 +28,22 @@ static void test_parser(struct config *cfg){
struct data data;
struct parser_test tests[] = {
{"1.eml", "<ajahhdddhjdhddh@jatekokbirodalma.hu>", "játékok birodalma játékbolt hirlevel@jatekokbirodalma.hu hirlevel jatekokbirodalma hu ", "jatekokbirodalma.hu", "architerv m sj@acts.hu sj acts hu ", "acts.hu ", "", "BLACK FRIDAY - Hihetetlen kedvezmények csak 1 napig november 27-én", 2},
{"2.eml", "<20151101142653.111156815AF6D@acts.hu>", "jml lighting huixinsoft67@foxmail.com huixinsoft67 foxmail com ", "foxmail.com", "sj@acts.hu sj acts hu ", "acts.hu ", "", "New design ultra slim led panel light", 0},
{"5-ibm-images.eml", "<OFC1576266.0E8F7B7D-ONC12577CB.00303030-C12577CB.003053CD@hu.ibm.com>", "ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "hu.ibm.com", "cim1@aaaa.bbb.fu cim1 aaaa bbb fu ajajaj@piler.aaa.fu ajajaj piler aaa fu ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "aaaa.bbb.fu piler.aaa.fu hu.ibm.com ", "", "***Emlékeztető*** - Egészségipar - eEgészségügy (Út a jövőbe, párbeszéd a gazdaságélénkítésről) 2010. november 4.", 5},
{"9-attached-text.eml", "<list-507327664@mail.aaa.fu>", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "usuarios-no-listados ", "", "", "Please read my attached letter", 1},
{"13-xlsx.eml", "<alpine.LNX.2.00.1209261517040.17054@aaa.fu>", "aaaaa@aaa.fu aaaaa aaa fu ", "aaa.fu", "sj@acts.hu sj acts hu ", "acts.hu ", "", "ez egy teszt", 1},
{"15-image-only-spam.eml", "<av5f1fCf5XO0oBab757826337RSFKvu@pnmarketing.com>", "kriegel paff sketches@pnmarketing.com sketches pnmarketing com ", "pnmarketing.com", "holmon knobel aaaaa@acts.hu aaaaa acts hu ", "acts.hu ", "", "Lack of concentration, backed up by a vocabulary of tremendous scope, a", 1},
{"16-rfc822-attachment-1.eml", "<list-423974736@mail.aaa.fu>", "martonagnes martonagnes@lajt.hu martonagnes lajt hu erős istván eistvan@marosheviz.info ", "lajt.hu", "martonagnes@lajt.hu martonagnes lajt hu ", "lajt.hu ", "", "Féláras akció! 31000Ft/2fő/3nap húsvétkor is a Park Inn****-ben!", 2 },
{"17-attached-text-bogus-mime.eml", "<list-507327664@mail.aaa.fu>", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "usuarios-no-listados ", "", "", "Please read my attached letter", 1},
{"18-spam-html-encoding.eml", "<list-435458392@mail.aaa.fu>", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster@aaa.fu postmaster aaa fu ", "aaa.fu ", "", "TÁJÉKOZTATÁSVargay Péter", 0},
{"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1},
{"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1},
{"21-register-tricky-urls.eml", "<E1IBifn-0001un-MD@admin4.theregister.co.uk>", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0},
{"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0},
{"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0},
{"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "<GD-XXXX/1-2015> www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0},
{"33-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "hello@acts.hu hello acts hu ", "acts.hu ", "", "[JIRA] Commented: (AAAA-151) A aaa-nek kerek egy XXX-et, ZH74617282, ACC27363484944", 0},
{"1.eml", "<ajahhdddhjdhddh@jatekokbirodalma.hu>", "játékok birodalma játékbolt hirlevel@jatekokbirodalma.hu hirlevel jatekokbirodalma hu ", "jatekokbirodalma.hu", "", "", "architerv m sj@acts.hu sj acts hu ", "acts.hu ", "", "BLACK FRIDAY - Hihetetlen kedvezmények csak 1 napig november 27-én", 2},
{"2.eml", "<20151101142653.111156815AF6D@acts.hu>", "jml lighting huixinsoft67@foxmail.com huixinsoft67 foxmail com ", "foxmail.com", "jml lighting hwtwi@mcqw.com hwtwi mcqw com ", "mcqw.com", "sj@acts.hu sj acts hu ", "acts.hu ", "", "New design ultra slim led panel light", 0},
{"5-ibm-images.eml", "<OFC1576266.0E8F7B7D-ONC12577CB.00303030-C12577CB.003053CD@hu.ibm.com>", "ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "hu.ibm.com", "marta riman rimanmarta@hu.ibm.com rimanmarta hu ibm com ", "hu.ibm.com", "cim1@aaaa.bbb.fu cim1 aaaa bbb fu ajajaj@piler.aaa.fu ajajaj piler aaa fu ibm rendezveny rendezveny@hu.ibm.com rendezveny hu ibm com ", "aaaa.bbb.fu piler.aaa.fu hu.ibm.com ", "", "***Emlékeztető*** - Egészségipar - eEgészségügy (Út a jövőbe, párbeszéd a gazdaságélénkítésről) 2010. november 4.", 5},
{"9-attached-text.eml", "<list-507327664@mail.aaa.fu>", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "aaa aaa aaa@aaa.fu aaa aaa fu ", "aaa.fu", "usuarios-no-listados ", "", "", "Please read my attached letter", 1},
{"13-xlsx.eml", "<alpine.LNX.2.00.1209261517040.17054@aaa.fu>", "aaaaa@aaa.fu aaaaa aaa fu ", "aaa.fu", "", "", "sj@acts.hu sj acts hu ", "acts.hu ", "", "ez egy teszt", 1},
{"15-image-only-spam.eml", "<av5f1fCf5XO0oBab757826337RSFKvu@pnmarketing.com>", "kriegel paff sketches@pnmarketing.com sketches pnmarketing com ", "pnmarketing.com", "", "", "holmon knobel aaaaa@acts.hu aaaaa acts hu ", "acts.hu ", "", "Lack of concentration, backed up by a vocabulary of tremendous scope, a", 1},
{"16-rfc822-attachment-1.eml", "<list-423974736@mail.aaa.fu>", "martonagnes martonagnes@lajt.hu martonagnes lajt hu erős istván eistvan@marosheviz.info ", "lajt.hu", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "martonagnes@lajt.hu martonagnes lajt hu ", "lajt.hu ", "", "Féláras akció! 31000Ft/2fő/3nap húsvétkor is a Park Inn****-ben!", 2 },
{"17-attached-text-bogus-mime.eml", "<list-507327664@mail.aaa.fu>", "dr lucky amechi clubzenit@zenithoteles.com clubzenit zenithoteles com ", "zenithoteles.com", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "usuarios-no-listados ", "", "", "Please read my attached letter", 1},
{"18-spam-html-encoding.eml", "<list-435458392@mail.aaa.fu>", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "postmaster@aaa.fu postmaster aaa fu ", "aaa.fu ", "", "TÁJÉKOZTATÁSVargay Péter", 0},
{"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1},
{"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1},
{"21-register-tricky-urls.eml", "<E1IBifn-0001un-MD@admin4.theregister.co.uk>", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "", "", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0},
{"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0},
{"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0},
{"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "<GD-XXXX/1-2015> www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0},
{"33-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "[JIRA] Commented: (AAAA-151) A aaa-nek kerek egy XXX-et, ZH74617282, ACC27363484944", 0},
};
@ -66,7 +68,9 @@ static void test_parser(struct config *cfg){
assert(strcmp(state.message_id, tests[i].message_id) == 0 && "test_parser()1");
assert(strcmp(state.b_from, tests[i].from) == 0 && "test_parser()2a");
assert(strcmp(state.b_from_domain, tests[i].from_domain) == 0 && "test_parser()2b");
assert(strcmp(state.b_sender, tests[i].sender) == 0 && "test_parser()2b");
assert(strcmp(state.b_from_domain, tests[i].from_domain) == 0 && "test_parser()2c");
assert(strcmp(state.b_sender_domain, tests[i].sender_domain) == 0 && "test_parser()2d");
assert(strcmp(state.b_to, tests[i].to) == 0 && "test_parser()3a");
assert(strcmp(state.b_to_domain, tests[i].to_domain) == 0 && "test_parser()3b");
assert(strcmp(state.b_subject, tests[i].subject) == 0 && "test_parser()4");