From 27119a17ab2f0dbd5953f838da98be38515fe0cf Mon Sep 17 00:00:00 2001 From: Janos SUTO Date: Fri, 18 Feb 2022 18:10:05 +0100 Subject: [PATCH] Added support for long email addresses to mitigate sphinx <=3.1.1 issue Signed-off-by: Janos SUTO --- src/config.h | 7 +++++++ src/defs.h | 1 + src/digest.c | 24 ++++++++++++++++++++++++ src/piler.h | 1 + src/tokenizer.c | 17 +++++++++++++++++ unit_tests/check_parser.c | 2 +- 6 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/config.h b/src/config.h index 610d407b..d9de24a0 100644 --- a/src/config.h +++ b/src/config.h @@ -39,6 +39,13 @@ #define IPLEN 16+1 #define KEYLEN 56 #define MIN_EMAIL_ADDRESS_LEN 9 +// Sphinx 3.1.1 has an issue with tokens longer than 41 characters. +// Sphinx-3.3.1+ seems to be fine, and not affected. +// +// Note that we use 42, because the parser adds a trailing space to the tokens +// See https://www.mailpiler.org/wiki/current:sphinx3 and +// https://bitbucket.org/jsuto/piler/issues/1082/no-sphinx-results-with-long-email for more +#define MAX_EMAIL_ADDRESS_SPHINX_LEN 42 #define CRLF "\n" diff --git a/src/defs.h b/src/defs.h index ffc2849c..4160a694 100644 --- a/src/defs.h +++ b/src/defs.h @@ -14,6 +14,7 @@ #include #endif +#include #include #include #include diff --git a/src/digest.c b/src/digest.c index 20134730..50d98cd5 100644 --- a/src/digest.c +++ b/src/digest.c @@ -132,3 +132,27 @@ void digest_string(char *s, char *digest){ snprintf(digest + i*2, 2*DIGEST_LENGTH, "%02x", md[i]); } + + +void md5_string(char *s, char *digest){ + int i; + unsigned char md[MD5_DIGEST_LENGTH]; + MD5_CTX context; + + memset(digest, 0, 2*MD5_DIGEST_LENGTH+2); + + MD5_Init(&context); + + MD5_Update(&context, s, strlen(s)); + + MD5_Final(md, &context); + + for(i=0;imessage_state == MSG_FROM && state->is_1st_header == 1 && strlen(state->b_from) < SMALLBUFSIZE-len-1){ strtolower(puf); @@ -69,6 +71,11 @@ void tokenize(char *buf, struct parser_state *state, struct session_data *sdata, if(is_email_address_on_my_domains(puf, data) == 1) sdata->internal_sender = 1; + if(len >= MAX_EMAIL_ADDRESS_SPHINX_LEN && strlen(state->b_from) < SMALLBUFSIZE-len-1){ + create_md5_from_email_address(puf, md5buf); + memcpy(&(state->b_from[strlen(state->b_from)]), md5buf, strlen(md5buf)); + } + if(strlen(state->b_from) < SMALLBUFSIZE-len-1){ split_email_address(puf); memcpy(&(state->b_from[strlen(state->b_from)]), puf, len); @@ -88,6 +95,11 @@ void tokenize(char *buf, struct parser_state *state, struct session_data *sdata, memcpy(&(state->b_sender_domain), q+1, strlen(q+1)-1); } + if(len >= MAX_EMAIL_ADDRESS_SPHINX_LEN && strlen(state->b_sender) < SMALLBUFSIZE-len-1){ + create_md5_from_email_address(puf, md5buf); + memcpy(&(state->b_sender[strlen(state->b_sender)]), md5buf, strlen(md5buf)); + } + if(strlen(state->b_sender) < SMALLBUFSIZE-len-1){ split_email_address(puf); memcpy(&(state->b_sender[strlen(state->b_sender)]), puf, len); @@ -108,6 +120,11 @@ void tokenize(char *buf, struct parser_state *state, struct session_data *sdata, if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: journal rcpt: '%s'", sdata->ttmpfile, puf); } + if(len >= MAX_EMAIL_ADDRESS_SPHINX_LEN){ + create_md5_from_email_address(puf, md5buf); + add_recipient(md5buf, strlen(md5buf), sdata, state, data, cfg); + } + add_recipient(puf, len, sdata, state, data, cfg); } else if(state->message_state == MSG_BODY && len >= (unsigned int)(cfg->min_word_len) && state->bodylen < BIGBUFSIZE-len-1){ diff --git a/unit_tests/check_parser.c b/unit_tests/check_parser.c index 70d53760..6e7958ae 100644 --- a/unit_tests/check_parser.c +++ b/unit_tests/check_parser.c @@ -39,7 +39,7 @@ static void test_parser(struct config *cfg){ {"18-spam-html-encoding.eml", "", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "postmaster@aaa.fu postmaster aaa fu a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "aaa.fu t-online.hu ", "", "TÁJÉKOZTATÁSVargay Péter", 0}, {"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, {"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, - {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "", "", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, + {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk 30cbee0b0f411fcf170416fb9f996c6f update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "", "", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, {"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0}, {"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0}, {"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", " www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0},