Parser fix to support some emojis in the subject line

Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
Janos SUTO 2018-01-11 09:28:45 +01:00
parent c846b6aa21
commit 1c8dc1cc68
4 changed files with 72 additions and 48 deletions

View File

@ -206,9 +206,13 @@ void decodeQP(char *p){
i += 2; i += 2;
} }
else if(p[i] == '_'){
c = ' ';
}
p[k] = c; p[k] = c;
k++; k++;
} }
p[k] = '\0'; p[k] = '\0';

View File

@ -320,13 +320,12 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
* but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE * but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE
*/ */
char v[MAXBUFSIZE], u[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE]; char v[MAXBUFSIZE], u[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE];
int need_encoding, ret; int need_encoding, ret, prev_encoded=0, n_tokens=0;
int b64=0, qp=0;
if(buflen < 5) return; if(buflen < 5) return;
memset(puf, 0, sizeof(puf)); memset(puf, 0, sizeof(puf));
memset(encoding, 0, sizeof(encoding));
q = buf; q = buf;
@ -342,18 +341,46 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
* We can't use split_str(p, "=?", ...) it will fail with the following pattern * We can't use split_str(p, "=?", ...) it will fail with the following pattern
* =?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?= * =?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=
* *
* Also the below patter requires special care: * Also the below pattern requires special care:
* =?gb2312?B?<something>?==?gb2312?Q?<something else>?= * =?gb2312?B?<something>?==?gb2312?Q?<something else>?=
*
* And we have to check the following cases as well:
* Happy New Year! =?utf-8?q?=F0=9F=8E=86?=
*/ */
b64 = qp = 0;
memset(encoding, 0, sizeof(encoding));
r = strstr(p, "=?"); r = strstr(p, "=?");
if(r){ if(r){
p = r + 2; p = r + 2;
e = strchr(p, '?');
if(e){
*e = '\0';
snprintf(encoding, sizeof(encoding)-1, "%s", p);
*e = '?';
s = strcasestr(e, "?B?");
if(s){
b64 = 1;
p = s + 3;
}
else {
s = strcasestr(e, "?Q?");
if(s){
qp = 1;
p = s + 3;
}
}
}
end = strstr(p, "?="); end = strstr(p, "?=");
if(end){ if(end){
*end = '\0'; *end = '\0';
} }
snprintf(u, sizeof(u)-1, "%s", p); snprintf(u, sizeof(u)-1, "%s", p);
if(end) { if(end) {
@ -367,47 +394,38 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
if(u[0] == 0) continue; if(u[0] == 0) continue;
memset(encoding, 0, sizeof(encoding)); n_tokens++;
// Check if it's either ?B? or ?Q? encoding ... if(b64 == 1) decodeBase64(u);
s = strcasestr(u, "?B?"); else if(qp == 1) decodeQP(u);
if(s){
decodeBase64(s+3);
/*
* https://www.ietf.org/rfc/rfc2047.txt says that
*
* "When displaying a particular header field that contains multiple
* 'encoded-word's, any 'linear-white-space' that separates a pair of
* adjacent 'encoded-word's is ignored." (6.2)
*/
if(prev_encoded == 1 && (b64 == 1 || qp == 1)) {}
else if(n_tokens > 1){
strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
} }
else {
s = strcasestr(u, "?Q?"); if(b64 == 1 || qp == 1){
if(s){ prev_encoded = 1;
decodeQP(s+3); need_encoding = 0;
r = s + 3; ret = ERR;
for(; *r; r++){
if(*r == '_') *r = ' '; if(encoding[0] && strcasecmp(encoding, "utf-8")){
} need_encoding = 1;
ret = utf8_encode(u, strlen(u), &tmpbuf[0], sizeof(tmpbuf), encoding);
} }
}
// ... if it is, then get the encoding if(need_encoding == 1 && ret == OK){
if(s){ strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
e = strchr(u, '?');
if(e){
*e = '\0';
snprintf(encoding, sizeof(encoding)-1, "%s", u);
*e = '?';
need_encoding = 0;
ret = ERR;
if(encoding[0] && strcasecmp(encoding, "utf-8")){
need_encoding = 1;
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
}
if(need_encoding == 1 && ret == OK)
strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
else
strncat(puf, s+3, sizeof(puf)-strlen(puf)-1);
} }
else { else {
memset(encoding, 0, sizeof(encoding));
strncat(puf, u, sizeof(puf)-strlen(puf)-1); strncat(puf, u, sizeof(puf)-strlen(puf)-1);
} }
} }
@ -417,8 +435,6 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
} while(p); } while(p);
if(q && encoding[0] == 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
} while(q); } while(q);
snprintf(buf, buflen-1, "%s", puf); snprintf(buf, buflen-1, "%s", puf);

View File

@ -125,7 +125,7 @@ static void test_fixupEncodedHeaderLine(){
{"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"}, {"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"},
{"=?iso-8859-2?Q?RE:_test.aaa.fu_z=F3na?=", "RE: test.aaa.fu zóna"}, {"=?iso-8859-2?Q?RE:_test.aaa.fu_z=F3na?=", "RE: test.aaa.fu zóna"},
{"=?iso-8859-2?Q?V=E1ltoz=E1s_az_IT_szervezetben_/_Personal_changes_in_the_?=", "Változás az IT szervezetben / Personal changes in the "}, {"=?iso-8859-2?Q?V=E1ltoz=E1s_az_IT_szervezetben_/_Personal_changes_in_the_?=", "Változás az IT szervezetben / Personal changes in the "},
{"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"}, {"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"},
{"=?UTF-8?Q?[JIRA]_Created:_(HUDSS-196)_T=C5=B1zfal_?=", "[JIRA] Created: (HUDSS-196) Tűzfal "}, {"=?UTF-8?Q?[JIRA]_Created:_(HUDSS-196)_T=C5=B1zfal_?=", "[JIRA] Created: (HUDSS-196) Tűzfal "},
{"=?iso-8859-2?Q?RE:_Baptista_Szeretetszolg=E1lat?=", "RE: Baptista Szeretetszolgálat"}, {"=?iso-8859-2?Q?RE:_Baptista_Szeretetszolg=E1lat?=", "RE: Baptista Szeretetszolgálat"},
{"=?iso-8859-2?B?SXR0IGF6IE1OQiBuYWd5IGRvYuFzYTogaXNt6XQgYmVsZW55+mxuYWsgYSBoaXRlbGV66XNiZSAoMjAxNS4xMS4wMy4gLSBzakBhY3RzLmh1KQ==?=", "Itt az MNB nagy dobása: ismét belenyúlnak a hitelezésbe (2015.11.03. - sj@acts.hu)"}, {"=?iso-8859-2?B?SXR0IGF6IE1OQiBuYWd5IGRvYuFzYTogaXNt6XQgYmVsZW55+mxuYWsgYSBoaXRlbGV66XNiZSAoMjAxNS4xMS4wMy4gLSBzakBhY3RzLmh1KQ==?=", "Itt az MNB nagy dobása: ismét belenyúlnak a hitelezésbe (2015.11.03. - sj@acts.hu)"},
@ -139,7 +139,7 @@ static void test_fixupEncodedHeaderLine(){
{"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"}, {"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"},
{"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"}, {"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"},
{"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"}, {"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"},
{"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."}, {"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."},
{"1gy2tt. V3l4d. M5sk6nt", "1gy2tt. V3l4d. M5sk6nt"}, {"1gy2tt. V3l4d. M5sk6nt", "1gy2tt. V3l4d. M5sk6nt"},
{"=?iso-8859-2?B?03Jp4XNpIG1lZ2xlcGV06XMsIG5pbmNzIHT2YmIgbWVudHPpZyBBbWVyaWthIHN64W3hcmEgKDIwMTUuMTEuMDYuIC0gc2pAYWN0cy5odSk=?=", "Óriási meglepetés, nincs több mentség Amerika számára (2015.11.06. - sj@acts.hu)"}, {"=?iso-8859-2?B?03Jp4XNpIG1lZ2xlcGV06XMsIG5pbmNzIHT2YmIgbWVudHPpZyBBbWVyaWthIHN64W3hcmEgKDIwMTUuMTEuMDYuIC0gc2pAYWN0cy5odSk=?=", "Óriási meglepetés, nincs több mentség Amerika számára (2015.11.06. - sj@acts.hu)"},
{"=?utf-8?B?Rlc6IEVtYWlsIGZvZ2Fkw6FzaSBoaWJh?=", "FW: Email fogadási hiba"}, {"=?utf-8?B?Rlc6IEVtYWlsIGZvZ2Fkw6FzaSBoaWJh?=", "FW: Email fogadási hiba"},
@ -154,6 +154,8 @@ static void test_fixupEncodedHeaderLine(){
{"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"}, {"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"},
{"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"}, {"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"},
{"=?gb2312?B?yc/Gz76pIC0gw7/fTMir0bKy6YjzuOYgKDIwMTcxMDMwLTMxKSBHQlcgUG9k?==?gb2312?Q?ium_&_Basement.docx?=", "上葡京 - 每週全巡查報告 (20171030-31) GBW Podium & Basement.docx"}, {"=?gb2312?B?yc/Gz76pIC0gw7/fTMir0bKy6YjzuOYgKDIwMTcxMDMwLTMxKSBHQlcgUG9k?==?gb2312?Q?ium_&_Basement.docx?=", "上葡京 - 每週全巡查報告 (20171030-31) GBW Podium & Basement.docx"},
{"Subject: =?UTF-8?Q?=E2=98=85_JubiDu!Versandkost?= =?UTF-8?Q?enfrei-Verl=C3=A4ngerung!=E2=98=85?=", "Subject: ★ JubiDu!Versandkostenfrei-Verlängerung!★"},
{"Happy New Year! =?utf-8?q?=F0=9F=8E=86?=", "Happy New Year! 🎆"},
}; };
TEST_HEADER(); TEST_HEADER();

View File

@ -5,8 +5,10 @@ set -o pipefail
set -o nounset set -o nounset
set -x set -x
LD_LIBRARY_PATH=../src ./check_parser_utils export LD_LIBRARY_PATH=../src
LD_LIBRARY_PATH=../src ./check_parser
LD_LIBRARY_PATH=../src ./check_rules ./check_parser_utils
LD_LIBRARY_PATH=../src ./check_digest ./check_parser
LD_LIBRARY_PATH=../src ./check_mydomains ./check_rules
./check_digest
./check_mydomains