mirror of
https://bitbucket.org/jsuto/piler.git
synced 2024-12-24 19:30:12 +01:00
parser fix to support some emojis in subject line
Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
parent
c846b6aa21
commit
1c8dc1cc68
@ -206,9 +206,13 @@ void decodeQP(char *p){
|
||||
|
||||
i += 2;
|
||||
}
|
||||
else if(p[i] == '_'){
|
||||
c = ' ';
|
||||
}
|
||||
|
||||
p[k] = c;
|
||||
k++;
|
||||
|
||||
}
|
||||
|
||||
p[k] = '\0';
|
||||
|
@ -320,13 +320,12 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||
* but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE
|
||||
*/
|
||||
char v[MAXBUFSIZE], u[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE];
|
||||
int need_encoding, ret;
|
||||
int need_encoding, ret, prev_encoded=0, n_tokens=0;
|
||||
int b64=0, qp=0;
|
||||
|
||||
if(buflen < 5) return;
|
||||
|
||||
memset(puf, 0, sizeof(puf));
|
||||
memset(encoding, 0, sizeof(encoding));
|
||||
|
||||
|
||||
q = buf;
|
||||
|
||||
@ -342,18 +341,46 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||
* We can't use split_str(p, "=?", ...) it will fail with the following pattern
|
||||
* =?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=
|
||||
*
|
||||
* Also the below patter requires special care:
|
||||
* Also the below pattern requires special care:
|
||||
* =?gb2312?B?<something>?==?gb2312?Q?<something else>?=
|
||||
*
|
||||
* And we have to check the following cases as well:
|
||||
* Happy New Year! =?utf-8?q?=F0=9F=8E=86?=
|
||||
*/
|
||||
|
||||
b64 = qp = 0;
|
||||
memset(encoding, 0, sizeof(encoding));
|
||||
|
||||
r = strstr(p, "=?");
|
||||
if(r){
|
||||
p = r + 2;
|
||||
|
||||
e = strchr(p, '?');
|
||||
if(e){
|
||||
*e = '\0';
|
||||
snprintf(encoding, sizeof(encoding)-1, "%s", p);
|
||||
*e = '?';
|
||||
|
||||
s = strcasestr(e, "?B?");
|
||||
if(s){
|
||||
b64 = 1;
|
||||
p = s + 3;
|
||||
}
|
||||
else {
|
||||
s = strcasestr(e, "?Q?");
|
||||
if(s){
|
||||
qp = 1;
|
||||
p = s + 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
end = strstr(p, "?=");
|
||||
if(end){
|
||||
*end = '\0';
|
||||
}
|
||||
|
||||
|
||||
snprintf(u, sizeof(u)-1, "%s", p);
|
||||
|
||||
if(end) {
|
||||
@ -367,47 +394,38 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||
|
||||
if(u[0] == 0) continue;
|
||||
|
||||
memset(encoding, 0, sizeof(encoding));
|
||||
n_tokens++;
|
||||
|
||||
// Check if it's either ?B? or ?Q? encoding ...
|
||||
s = strcasestr(u, "?B?");
|
||||
if(s){
|
||||
decodeBase64(s+3);
|
||||
if(b64 == 1) decodeBase64(u);
|
||||
else if(qp == 1) decodeQP(u);
|
||||
|
||||
|
||||
/*
|
||||
* https://www.ietf.org/rfc/rfc2047.txt says that
|
||||
*
|
||||
* "When displaying a particular header field that contains multiple
|
||||
* 'encoded-word's, any 'linear-white-space' that separates a pair of
|
||||
* adjacent 'encoded-word's is ignored." (6.2)
|
||||
*/
|
||||
if(prev_encoded == 1 && (b64 == 1 || qp == 1)) {}
|
||||
else if(n_tokens > 1){
|
||||
strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
|
||||
}
|
||||
else {
|
||||
s = strcasestr(u, "?Q?");
|
||||
if(s){
|
||||
decodeQP(s+3);
|
||||
r = s + 3;
|
||||
for(; *r; r++){
|
||||
if(*r == '_') *r = ' ';
|
||||
}
|
||||
|
||||
if(b64 == 1 || qp == 1){
|
||||
prev_encoded = 1;
|
||||
need_encoding = 0;
|
||||
ret = ERR;
|
||||
|
||||
if(encoding[0] && strcasecmp(encoding, "utf-8")){
|
||||
need_encoding = 1;
|
||||
ret = utf8_encode(u, strlen(u), &tmpbuf[0], sizeof(tmpbuf), encoding);
|
||||
}
|
||||
}
|
||||
|
||||
// ... if it is, then get the encoding
|
||||
if(s){
|
||||
e = strchr(u, '?');
|
||||
if(e){
|
||||
*e = '\0';
|
||||
snprintf(encoding, sizeof(encoding)-1, "%s", u);
|
||||
*e = '?';
|
||||
|
||||
need_encoding = 0;
|
||||
ret = ERR;
|
||||
|
||||
if(encoding[0] && strcasecmp(encoding, "utf-8")){
|
||||
need_encoding = 1;
|
||||
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
|
||||
}
|
||||
|
||||
if(need_encoding == 1 && ret == OK)
|
||||
strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
|
||||
else
|
||||
strncat(puf, s+3, sizeof(puf)-strlen(puf)-1);
|
||||
if(need_encoding == 1 && ret == OK){
|
||||
strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
|
||||
}
|
||||
else {
|
||||
memset(encoding, 0, sizeof(encoding));
|
||||
strncat(puf, u, sizeof(puf)-strlen(puf)-1);
|
||||
}
|
||||
}
|
||||
@ -417,8 +435,6 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||
|
||||
} while(p);
|
||||
|
||||
if(q && encoding[0] == 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
|
||||
|
||||
} while(q);
|
||||
|
||||
snprintf(buf, buflen-1, "%s", puf);
|
||||
|
@ -125,7 +125,7 @@ static void test_fixupEncodedHeaderLine(){
|
||||
{"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"},
|
||||
{"=?iso-8859-2?Q?RE:_test.aaa.fu_z=F3na?=", "RE: test.aaa.fu zóna"},
|
||||
{"=?iso-8859-2?Q?V=E1ltoz=E1s_az_IT_szervezetben_/_Personal_changes_in_the_?=", "Változás az IT szervezetben / Personal changes in the "},
|
||||
{"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"},
|
||||
{"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"},
|
||||
{"=?UTF-8?Q?[JIRA]_Created:_(HUDSS-196)_T=C5=B1zfal_?=", "[JIRA] Created: (HUDSS-196) Tűzfal "},
|
||||
{"=?iso-8859-2?Q?RE:_Baptista_Szeretetszolg=E1lat?=", "RE: Baptista Szeretetszolgálat"},
|
||||
{"=?iso-8859-2?B?SXR0IGF6IE1OQiBuYWd5IGRvYuFzYTogaXNt6XQgYmVsZW55+mxuYWsgYSBoaXRlbGV66XNiZSAoMjAxNS4xMS4wMy4gLSBzakBhY3RzLmh1KQ==?=", "Itt az MNB nagy dobása: ismét belenyúlnak a hitelezésbe (2015.11.03. - sj@acts.hu)"},
|
||||
@ -139,7 +139,7 @@ static void test_fixupEncodedHeaderLine(){
|
||||
{"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"},
|
||||
{"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"},
|
||||
{"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"},
|
||||
{"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."},
|
||||
{"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."},
|
||||
{"1gy2tt. V3l4d. M5sk6nt", "1gy2tt. V3l4d. M5sk6nt"},
|
||||
{"=?iso-8859-2?B?03Jp4XNpIG1lZ2xlcGV06XMsIG5pbmNzIHT2YmIgbWVudHPpZyBBbWVyaWthIHN64W3hcmEgKDIwMTUuMTEuMDYuIC0gc2pAYWN0cy5odSk=?=", "Óriási meglepetés, nincs több mentség Amerika számára (2015.11.06. - sj@acts.hu)"},
|
||||
{"=?utf-8?B?Rlc6IEVtYWlsIGZvZ2Fkw6FzaSBoaWJh?=", "FW: Email fogadási hiba"},
|
||||
@ -154,6 +154,8 @@ static void test_fixupEncodedHeaderLine(){
|
||||
{"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"},
|
||||
{"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"},
|
||||
{"=?gb2312?B?yc/Gz76pIC0gw7/fTMir0bKy6YjzuOYgKDIwMTcxMDMwLTMxKSBHQlcgUG9k?==?gb2312?Q?ium_&_Basement.docx?=", "上葡京 - 每週全巡查報告 (20171030-31) GBW Podium & Basement.docx"},
|
||||
{"Subject: =?UTF-8?Q?=E2=98=85_JubiDu!Versandkost?= =?UTF-8?Q?enfrei-Verl=C3=A4ngerung!=E2=98=85?=", "Subject: ★ JubiDu!Versandkostenfrei-Verlängerung!★"},
|
||||
{"Happy New Year! =?utf-8?q?=F0=9F=8E=86?=", "Happy New Year! 🎆"},
|
||||
};
|
||||
|
||||
TEST_HEADER();
|
||||
|
@ -5,8 +5,10 @@ set -o pipefail
|
||||
set -o nounset
|
||||
set -x
|
||||
|
||||
LD_LIBRARY_PATH=../src ./check_parser_utils
|
||||
LD_LIBRARY_PATH=../src ./check_parser
|
||||
LD_LIBRARY_PATH=../src ./check_rules
|
||||
LD_LIBRARY_PATH=../src ./check_digest
|
||||
LD_LIBRARY_PATH=../src ./check_mydomains
|
||||
export LD_LIBRARY_PATH=../src
|
||||
|
||||
./check_parser_utils
|
||||
./check_parser
|
||||
./check_rules
|
||||
./check_digest
|
||||
./check_mydomains
|
||||
|
Loading…
Reference in New Issue
Block a user