src: decoder and parser fix

Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
Janos SUTO 2017-11-08 11:50:28 +01:00
parent 25aeff66ba
commit 1192fc3218
4 changed files with 134 additions and 95 deletions

View File

@ -78,6 +78,8 @@ inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, in
* U+0000..U+007F 00..7F * U+0000..U+007F 00..7F
* U+0080..U+07FF C2..DF 80..BF * U+0080..U+07FF C2..DF 80..BF
* U+0800..U+0FFF E0 A0..BF 80..BF * U+0800..U+0FFF E0 A0..BF 80..BF
*
* FIXME: See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#G7404 for valid sequences
*/ */
if(c <= 0x7F){ if(c <= 0x7F){

View File

@ -196,6 +196,7 @@ time_t parse_date_header(char *datestr){
else if(strncasecmp(s, "Sat", 3) == 0) tm.tm_wday = 6; else if(strncasecmp(s, "Sat", 3) == 0) tm.tm_wday = 6;
else if(strncasecmp(s, "Sun", 3) == 0) tm.tm_wday = 0; else if(strncasecmp(s, "Sun", 3) == 0) tm.tm_wday = 0;
if(len <= 2 && tm.tm_mday == 0){ tm.tm_mday = atoi(s); continue; } if(len <= 2 && tm.tm_mday == 0){ tm.tm_mday = atoi(s); continue; }
if(len <= 2 && tm.tm_mon == -1){ tm.tm_mon = atoi(s) - 1; continue; } if(len <= 2 && tm.tm_mon == -1){ tm.tm_mon = atoi(s) - 1; continue; }
@ -313,17 +314,19 @@ int extract_boundary(char *p, struct parser_state *state){
void fixupEncodedHeaderLine(char *buf, int buflen){ void fixupEncodedHeaderLine(char *buf, int buflen){
char *sb, *sq, *p, *q, *r, *s, *e, *start, *end; char *p, *q, *r, *s, *e, *end;
/* /*
* I thought SMALLBUFSIZE would be enough for v, encoding and tmpbuf(2*), * I thought SMALLBUFSIZE would be enough for v, encoding and tmpbuf(2*),
* but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE * but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE
*/ */
char v[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE]; char v[MAXBUFSIZE], u[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE];
int need_encoding, ret; int need_encoding, ret;
if(buflen < 5) return; if(buflen < 5) return;
memset(puf, 0, sizeof(puf)); memset(puf, 0, sizeof(puf));
memset(encoding, 0, sizeof(encoding));
q = buf; q = buf;
@ -332,69 +335,89 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
p = v; p = v;
memset(encoding, 0, sizeof(encoding));
do { do {
start = strstr(p, "=?"); memset(u, 0, sizeof(u));
if(start){
*start = '\0'; /*
if(strlen(p) > 0){ * We can't use split_str(p, "=?", ...) it will fail with the following pattern
strncat(puf, p, sizeof(puf)-strlen(puf)-1); * =?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=
*
* Also the below pattern requires special care:
* =?gb2312?B?<something>?==?gb2312?Q?<something else>?=
*/
r = strstr(p, "=?");
if(r){
p = r + 2;
end = strstr(p, "?=");
if(end){
*end = '\0';
} }
start++; snprintf(u, sizeof(u)-1, "%s", p);
e = strchr(start+2, '?'); if(end) {
if(e){ p = end + 2;
*e = '\0';
snprintf(encoding, sizeof(encoding)-1, "%s", start+1);
*e = '?';
}
s = NULL;
sb = strcasestr(start, "?B?"); if(sb) s = sb;
sq = strcasestr(start, "?Q?"); if(sq) s = sq;
if(s){
end = strstr(s+3, "?=");
if(end){
*end = '\0';
if(sb){ decodeBase64(s+3); }
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
/* encode everything if it's not utf-8 encoded */
need_encoding = 0;
ret = ERR;
if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
need_encoding = 1;
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
}
if(need_encoding == 1 && ret == OK)
strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
else
strncat(puf, s+3, sizeof(puf)-strlen(puf)-1);
p = end + 2;
}
}
else {
strncat(puf, start, sizeof(puf)-strlen(puf)-1);
break;
} }
} }
else { else {
strncat(puf, p, sizeof(puf)-strlen(puf)-1); snprintf(u, sizeof(u)-1, "%s", p);
break; p = NULL;
}
if(u[0] == 0) continue;
memset(encoding, 0, sizeof(encoding));
// Check if it's either ?B? or ?Q? encoding ...
s = strcasestr(u, "?B?");
if(s){
decodeBase64(s+3);
}
else {
s = strcasestr(u, "?Q?");
if(s){
decodeQP(s+3);
r = s + 3;
for(; *r; r++){
if(*r == '_') *r = ' ';
}
}
}
// ... if it is, then get the encoding
if(s){
e = strchr(u, '?');
if(e){
*e = '\0';
snprintf(encoding, sizeof(encoding)-1, "%s", u);
*e = '?';
need_encoding = 0;
ret = ERR;
if(encoding[0] && strcasecmp(encoding, "utf-8")){
need_encoding = 1;
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
}
if(need_encoding == 1 && ret == OK)
strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
else
strncat(puf, s+3, sizeof(puf)-strlen(puf)-1);
}
else {
memset(encoding, 0, sizeof(encoding));
strncat(puf, u, sizeof(puf)-strlen(puf)-1);
}
}
else {
strncat(puf, u, sizeof(puf)-strlen(puf)-1);
} }
} while(p); } while(p);
if(q) strncat(puf, " ", sizeof(puf)-strlen(puf)-1); if(q && encoding[0] == 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
} while(q); } while(q);
@ -599,6 +622,7 @@ void translateLine(unsigned char *p, struct parser_state *state){
prev = *p; prev = *p;
} }
if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; } if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; }
if(state->message_state == MSG_CONTENT_TYPE && *p == '_' ){ continue; } if(state->message_state == MSG_CONTENT_TYPE && *p == '_' ){ continue; }
@ -658,8 +682,7 @@ int does_it_seem_like_an_email_address(char *email){
*/ */
void reassembleToken(char *p){ void reassembleToken(char *p){
unsigned int i; unsigned int i, k=0;
int k=0;
for(i=0; i<strlen(p); i++){ for(i=0; i<strlen(p); i++){
@ -959,3 +982,4 @@ void fix_plus_sign_in_email_address(char *puf, char **at_sign, unsigned int *len
*at_sign = r; *at_sign = r;
} }
} }

View File

@ -2,12 +2,7 @@
* check_parser_utils.c, SJ * check_parser_utils.c, SJ
*/ */
#include <stdio.h> #include "test.h"
#include <string.h>
#include <locale.h>
#include <stdbool.h>
#include <assert.h>
#include "../src/piler.h"
struct date_test { struct date_test {
@ -29,9 +24,9 @@ struct str_pair {
static void test_parse_date_header(){ static void test_parse_date_header(){
unsigned int i; unsigned int i;
int dst_fix = 0; //time_t t = time(NULL);
time_t t = time(NULL); //int dst_fix = 0;
struct tm lt = {0}; //struct tm lt = {0};
struct config cfg; struct config cfg;
struct date_test date_test[] = { struct date_test date_test[] = {
{"Date: Mon, 02 Nov 2015 09:39:31 -0000", 1446457171}, {"Date: Mon, 02 Nov 2015 09:39:31 -0000", 1446457171},
@ -53,22 +48,22 @@ static void test_parse_date_header(){
setlocale(LC_MESSAGES, cfg.locale); setlocale(LC_MESSAGES, cfg.locale);
setlocale(LC_CTYPE, cfg.locale); setlocale(LC_CTYPE, cfg.locale);
localtime_r(&t, &lt); /*localtime_r(&t, &lt);
if(lt.tm_isdst == 1){ if(lt.tm_isdst == 1){
printf("DST is on\n"); printf("DST is on\n");
dst_fix = 3600; dst_fix = 3600;
} }
else { else {
printf("DST is off\n"); printf("DST is off\n");
} }*/
TEST_HEADER();
for(i=0; i<sizeof(date_test)/sizeof(struct date_test); i++){ for(i=0; i<sizeof(date_test)/sizeof(struct date_test); i++){
printf("%s parsed=%ld, control=%ld\n", date_test[i].date_str, parse_date_header(date_test[i].date_str), date_test[i].timestamp); ASSERT(parse_date_header(date_test[i].date_str) == date_test[i].timestamp, date_test[i].date_str);
assert(parse_date_header(date_test[i].date_str)-dst_fix == date_test[i].timestamp && "test_parse_date_header()");
} }
printf("test_parse_date_header() OK\n"); TEST_FOOTER();
} }
@ -108,13 +103,14 @@ static void test_extractNameFromHeaderLine(){
{"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"} {"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"}
}; };
TEST_HEADER();
for(i=0; i<sizeof(name_from_header_test)/sizeof(struct name_from_header_test); i++){ for(i=0; i<sizeof(name_from_header_test)/sizeof(struct name_from_header_test); i++){
extractNameFromHeaderLine(name_from_header_test[i].line, name_from_header_test[i].token, resultbuf); extractNameFromHeaderLine(name_from_header_test[i].line, name_from_header_test[i].token, resultbuf);
assert(strcmp(resultbuf, name_from_header_test[i].expected_result) == 0 && "test_extractNameFromHeaderLine"); ASSERT(strcmp(resultbuf, name_from_header_test[i].expected_result) == 0, name_from_header_test[i].expected_result);
} }
printf("test_extractNameFromHeaderLine() OK\n"); TEST_FOOTER();
} }
@ -123,7 +119,7 @@ static void test_fixupEncodedHeaderLine(){
char buf[SMALLBUFSIZE]; char buf[SMALLBUFSIZE];
struct str_pair pair[] = { struct str_pair pair[] = {
{"=?utf-8?Q?Tanjoubi,_azaz_sz=C3=BClet=C3=A9snap!_10_=C3=A9ves_az_I_Love_Su?= =?utf-8?Q?shi!?=", "Tanjoubi, azaz születésnap! 10 éves az I Love Su shi!"}, {"=?utf-8?Q?Tanjoubi,_azaz_sz=C3=BClet=C3=A9snap!_10_=C3=A9ves_az_I_Love_Su?= =?utf-8?Q?shi!?=", "Tanjoubi, azaz születésnap! 10 éves az I Love Sushi!"},
{"=?UTF-8?Q?IAM:_N2YPF_-_#1_Request_new_privilege?=", "IAM: N2YPF - #1 Request new privilege"}, {"=?UTF-8?Q?IAM:_N2YPF_-_#1_Request_new_privilege?=", "IAM: N2YPF - #1 Request new privilege"},
{"=?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=", "Hogyan írjunk panaszlevelet?"}, {"=?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=", "Hogyan írjunk panaszlevelet?"},
{"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"}, {"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"},
@ -140,7 +136,7 @@ static void test_fixupEncodedHeaderLine(){
{"Re: cccc@aaa.fu - e-mail =?UTF-8?B?a8OpcmTDqXM=?=", "Re: cccc@aaa.fu - e-mail kérdés"}, {"Re: cccc@aaa.fu - e-mail =?UTF-8?B?a8OpcmTDqXM=?=", "Re: cccc@aaa.fu - e-mail kérdés"},
{"=?WINDOWS-1250?Q?<AZ-17226/1-2015>=20www.xxxxx.com=20new=20virtual=20?=", "<AZ-17226/1-2015> www.xxxxx.com new virtual "}, {"=?WINDOWS-1250?Q?<AZ-17226/1-2015>=20www.xxxxx.com=20new=20virtual=20?=", "<AZ-17226/1-2015> www.xxxxx.com new virtual "},
{"Re: FW: =?ISO-8859-2?Q?Sopron-Gy=F5r_optikai_sz=E1l_probl=E9?=", "Re: FW: Sopron-Győr optikai szál problé"}, {"Re: FW: =?ISO-8859-2?Q?Sopron-Gy=F5r_optikai_sz=E1l_probl=E9?=", "Re: FW: Sopron-Győr optikai szál problé"},
{"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Inv itation to the Pulse Survey"}, {"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"},
{"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"}, {"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"},
{"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"}, {"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"},
{"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."}, {"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."},
@ -157,20 +153,20 @@ static void test_fixupEncodedHeaderLine(){
{"Subject: =?UTF-8?Q?Experience=20a=20Crazy=20Reward=20Delivered=20to=20you?=", "Subject: Experience a Crazy Reward Delivered to you"}, {"Subject: =?UTF-8?Q?Experience=20a=20Crazy=20Reward=20Delivered=20to=20you?=", "Subject: Experience a Crazy Reward Delivered to you"},
{"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"}, {"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"},
{"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"}, {"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"},
{"=?gb2312?B?yc/Gz76pIC0gw7/fTMir0bKy6YjzuOYgKDIwMTcxMDMwLTMxKSBHQlcgUG9k?==?gb2312?Q?ium_&_Basement.docx?=", "上葡京 - 每週全巡查報告 (20171030-31) GBW Podium & Basement.docx"},
}; };
TEST_HEADER();
for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){ for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
snprintf(buf, sizeof(buf)-1, "%s", pair[i].line); snprintf(buf, sizeof(buf)-1, "%s", pair[i].line);
fixupEncodedHeaderLine(buf, sizeof(buf)-1); fixupEncodedHeaderLine(buf, sizeof(buf)-1);
assert(strcmp(buf, pair[i].expected_result) == 0 && "test_fixupEncodedHeaderLine"); ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
//printf(" {\"%s\", \"%s\"},\n", pair[i].line, buf);
} }
printf("test_fixupEncodedHeaderLine() OK\n"); TEST_FOOTER();
} }
@ -191,6 +187,7 @@ static void test_translateLine(){
*/ */
}; };
TEST_HEADER();
for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){ for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
@ -202,12 +199,10 @@ static void test_translateLine(){
translateLine((unsigned char*)buf, &state); translateLine((unsigned char*)buf, &state);
//printf(" {\"%s\", \"%s\"},\n", pair[i].line, buf); ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
assert(strcmp(buf, pair[i].expected_result) == 0 && "test_translateLine");
} }
printf("test_translateLine() OK\n"); TEST_FOOTER();
} }
@ -223,6 +218,7 @@ static void test_fixURL(){
{"https://www.aaa.fu/", "__URL__wwwXaaaXfu "} {"https://www.aaa.fu/", "__URL__wwwXaaaXfu "}
}; };
TEST_HEADER();
for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){ for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
@ -230,14 +226,10 @@ static void test_fixURL(){
fixURL(buf, sizeof(buf)-1); fixURL(buf, sizeof(buf)-1);
//printf(" {\"%s\", \"%s\"},\n", pair[i].line, buf); ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
assert(strcmp(buf, pair[i].expected_result) == 0 && "test_fixURL");
} }
printf("test_fixURL() OK\n"); TEST_FOOTER();
} }
@ -257,6 +249,7 @@ static void test_degenerateToken(){
{"Hello...", "Hello"} {"Hello...", "Hello"}
}; };
TEST_HEADER();
for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){ for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
@ -264,14 +257,10 @@ static void test_degenerateToken(){
degenerateToken((unsigned char*)buf); degenerateToken((unsigned char*)buf);
//printf(" {\"%s\", \"%s\"},\n", pair[i].line, buf); ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
assert(strcmp(buf, pair[i].expected_result) == 0 && "test_degenerateToken");
} }
printf("test_degenerateToken() OK\n"); TEST_FOOTER();
} }

24
unit_tests/test.h Normal file
View File

@ -0,0 +1,24 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <locale.h>
#include <getopt.h>
#include <stdbool.h>
#include <assert.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#include "../src/piler.h"
/*
 * Minimal helpers for the unit test programs (e.g. check_parser_utils.c).
 *
 * ASSERT(expr, value)
 *    expr  - condition that must hold
 *    value - C string printed with the failure message to identify the
 *            failing test case
 *    Prints "." when expr is true. Otherwise prints
 *    "assert failed: '<value>'" and terminates the process with abort().
 *
 * TEST_HEADER() prints the name of the enclosing function followed by "() ".
 * TEST_FOOTER() prints " OK" and a newline.
 *
 * Every macro expands to a single do { ... } while (0) statement, so it must
 * be followed by a semicolon and is safe inside an unbraced if/else.
 */
#define ASSERT(expr, value) \
   do { \
      if (!(expr)) { \
         printf("assert failed: '%s'\n", (value)); \
         /* abort() is not required to flush stdio buffers: flush first so the message is not lost */ \
         fflush(stdout); \
         abort(); \
      } \
      printf("."); \
   } while (0)

#define TEST_HEADER() do { printf("%s() ", __func__); } while (0)

#define TEST_FOOTER() do { printf(" OK\n"); } while (0)