src: decoder and parser fix

Signed-off-by: Janos SUTO <sj@acts.hu>
2026-06-12 23:48:24 +02:00 · 2017-11-08 11:50:28 +01:00
parent 25aeff66ba
commit 1192fc3218
4 changed files with 134 additions and 95 deletions
@@ -78,6 +78,8 @@ inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, in
       * U+0000..U+007F      00..7F
       * U+0080..U+07FF      C2..DF      80..BF
       * U+0800..U+0FFF      E0          A0..BF      80..BF
       *
       * FIXME: See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#G7404 for valid sequences
       */
      if(c <= 0x7F){
@@ -196,6 +196,7 @@ time_t parse_date_header(char *datestr){
         else if(strncasecmp(s, "Sat", 3) == 0) tm.tm_wday = 6;
         else if(strncasecmp(s, "Sun", 3) == 0) tm.tm_wday = 0;
         if(len <= 2 && tm.tm_mday == 0){ tm.tm_mday = atoi(s); continue; }
         if(len <= 2 && tm.tm_mon == -1){ tm.tm_mon = atoi(s) - 1; continue; }
@@ -313,17 +314,19 @@ int extract_boundary(char *p, struct parser_state *state){
 void fixupEncodedHeaderLine(char *buf, int buflen){
-   char *sb, *sq, *p, *q, *r, *s, *e, *start, *end;
+   char *p, *q, *r, *s, *e, *end;
   /*
    * I thought SMALLBUFSIZE would be enough for v, encoding and tmpbuf(2*),
    * but then I saw a 6-7000 byte long subject line, so I've switched to MAXBUFSIZE
    */
-   char v[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE];
+   char v[MAXBUFSIZE], u[MAXBUFSIZE], puf[MAXBUFSIZE], encoding[MAXBUFSIZE], tmpbuf[2*MAXBUFSIZE];
   int need_encoding, ret;
   if(buflen < 5) return;
   memset(puf, 0, sizeof(puf));
   memset(encoding, 0, sizeof(encoding));
   q = buf;
@@ -332,69 +335,89 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
      p = v;
      memset(encoding, 0, sizeof(encoding));
      do {
-         start = strstr(p, "=?");
+         memset(u, 0, sizeof(u));
-         if(start){
+
-            *start = '\0';
+         /*
-            if(strlen(p) > 0){
+          * We can't use split_str(p, "=?", ...) because it fails on the following pattern:
-               strncat(puf, p, sizeof(puf)-strlen(puf)-1);
+          *    =?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=
          *
          * Also, the pattern below requires special care:
          *    =?gb2312?B?<something>?==?gb2312?Q?<something else>?=
          */
         r = strstr(p, "=?");
         if(r){
            p = r + 2;
            end = strstr(p, "?=");
            if(end){
               *end = '\0';
            }
-            start++;
+            snprintf(u, sizeof(u)-1, "%s", p);
-            e = strchr(start+2, '?');
+            if(end) {
-            if(e){
+               p = end + 2;
               *e = '\0';
               snprintf(encoding, sizeof(encoding)-1, "%s", start+1);
               *e = '?';
            }
            s = NULL;
            sb = strcasestr(start, "?B?"); if(sb) s = sb;
            sq = strcasestr(start, "?Q?"); if(sq) s = sq;
            if(s){
               end = strstr(s+3, "?=");
               if(end){
                  *end = '\0';
                  if(sb){ decodeBase64(s+3); }
                  if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
                  /* encode everything if it's not utf-8 encoded */
                  need_encoding = 0;
                  ret = ERR;
                  if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
                     need_encoding = 1;
                     ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
                  }
                  if(need_encoding == 1 && ret == OK)
                     strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
                  else 
                     strncat(puf, s+3, sizeof(puf)-strlen(puf)-1);
                  p = end + 2;
               }
            }
            else {
               strncat(puf, start, sizeof(puf)-strlen(puf)-1);
               break;
            }
         }
         else {
-            strncat(puf, p, sizeof(puf)-strlen(puf)-1);
+            snprintf(u, sizeof(u)-1, "%s", p);
-            break;
+            p = NULL;
         }
         if(u[0] == 0) continue;
         memset(encoding, 0, sizeof(encoding));
         // Check if it's either ?B? or ?Q? encoding ...
         s = strcasestr(u, "?B?");
         if(s){
            decodeBase64(s+3);
         }
         else {
            s = strcasestr(u, "?Q?");
            if(s){
               decodeQP(s+3);
               r = s + 3;
               for(; *r; r++){
                  if(*r == '_') *r = ' ';
               }
            }
         }
         // ... if it is, then get the encoding
         if(s){
            e = strchr(u, '?');
            if(e){
               *e = '\0';
               snprintf(encoding, sizeof(encoding)-1, "%s", u);
               *e = '?';
               need_encoding = 0;
               ret = ERR;
               if(encoding[0] && strcasecmp(encoding, "utf-8")){
                  need_encoding = 1;
                  ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
               }
               if(need_encoding == 1 && ret == OK)
                  strncat(puf, tmpbuf, sizeof(puf)-strlen(puf)-1);
               else
                  strncat(puf, s+3, sizeof(puf)-strlen(puf)-1);
            }
            else {
               memset(encoding, 0, sizeof(encoding));
               strncat(puf, u, sizeof(puf)-strlen(puf)-1);
            }
         }
         else {
            strncat(puf, u, sizeof(puf)-strlen(puf)-1);
         }
      } while(p);
-      if(q) strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
+      if(q && encoding[0] == 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1);
   } while(q);
@@ -599,6 +622,7 @@ void translateLine(unsigned char *p, struct parser_state *state){
         prev = *p;
      }
      if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; }
      if(state->message_state == MSG_CONTENT_TYPE && *p == '_' ){ continue; }
@@ -658,8 +682,7 @@ int does_it_seem_like_an_email_address(char *email){
 */
 void reassembleToken(char *p){
-   unsigned int i;
+   unsigned int i, k=0;
   int k=0;
   for(i=0; i<strlen(p); i++){
@@ -959,3 +982,4 @@ void fix_plus_sign_in_email_address(char *puf, char **at_sign, unsigned int *len
      *at_sign = r;
   }
 }
@@ -2,12 +2,7 @@
 * check_parser_utils.c, SJ
 */
-#include <stdio.h>
+#include "test.h"
 #include <string.h>
 #include <locale.h>
 #include <stdbool.h>
 #include <assert.h>
 #include "../src/piler.h"
 struct date_test {
@@ -29,9 +24,9 @@ struct str_pair {
 static void test_parse_date_header(){
   unsigned int i;
-   int dst_fix = 0;
+   //time_t t = time(NULL);
-   time_t t = time(NULL);
+   //int dst_fix = 0;
-   struct tm lt = {0};
+   //struct tm lt = {0};
   struct config cfg;
   struct date_test date_test[] = {
      {"Date: Mon, 02 Nov 2015 09:39:31 -0000", 1446457171},
@@ -53,22 +48,22 @@ static void test_parse_date_header(){
   setlocale(LC_MESSAGES, cfg.locale);
   setlocale(LC_CTYPE, cfg.locale);
-   localtime_r(&t, &lt);
+   /*localtime_r(&t, &lt);
   if(lt.tm_isdst == 1){
      printf("DST is on\n");
      dst_fix = 3600;
   }
   else {
      printf("DST is off\n");
-   }
+   }*/
   TEST_HEADER();
   for(i=0; i<sizeof(date_test)/sizeof(struct date_test); i++){
-      printf("%s parsed=%ld, control=%ld\n", date_test[i].date_str, parse_date_header(date_test[i].date_str), date_test[i].timestamp);
+      ASSERT(parse_date_header(date_test[i].date_str) == date_test[i].timestamp, date_test[i].date_str);
      assert(parse_date_header(date_test[i].date_str)-dst_fix == date_test[i].timestamp && "test_parse_date_header()");
   }
-   printf("test_parse_date_header() OK\n");
+   TEST_FOOTER();
 }
@@ -108,13 +103,14 @@ static void test_extractNameFromHeaderLine(){
      {"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"}
   };
   TEST_HEADER();
   for(i=0; i<sizeof(name_from_header_test)/sizeof(struct name_from_header_test); i++){
      extractNameFromHeaderLine(name_from_header_test[i].line, name_from_header_test[i].token, resultbuf);
-      assert(strcmp(resultbuf, name_from_header_test[i].expected_result) == 0 && "test_extractNameFromHeaderLine");
+      ASSERT(strcmp(resultbuf, name_from_header_test[i].expected_result) == 0, name_from_header_test[i].expected_result);
   }
-   printf("test_extractNameFromHeaderLine() OK\n");
+   TEST_FOOTER();
 }
@@ -123,7 +119,7 @@ static void test_fixupEncodedHeaderLine(){
   char buf[SMALLBUFSIZE];
   struct str_pair pair[] = {
-      {"=?utf-8?Q?Tanjoubi,_azaz_sz=C3=BClet=C3=A9snap!_10_=C3=A9ves_az_I_Love_Su?=  =?utf-8?Q?shi!?=", "Tanjoubi, azaz születésnap! 10 éves az I Love Su  shi!"},
+      {"=?utf-8?Q?Tanjoubi,_azaz_sz=C3=BClet=C3=A9snap!_10_=C3=A9ves_az_I_Love_Su?=  =?utf-8?Q?shi!?=", "Tanjoubi, azaz születésnap! 10 éves az I Love Sushi!"},
      {"=?UTF-8?Q?IAM:_N2YPF_-_#1_Request_new_privilege?=", "IAM: N2YPF - #1 Request new privilege"},
      {"=?UTF-8?B?SG9neWFuIMOtcmp1bmsgcGFuYXN6bGV2ZWxldD8=?=", "Hogyan írjunk panaszlevelet?"},
      {"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"},
@@ -140,7 +136,7 @@ static void test_fixupEncodedHeaderLine(){
      {"Re: cccc@aaa.fu - e-mail =?UTF-8?B?a8OpcmTDqXM=?=", "Re: cccc@aaa.fu - e-mail kérdés"},
      {"=?WINDOWS-1250?Q?<AZ-17226/1-2015>=20www.xxxxx.com=20new=20virtual=20?=", "<AZ-17226/1-2015> www.xxxxx.com new virtual "},
      {"Re: FW: =?ISO-8859-2?Q?Sopron-Gy=F5r_optikai_sz=E1l_probl=E9?=", "Re: FW: Sopron-Győr optikai szál problé"},
-      {"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?=  =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Inv  itation to the Pulse Survey"},
+      {"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?=  =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"},
      {"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"},
      {"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA   GTS: 9154521"},
      {"[spam???]  Better Sex. Better Body. Better Life.", "[spam???]  Better Sex. Better Body. Better Life."},
@@ -157,20 +153,20 @@ static void test_fixupEncodedHeaderLine(){
      {"Subject: =?UTF-8?Q?Experience=20a=20Crazy=20Reward=20Delivered=20to=20you?=", "Subject: Experience a Crazy Reward Delivered to you"},
      {"Subject: =?windows-1251?B?ze7i7uPu5O3o5SDv7uTg8OroIOTr/yDC4Pjo?=", "Subject: Новогодние подарки для Ваши"},
      {"Subject: =?utf-8?Q?Divatos,_=C3=BCde_sz=C3=ADneinek_k=C3=B6sz=C3=B6nhet=C5=91en_el?=", "Subject: Divatos, üde színeinek köszönhetően el"},
      {"=?gb2312?B?yc/Gz76pIC0gw7/fTMir0bKy6YjzuOYgKDIwMTcxMDMwLTMxKSBHQlcgUG9k?==?gb2312?Q?ium_&_Basement.docx?=", "上葡京 - 每週全巡查報告 (20171030-31) GBW Podium & Basement.docx"},
   };
   TEST_HEADER();
   for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
      snprintf(buf, sizeof(buf)-1, "%s", pair[i].line);
      fixupEncodedHeaderLine(buf, sizeof(buf)-1);
-      assert(strcmp(buf, pair[i].expected_result) == 0 && "test_fixupEncodedHeaderLine");
+      ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
      //printf("      {\"%s\", \"%s\"},\n", pair[i].line, buf);
   }
-   printf("test_fixupEncodedHeaderLine() OK\n");
+   TEST_FOOTER();
 }
@@ -191,6 +187,7 @@ static void test_translateLine(){
       */
   };
   TEST_HEADER();
   for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
@@ -202,12 +199,10 @@ static void test_translateLine(){
      translateLine((unsigned char*)buf, &state);
-      //printf("      {\"%s\", \"%s\"},\n", pair[i].line, buf);
+      ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
      assert(strcmp(buf, pair[i].expected_result) == 0 && "test_translateLine");
   }
-   printf("test_translateLine() OK\n");
+   TEST_FOOTER();
 }
@@ -223,6 +218,7 @@ static void test_fixURL(){
      {"https://www.aaa.fu/", "__URL__wwwXaaaXfu "}
   };
   TEST_HEADER();
   for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
@@ -230,14 +226,10 @@ static void test_fixURL(){
      fixURL(buf, sizeof(buf)-1);
-      //printf("      {\"%s\", \"%s\"},\n", pair[i].line, buf);
+      ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
      assert(strcmp(buf, pair[i].expected_result) == 0 && "test_fixURL");
   }
-   printf("test_fixURL() OK\n");
+   TEST_FOOTER();
 }
@@ -257,6 +249,7 @@ static void test_degenerateToken(){
      {"Hello...", "Hello"}
   };
   TEST_HEADER();
   for(i=0; i<sizeof(pair)/sizeof(struct str_pair); i++){
@@ -264,14 +257,10 @@ static void test_degenerateToken(){
      degenerateToken((unsigned char*)buf);
-      //printf("      {\"%s\", \"%s\"},\n", pair[i].line, buf);
+      ASSERT(strcmp(buf, pair[i].expected_result) == 0, pair[i].expected_result);
      assert(strcmp(buf, pair[i].expected_result) == 0 && "test_degenerateToken");
   }
-   printf("test_degenerateToken() OK\n");
+   TEST_FOOTER();
 }
@@ -0,0 +1,24 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
 #include <netdb.h>
 #include <arpa/inet.h>
 #include <netinet/in.h>
 #include <fcntl.h>
 #include <locale.h>
 #include <getopt.h>
 #include <stdbool.h>
 #include <assert.h>
 #include <openssl/ssl.h>
 #include <openssl/err.h>
 #include "../src/piler.h"
 /*
  * Minimal helpers for the unit tests.
  *
  * ASSERT(expr, value)
  *    Prints "." when expr is true. Otherwise prints the string `value`
  *    in an "assert failed" message, flushes stdout and calls abort().
  *    The explicit flush matters because abort() is not required to flush
  *    stdio buffers, so the message could be lost when stdout is a pipe.
  *
  * TEST_HEADER() / TEST_FOOTER()
  *    Print the name of the enclosing function before the checks and
  *    " OK" after them.
  *
  * Each macro is wrapped in do { ... } while (0) so that it expands to
  * exactly one statement: the caller writes the terminating semicolon and
  * the macro can be used safely in an unbraced if/else.
  */
 #define ASSERT(expr, value) \
    do { \
       if (!(expr)) { \
          printf("assert failed: '%s'\n", (value)); \
          fflush(stdout); \
          abort(); \
       } else { \
          printf("."); \
       } \
    } while (0)
 #define TEST_HEADER() do { printf("%s() ", __func__); } while (0)
 #define TEST_FOOTER() do { printf(" OK\n"); } while (0)