decoding fixes

2025-07-03 10:39:09 +02:00 · 2014-08-30 21:10:29 +02:00
parent 43bbbfd320
commit 5551df3f9d
8 changed files with 46 additions and 98 deletions
--- a/src/config.h
+++ b/src/config.h
@ -12,9 +12,9 @@
 #define PROGNAME "piler"
 #define PILERGETD_PROGNAME "pilergetd"
-#define VERSION "1.1.0"
+#define VERSION "1.1.1"
-#define BUILD 884
+#define BUILD 885
 #define HOSTID "mailarchiver"
--- a/src/decoder.c
+++ b/src/decoder.c
@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <iconv.h>
 #include "decoder.h"
 #include "htmlentities.h"
 #include "config.h"
@ -182,7 +183,7 @@ void decodeQP(char *p){
 }
-void decodeHTML(char *p){
+void decodeHTML(char *p, int utf8){
   unsigned char buf[MAXBUFSIZE], __u[8];
   char *s, *q;
   int count=0, len, c;
@ -212,10 +213,17 @@ void decodeHTML(char *p){
               res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
               if(res && res->val <= 255){
                  if(utf8 == 1){
                     utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
                     memcpy(&buf[count], &__u[0], len);
                     count += len;
                  }
                  else {
                     buf[count] = res->val;
                     count++;
                  }
               }
               else {
                  buf[count] = 'q';
                  count++;
@ -316,37 +324,25 @@ inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, in
 }
-void utf8_encode(unsigned char *p){
+int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding){
+   /*
+    * Convert inbuflen bytes of inbuf from the charset named by encoding to
+    * utf-8 using iconv, writing the result to outbuf.
+    *
+    * outbuf is zeroed first and only outbuflen-1 bytes are offered to iconv,
+    * so the last byte of outbuf stays zero.
+    *
+    * Returns OK when the conversion succeeded, ERR when the conversion
+    * descriptor could not be opened or iconv() reported a failure.
+    */
-   int count=0, len;
+   iconv_t cd;
-   unsigned char *u, *s, utf8[MAXBUFSIZE], __u[8];
+   size_t size, inbytesleft, outbytesleft;
-   if(p == NULL || strlen((char *)p) == 0) return;
+   memset(outbuf, 0, outbuflen);
-   memset(utf8, 0, MAXBUFSIZE);
+   cd = iconv_open("utf-8", encoding);
   u = &utf8[0];
   s = p;
-   for(; *s; s++){
+   if(cd != (iconv_t)-1){
      inbytesleft = inbuflen;
      outbytesleft = outbuflen-1;
-      utf8_encode_char(*s, &__u[0], sizeof(__u), &len);
+      size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
-      /*
+      iconv_close(cd);
       * this condition should never happen, as according to the RFCs:
       *
       * "Each line of characters MUST be no more than 998 characters, and
       * SHOULD be no more than 78 characters, excluding the CRLF."
       *
       */
-      if(count+len > sizeof(utf8)-1) break;
+      /* size is an unsigned size_t, so "size >= 0" would always be true; iconv() reports failure as (size_t)-1 */
+      if(size != (size_t)-1) return OK;
      //printf("%s", __u);
      memcpy(u+count, &__u[0], len);
      count += len;
   }
-   *(u+count) = '\0'; count++;
+   return ERR;
   memcpy(p, u, count);
 }
--- a/src/decoder.h
+++ b/src/decoder.h
@ -11,9 +11,9 @@ void sanitiseBase64(char *s);
 int decodeBase64(char *p);
 int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen);
 void decodeQP(char *p);
-void decodeHTML(char *p);
+void decodeHTML(char *p, int utf8);
 void decodeURL(char *p);
 inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len);
-void utf8_encode(unsigned char *p);
+int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding);
 #endif /* _DECODER_H */
--- a/src/defs.h
+++ b/src/defs.h
@ -165,6 +165,7 @@ struct _state {
   char filename[TINYBUFSIZE];
   char type[TINYBUFSIZE];
   char charset[TINYBUFSIZE];
   char attachment_name_buf[SMALLBUFSIZE];
   int anamepos;
--- a/src/html.h
+++ b/src/html.h
@ -1,38 +0,0 @@
 /*
  * A short text token paired with its length. In both tables below,
  * length equals strlen(entity) for every entry.
  */
 struct html_tag {
    unsigned char length;
    char *entity;
 };
 /*
  * First table: element names, with and without the leading '/'.
  * NUM_OF_SKIP_TAGS2 matches the number of entries.
  * NOTE(review): presumably used by the HTML handling in parser_utils.c,
  * which includes this header -- the consumer is not visible here.
  */
 #define NUM_OF_SKIP_TAGS2 10
 struct html_tag skip_html_tags2[] = {
    { .length = 4, .entity = "html"   },
    { .length = 5, .entity = "/html"  },
    { .length = 5, .entity = "/body"  },
    { .length = 4, .entity = "meta"   },
    { .length = 4, .entity = "head"   },
    { .length = 5, .entity = "/head"  },
    { .length = 5, .entity = "style"  },
    { .length = 6, .entity = "/style" },
    { .length = 3, .entity = "div"    },
    { .length = 4, .entity = "/div"   }
 };
 /*
  * Second table: mostly attribute-style prefixes ("name=", "class=", ...).
  * NUM_OF_SKIP_TAGS matches the number of entries.
  */
 #define NUM_OF_SKIP_TAGS 11
 struct html_tag skip_html_tags[] = {
    { .length = 5,  .entity = "style"      },
    { .length = 4,  .entity = "dir="       },
    { .length = 8,  .entity = "content="   },
    { .length = 5,  .entity = "name="      },
    { .length = 3,  .entity = "id="        },
    { .length = 2,  .entity = "v:"         },
    { .length = 6,  .entity = "class="     },
    { .length = 5,  .entity = "xmlns"      },
    { .length = 10, .entity = "http-equiv" },
    { .length = 7,  .entity = "spidmax"    },
    { .length = 5,  .entity = "data="      }
 };
--- a/src/misc.c
+++ b/src/misc.c
@ -548,7 +548,9 @@ int read_from_stdin(struct session_data *sdata){
 /*
  * Lower-case the NUL-terminated string s in place.
  *
  * The removed one-line loop passed every char to tolower(); the
  * replacement loop only converts values 65..90 (ASCII 'A'..'Z') and
  * leaves every other byte as it is.
  */
 void strtolower(char *s){
-   for(; *s; s++) *s = tolower(*s);
+   for(; *s; s++){
      if(*s >= 65 && *s <= 90) *s = tolower(*s);
   }
 }
--- a/src/parser.c
+++ b/src/parser.c
@ -173,6 +173,7 @@ void storno_attachment(struct _state *state){
 int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __data *data, struct __config *cfg){
   char *p, *q, puf[SMALLBUFSIZE];
   unsigned char b64buffer[MAXBUFSIZE];
   char tmpbuf[MAXBUFSIZE];
   int n64, len, writelen, boundary_line=0, result;
   if(cfg->debug == 1) printf("line: %s", buf);
@ -501,7 +502,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
      }
-      if(strcasestr(buf, "charset") && strcasestr(buf, "UTF-8")) state->utf8 = 1;
+      if(strcasestr(buf, "charset")) extractNameFromHeaderLine(buf, "charset", state->charset);
      if(strcasestr(state->charset, "UTF-8")) state->utf8 = 1;
   }
@ -577,6 +579,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
      memset(state->filename, 0, TINYBUFSIZE);
      memset(state->type, 0, TINYBUFSIZE);
      snprintf(state->charset, TINYBUFSIZE-1, "unknown");
      memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
      state->anamepos = 0;
@ -617,11 +620,13 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
   /* I believe that we can live without this function call */
   //decodeURL(buf);
-   if(state->texthtml == 1) decodeHTML(buf);
+   if(state->texthtml == 1) decodeHTML(buf, state->utf8);
   /* encode the body if it's not utf-8 encoded */
-   if(state->message_state == MSG_BODY && state->utf8 != 1) utf8_encode((unsigned char*)buf);
+   if(state->message_state == MSG_BODY && state->utf8 != 1){
-
+      result = utf8_encode(buf, strlen(buf), &tmpbuf[0], sizeof(tmpbuf), state->charset);
      if(result == OK) snprintf(buf, MAXBUFSIZE-1, "%s", tmpbuf);
   }
   translateLine((unsigned char*)buf, state);
--- a/src/parser_utils.c
+++ b/src/parser_utils.c
@ -15,10 +15,8 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <time.h>
 #include <iconv.h>
 #include <piler.h>
 #include "trans.h"
 #include "html.h"
 void init_state(struct _state *state){
@ -328,10 +326,7 @@ int extract_boundary(char *p, struct _state *state){
 void fixupEncodedHeaderLine(char *buf, int buflen){
   char *sb, *sq, *p, *q, *r, *s, *e, *start, *end;
   char v[SMALLBUFSIZE], puf[MAXBUFSIZE], encoding[SMALLBUFSIZE], tmpbuf[2*SMALLBUFSIZE];
-   iconv_t cd;
+   int need_encoding, ret;
   size_t size, inbytesleft, outbytesleft;
   char *inbuf, *outbuf;
   int need_encoding;
   if(buflen < 5) return;
@ -376,29 +371,16 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
                  if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
                  /* encode everything if it's not utf-8 encoded */
                  //if(strncasecmp(start+1, "utf-8", 5)) utf8_encode((unsigned char*)s+3);
                  //strncat(puf, s+3, sizeof(puf)-1);
-                  size = need_encoding = 0;
+                  need_encoding = 0;
                  ret = ERR;
                  if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
                     need_encoding = 1;
-                     memset(tmpbuf, 0, sizeof(tmpbuf));
+                     ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
                     cd = iconv_open("utf-8", encoding);
                     if(cd != (iconv_t)-1){
                        inbuf = s+3;
                        outbuf = &tmpbuf[0];
                        inbytesleft = strlen(s+3);
                        outbytesleft = sizeof(tmpbuf)-1;
                        size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
                        iconv_close(cd);
                     }
                     else { syslog(LOG_PRIORITY, "unsupported encoding: '%s'", encoding); }
                  }
-                  if(need_encoding == 1 && size >= 0)
+                  if(need_encoding == 1 && ret == OK)
                     strncat(puf, tmpbuf, sizeof(puf)-1);
                  else 
                     strncat(puf, s+3, sizeof(puf)-1);