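decoding fixes: convert message bodies and encoded header words to UTF-8 with iconv using the declared charset, and lowercase only ASCII letters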

This commit is contained in:
SJ 2014-08-30 21:10:29 +02:00
parent 43bbbfd320
commit 5551df3f9d
8 changed files with 46 additions and 98 deletions

View File

@ -12,9 +12,9 @@
#define PROGNAME "piler"
#define PILERGETD_PROGNAME "pilergetd"
#define VERSION "1.1.0"
#define VERSION "1.1.1"
#define BUILD 884
#define BUILD 885
#define HOSTID "mailarchiver"

View File

@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <iconv.h>
#include "decoder.h"
#include "htmlentities.h"
#include "config.h"
@ -182,7 +183,7 @@ void decodeQP(char *p){
}
void decodeHTML(char *p){
void decodeHTML(char *p, int utf8){
unsigned char buf[MAXBUFSIZE], __u[8];
char *s, *q;
int count=0, len, c;
@ -212,9 +213,16 @@ void decodeHTML(char *p){
res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
if(res && res->val <= 255){
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
memcpy(&buf[count], &__u[0], len);
count += len;
if(utf8 == 1){
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
memcpy(&buf[count], &__u[0], len);
count += len;
}
else {
buf[count] = res->val;
count++;
}
}
else {
buf[count] = 'q';
@ -316,37 +324,25 @@ inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, in
}
/*
 * Convert a buffer from the given character encoding to UTF-8 with iconv.
 *
 * inbuf/inbuflen   - the bytes to convert and their count
 * outbuf/outbuflen - destination buffer and its size; it is zero-filled first
 *                    and at most outbuflen-1 bytes are written, so the result
 *                    is always NUL-terminated
 * encoding         - name of the source character set, passed to iconv_open()
 *
 * Returns OK when iconv converted the input, ERR when an argument is
 * unusable, iconv_open() rejects the encoding, or iconv() itself fails.
 * On ERR outbuf may contain a partial conversion; callers should ignore it.
 */
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding){
   iconv_t cd;
   size_t size, inbytesleft, outbytesleft;

   /* outbuflen < 1 would make "outbuflen-1" wrap to a huge size_t below */
   if(inbuf == NULL || outbuf == NULL || encoding == NULL || inbuflen < 0 || outbuflen < 1) return ERR;

   memset(outbuf, 0, outbuflen);

   cd = iconv_open("utf-8", encoding);
   if(cd == (iconv_t)-1) return ERR;

   inbytesleft = inbuflen;
   outbytesleft = outbuflen-1;

   size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);

   iconv_close(cd);

   /*
    * iconv() signals failure with (size_t)-1. size_t is unsigned, so a
    * "size >= 0" test is always true and would hide every conversion error.
    */
   if(size == (size_t)-1) return ERR;

   return OK;
}

View File

@ -11,9 +11,9 @@ void sanitiseBase64(char *s);
int decodeBase64(char *p);
int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen);
void decodeQP(char *p);
void decodeHTML(char *p);
void decodeHTML(char *p, int utf8);
void decodeURL(char *p);
inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len);
void utf8_encode(unsigned char *p);
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding);
#endif /* _DECODER_H */

View File

@ -165,6 +165,7 @@ struct _state {
char filename[TINYBUFSIZE];
char type[TINYBUFSIZE];
char charset[TINYBUFSIZE];
char attachment_name_buf[SMALLBUFSIZE];
int anamepos;

View File

@ -1,38 +0,0 @@
/* A fixed HTML token paired with its length. */
struct html_tag {
   unsigned char length;   /* character count of entity, trailing NUL excluded */
   char *entity;           /* the token text */
};

/*
 * Element names (opening and closing forms).
 * NOTE(review): presumably the tags the HTML parser skips; the code
 * consuming this table is not visible here.
 */
#define NUM_OF_SKIP_TAGS2 10

struct html_tag skip_html_tags2[] = {
   { .length = 4, .entity = "html" },
   { .length = 5, .entity = "/html" },
   { .length = 5, .entity = "/body" },
   { .length = 4, .entity = "meta" },
   { .length = 4, .entity = "head" },
   { .length = 5, .entity = "/head" },
   { .length = 5, .entity = "style" },
   { .length = 6, .entity = "/style" },
   { .length = 3, .entity = "div" },
   { .length = 4, .entity = "/div" }
};

/*
 * Attribute-like and namespace-like prefixes.
 * NOTE(review): presumably also skipped by the HTML parser; confirm
 * against the caller.
 */
#define NUM_OF_SKIP_TAGS 11

struct html_tag skip_html_tags[] = {
   { .length = 5,  .entity = "style" },
   { .length = 4,  .entity = "dir=" },
   { .length = 8,  .entity = "content=" },
   { .length = 5,  .entity = "name=" },
   { .length = 3,  .entity = "id=" },
   { .length = 2,  .entity = "v:" },
   { .length = 6,  .entity = "class=" },
   { .length = 5,  .entity = "xmlns" },
   { .length = 10, .entity = "http-equiv" },
   { .length = 7,  .entity = "spidmax" },
   { .length = 5,  .entity = "data=" }
};

View File

@ -548,7 +548,9 @@ int read_from_stdin(struct session_data *sdata){
/*
 * Lowercase the ASCII letters 'A'..'Z' of a NUL-terminated string in place.
 *
 * Every other byte is left untouched. Only values in the range 65..90 are
 * handed to tolower(), so bytes with the high bit set (e.g. parts of a
 * multi-byte UTF-8 sequence) are neither altered nor passed to tolower()
 * as negative char values.
 *
 * A NULL pointer is ignored.
 */
void strtolower(char *s){
   if(!s) return;

   for(; *s; s++){
      if(*s >= 'A' && *s <= 'Z') *s = tolower(*s);
   }
}

View File

@ -173,6 +173,7 @@ void storno_attachment(struct _state *state){
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __data *data, struct __config *cfg){
char *p, *q, puf[SMALLBUFSIZE];
unsigned char b64buffer[MAXBUFSIZE];
char tmpbuf[MAXBUFSIZE];
int n64, len, writelen, boundary_line=0, result;
if(cfg->debug == 1) printf("line: %s", buf);
@ -501,7 +502,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
}
if(strcasestr(buf, "charset") && strcasestr(buf, "UTF-8")) state->utf8 = 1;
if(strcasestr(buf, "charset")) extractNameFromHeaderLine(buf, "charset", state->charset);
if(strcasestr(state->charset, "UTF-8")) state->utf8 = 1;
}
@ -577,6 +579,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
memset(state->filename, 0, TINYBUFSIZE);
memset(state->type, 0, TINYBUFSIZE);
snprintf(state->charset, TINYBUFSIZE-1, "unknown");
memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
state->anamepos = 0;
@ -617,11 +620,13 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
/* I believe that we can live without this function call */
//decodeURL(buf);
if(state->texthtml == 1) decodeHTML(buf);
if(state->texthtml == 1) decodeHTML(buf, state->utf8);
/* encode the body if it's not utf-8 encoded */
if(state->message_state == MSG_BODY && state->utf8 != 1) utf8_encode((unsigned char*)buf);
if(state->message_state == MSG_BODY && state->utf8 != 1){
result = utf8_encode(buf, strlen(buf), &tmpbuf[0], sizeof(tmpbuf), state->charset);
if(result == OK) snprintf(buf, MAXBUFSIZE-1, "%s", tmpbuf);
}
translateLine((unsigned char*)buf, state);

View File

@ -15,10 +15,8 @@
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <iconv.h>
#include <piler.h>
#include "trans.h"
#include "html.h"
void init_state(struct _state *state){
@ -328,10 +326,7 @@ int extract_boundary(char *p, struct _state *state){
void fixupEncodedHeaderLine(char *buf, int buflen){
char *sb, *sq, *p, *q, *r, *s, *e, *start, *end;
char v[SMALLBUFSIZE], puf[MAXBUFSIZE], encoding[SMALLBUFSIZE], tmpbuf[2*SMALLBUFSIZE];
iconv_t cd;
size_t size, inbytesleft, outbytesleft;
char *inbuf, *outbuf;
int need_encoding;
int need_encoding, ret;
if(buflen < 5) return;
@ -376,29 +371,16 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
/* encode everything if it's not utf-8 encoded */
//if(strncasecmp(start+1, "utf-8", 5)) utf8_encode((unsigned char*)s+3);
//strncat(puf, s+3, sizeof(puf)-1);
size = need_encoding = 0;
need_encoding = 0;
ret = ERR;
if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
need_encoding = 1;
memset(tmpbuf, 0, sizeof(tmpbuf));
cd = iconv_open("utf-8", encoding);
if(cd != (iconv_t)-1){
inbuf = s+3;
outbuf = &tmpbuf[0];
inbytesleft = strlen(s+3);
outbytesleft = sizeof(tmpbuf)-1;
size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
iconv_close(cd);
}
else { syslog(LOG_PRIORITY, "unsupported encoding: '%s'", encoding); }
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
}
if(need_encoding == 1 && size >= 0)
if(need_encoding == 1 && ret == OK)
strncat(puf, tmpbuf, sizeof(puf)-1);
else
strncat(puf, s+3, sizeof(puf)-1);