mirror of
https://bitbucket.org/jsuto/piler.git
synced 2025-01-24 22:09:59 +01:00
decoding fixes
This commit is contained in:
parent
43bbbfd320
commit
5551df3f9d
@ -12,9 +12,9 @@
|
||||
#define PROGNAME "piler"
|
||||
#define PILERGETD_PROGNAME "pilergetd"
|
||||
|
||||
#define VERSION "1.1.0"
|
||||
#define VERSION "1.1.1"
|
||||
|
||||
#define BUILD 884
|
||||
#define BUILD 885
|
||||
|
||||
#define HOSTID "mailarchiver"
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <iconv.h>
|
||||
#include "decoder.h"
|
||||
#include "htmlentities.h"
|
||||
#include "config.h"
|
||||
@ -182,7 +183,7 @@ void decodeQP(char *p){
|
||||
}
|
||||
|
||||
|
||||
void decodeHTML(char *p){
|
||||
void decodeHTML(char *p, int utf8){
|
||||
unsigned char buf[MAXBUFSIZE], __u[8];
|
||||
char *s, *q;
|
||||
int count=0, len, c;
|
||||
@ -212,9 +213,16 @@ void decodeHTML(char *p){
|
||||
res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
|
||||
|
||||
if(res && res->val <= 255){
|
||||
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
|
||||
memcpy(&buf[count], &__u[0], len);
|
||||
count += len;
|
||||
|
||||
if(utf8 == 1){
|
||||
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
|
||||
memcpy(&buf[count], &__u[0], len);
|
||||
count += len;
|
||||
}
|
||||
else {
|
||||
buf[count] = res->val;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
buf[count] = 'q';
|
||||
@ -316,37 +324,25 @@ inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, in
|
||||
}
|
||||
|
||||
|
||||
void utf8_encode(unsigned char *p){
|
||||
int count=0, len;
|
||||
unsigned char *u, *s, utf8[MAXBUFSIZE], __u[8];
|
||||
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding){
|
||||
iconv_t cd;
|
||||
size_t size, inbytesleft, outbytesleft;
|
||||
|
||||
if(p == NULL || strlen((char *)p) == 0) return;
|
||||
memset(outbuf, 0, outbuflen);
|
||||
|
||||
memset(utf8, 0, MAXBUFSIZE);
|
||||
u = &utf8[0];
|
||||
s = p;
|
||||
cd = iconv_open("utf-8", encoding);
|
||||
|
||||
for(; *s; s++){
|
||||
if(cd != (iconv_t)-1){
|
||||
inbytesleft = inbuflen;
|
||||
outbytesleft = outbuflen-1;
|
||||
|
||||
utf8_encode_char(*s, &__u[0], sizeof(__u), &len);
|
||||
size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
|
||||
/*
|
||||
* this condition should never happen, as according to the RFCs:
|
||||
*
|
||||
* "Each line of characters MUST be no more than 998 characters, and
|
||||
* SHOULD be no more than 78 characters, excluding the CRLF."
|
||||
*
|
||||
*/
|
||||
iconv_close(cd);
|
||||
|
||||
if(count+len > sizeof(utf8)-1) break;
|
||||
|
||||
//printf("%s", __u);
|
||||
memcpy(u+count, &__u[0], len);
|
||||
|
||||
count += len;
|
||||
if(size >= 0) return OK;
|
||||
}
|
||||
|
||||
*(u+count) = '\0'; count++;
|
||||
memcpy(p, u, count);
|
||||
return ERR;
|
||||
}
|
||||
|
||||
|
@ -11,9 +11,9 @@ void sanitiseBase64(char *s);
|
||||
int decodeBase64(char *p);
|
||||
int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen);
|
||||
void decodeQP(char *p);
|
||||
void decodeHTML(char *p);
|
||||
void decodeHTML(char *p, int utf8);
|
||||
void decodeURL(char *p);
|
||||
inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len);
|
||||
void utf8_encode(unsigned char *p);
|
||||
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding);
|
||||
|
||||
#endif /* _DECODER_H */
|
||||
|
@ -165,6 +165,7 @@ struct _state {
|
||||
|
||||
char filename[TINYBUFSIZE];
|
||||
char type[TINYBUFSIZE];
|
||||
char charset[TINYBUFSIZE];
|
||||
|
||||
char attachment_name_buf[SMALLBUFSIZE];
|
||||
int anamepos;
|
||||
|
38
src/html.h
38
src/html.h
@ -1,38 +0,0 @@
|
||||
|
||||
struct html_tag {
|
||||
unsigned char length;
|
||||
char *entity;
|
||||
};
|
||||
|
||||
#define NUM_OF_SKIP_TAGS2 10
|
||||
|
||||
struct html_tag skip_html_tags2[] = {
|
||||
{ 4, "html" },
|
||||
{ 5, "/html" },
|
||||
{ 5, "/body" },
|
||||
{ 4, "meta" },
|
||||
{ 4, "head" },
|
||||
{ 5, "/head" },
|
||||
{ 5, "style" },
|
||||
{ 6, "/style" },
|
||||
{ 3, "div" },
|
||||
{ 4, "/div" }
|
||||
};
|
||||
|
||||
|
||||
#define NUM_OF_SKIP_TAGS 11
|
||||
|
||||
struct html_tag skip_html_tags[] = {
|
||||
{ 5, "style" },
|
||||
{ 4, "dir=" },
|
||||
{ 8, "content=" },
|
||||
{ 5, "name=" },
|
||||
{ 3, "id=" },
|
||||
{ 2, "v:" },
|
||||
{ 6, "class=" },
|
||||
{ 5, "xmlns" },
|
||||
{ 10, "http-equiv" },
|
||||
{ 7, "spidmax" },
|
||||
{ 5, "data=" }
|
||||
};
|
||||
|
@ -548,7 +548,9 @@ int read_from_stdin(struct session_data *sdata){
|
||||
|
||||
|
||||
void strtolower(char *s){
|
||||
for(; *s; s++) *s = tolower(*s);
|
||||
for(; *s; s++){
|
||||
if(*s >= 65 && *s <= 90) *s = tolower(*s);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
13
src/parser.c
13
src/parser.c
@ -173,6 +173,7 @@ void storno_attachment(struct _state *state){
|
||||
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __data *data, struct __config *cfg){
|
||||
char *p, *q, puf[SMALLBUFSIZE];
|
||||
unsigned char b64buffer[MAXBUFSIZE];
|
||||
char tmpbuf[MAXBUFSIZE];
|
||||
int n64, len, writelen, boundary_line=0, result;
|
||||
|
||||
if(cfg->debug == 1) printf("line: %s", buf);
|
||||
@ -501,7 +502,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
||||
}
|
||||
|
||||
|
||||
if(strcasestr(buf, "charset") && strcasestr(buf, "UTF-8")) state->utf8 = 1;
|
||||
if(strcasestr(buf, "charset")) extractNameFromHeaderLine(buf, "charset", state->charset);
|
||||
if(strcasestr(state->charset, "UTF-8")) state->utf8 = 1;
|
||||
}
|
||||
|
||||
|
||||
@ -577,6 +579,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
||||
|
||||
memset(state->filename, 0, TINYBUFSIZE);
|
||||
memset(state->type, 0, TINYBUFSIZE);
|
||||
snprintf(state->charset, TINYBUFSIZE-1, "unknown");
|
||||
|
||||
memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
|
||||
state->anamepos = 0;
|
||||
@ -617,11 +620,13 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
||||
/* I believe that we can live without this function call */
|
||||
//decodeURL(buf);
|
||||
|
||||
if(state->texthtml == 1) decodeHTML(buf);
|
||||
if(state->texthtml == 1) decodeHTML(buf, state->utf8);
|
||||
|
||||
/* encode the body if it's not utf-8 encoded */
|
||||
if(state->message_state == MSG_BODY && state->utf8 != 1) utf8_encode((unsigned char*)buf);
|
||||
|
||||
if(state->message_state == MSG_BODY && state->utf8 != 1){
|
||||
result = utf8_encode(buf, strlen(buf), &tmpbuf[0], sizeof(tmpbuf), state->charset);
|
||||
if(result == OK) snprintf(buf, MAXBUFSIZE-1, "%s", tmpbuf);
|
||||
}
|
||||
|
||||
translateLine((unsigned char*)buf, state);
|
||||
|
||||
|
@ -15,10 +15,8 @@
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <iconv.h>
|
||||
#include <piler.h>
|
||||
#include "trans.h"
|
||||
#include "html.h"
|
||||
|
||||
|
||||
void init_state(struct _state *state){
|
||||
@ -328,10 +326,7 @@ int extract_boundary(char *p, struct _state *state){
|
||||
void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||
char *sb, *sq, *p, *q, *r, *s, *e, *start, *end;
|
||||
char v[SMALLBUFSIZE], puf[MAXBUFSIZE], encoding[SMALLBUFSIZE], tmpbuf[2*SMALLBUFSIZE];
|
||||
iconv_t cd;
|
||||
size_t size, inbytesleft, outbytesleft;
|
||||
char *inbuf, *outbuf;
|
||||
int need_encoding;
|
||||
int need_encoding, ret;
|
||||
|
||||
if(buflen < 5) return;
|
||||
|
||||
@ -376,29 +371,16 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
|
||||
|
||||
/* encode everything if it's not utf-8 encoded */
|
||||
//if(strncasecmp(start+1, "utf-8", 5)) utf8_encode((unsigned char*)s+3);
|
||||
//strncat(puf, s+3, sizeof(puf)-1);
|
||||
|
||||
size = need_encoding = 0;
|
||||
need_encoding = 0;
|
||||
ret = ERR;
|
||||
|
||||
if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
|
||||
need_encoding = 1;
|
||||
memset(tmpbuf, 0, sizeof(tmpbuf));
|
||||
|
||||
cd = iconv_open("utf-8", encoding);
|
||||
|
||||
if(cd != (iconv_t)-1){
|
||||
inbuf = s+3;
|
||||
outbuf = &tmpbuf[0];
|
||||
inbytesleft = strlen(s+3);
|
||||
outbytesleft = sizeof(tmpbuf)-1;
|
||||
size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
iconv_close(cd);
|
||||
}
|
||||
else { syslog(LOG_PRIORITY, "unsupported encoding: '%s'", encoding); }
|
||||
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
|
||||
}
|
||||
|
||||
if(need_encoding == 1 && size >= 0)
|
||||
if(need_encoding == 1 && ret == OK)
|
||||
strncat(puf, tmpbuf, sizeof(puf)-1);
|
||||
else
|
||||
strncat(puf, s+3, sizeof(puf)-1);
|
||||
|
Loading…
x
Reference in New Issue
Block a user