mirror of
https://bitbucket.org/jsuto/piler.git
synced 2024-12-26 06:50:11 +01:00
decoding fixes
This commit is contained in:
parent
43bbbfd320
commit
5551df3f9d
@ -12,9 +12,9 @@
|
|||||||
#define PROGNAME "piler"
|
#define PROGNAME "piler"
|
||||||
#define PILERGETD_PROGNAME "pilergetd"
|
#define PILERGETD_PROGNAME "pilergetd"
|
||||||
|
|
||||||
#define VERSION "1.1.0"
|
#define VERSION "1.1.1"
|
||||||
|
|
||||||
#define BUILD 884
|
#define BUILD 885
|
||||||
|
|
||||||
#define HOSTID "mailarchiver"
|
#define HOSTID "mailarchiver"
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <iconv.h>
|
||||||
#include "decoder.h"
|
#include "decoder.h"
|
||||||
#include "htmlentities.h"
|
#include "htmlentities.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
@ -182,7 +183,7 @@ void decodeQP(char *p){
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void decodeHTML(char *p){
|
void decodeHTML(char *p, int utf8){
|
||||||
unsigned char buf[MAXBUFSIZE], __u[8];
|
unsigned char buf[MAXBUFSIZE], __u[8];
|
||||||
char *s, *q;
|
char *s, *q;
|
||||||
int count=0, len, c;
|
int count=0, len, c;
|
||||||
@ -212,10 +213,17 @@ void decodeHTML(char *p){
|
|||||||
res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
|
res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
|
||||||
|
|
||||||
if(res && res->val <= 255){
|
if(res && res->val <= 255){
|
||||||
|
|
||||||
|
if(utf8 == 1){
|
||||||
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
|
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
|
||||||
memcpy(&buf[count], &__u[0], len);
|
memcpy(&buf[count], &__u[0], len);
|
||||||
count += len;
|
count += len;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
buf[count] = res->val;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
buf[count] = 'q';
|
buf[count] = 'q';
|
||||||
count++;
|
count++;
|
||||||
@ -316,37 +324,25 @@ inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, in
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void utf8_encode(unsigned char *p){
|
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding){
|
||||||
int count=0, len;
|
iconv_t cd;
|
||||||
unsigned char *u, *s, utf8[MAXBUFSIZE], __u[8];
|
size_t size, inbytesleft, outbytesleft;
|
||||||
|
|
||||||
if(p == NULL || strlen((char *)p) == 0) return;
|
memset(outbuf, 0, outbuflen);
|
||||||
|
|
||||||
memset(utf8, 0, MAXBUFSIZE);
|
cd = iconv_open("utf-8", encoding);
|
||||||
u = &utf8[0];
|
|
||||||
s = p;
|
|
||||||
|
|
||||||
for(; *s; s++){
|
if(cd != (iconv_t)-1){
|
||||||
|
inbytesleft = inbuflen;
|
||||||
|
outbytesleft = outbuflen-1;
|
||||||
|
|
||||||
utf8_encode_char(*s, &__u[0], sizeof(__u), &len);
|
size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||||
|
|
||||||
/*
|
iconv_close(cd);
|
||||||
* this condition should never happen, as according to the RFCs:
|
|
||||||
*
|
|
||||||
* "Each line of characters MUST be no more than 998 characters, and
|
|
||||||
* SHOULD be no more than 78 characters, excluding the CRLF."
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
if(count+len > sizeof(utf8)-1) break;
|
if(size >= 0) return OK;
|
||||||
|
|
||||||
//printf("%s", __u);
|
|
||||||
memcpy(u+count, &__u[0], len);
|
|
||||||
|
|
||||||
count += len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
*(u+count) = '\0'; count++;
|
return ERR;
|
||||||
memcpy(p, u, count);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,9 +11,9 @@ void sanitiseBase64(char *s);
|
|||||||
int decodeBase64(char *p);
|
int decodeBase64(char *p);
|
||||||
int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen);
|
int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen);
|
||||||
void decodeQP(char *p);
|
void decodeQP(char *p);
|
||||||
void decodeHTML(char *p);
|
void decodeHTML(char *p, int utf8);
|
||||||
void decodeURL(char *p);
|
void decodeURL(char *p);
|
||||||
inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len);
|
inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len);
|
||||||
void utf8_encode(unsigned char *p);
|
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding);
|
||||||
|
|
||||||
#endif /* _DECODER_H */
|
#endif /* _DECODER_H */
|
||||||
|
@ -165,6 +165,7 @@ struct _state {
|
|||||||
|
|
||||||
char filename[TINYBUFSIZE];
|
char filename[TINYBUFSIZE];
|
||||||
char type[TINYBUFSIZE];
|
char type[TINYBUFSIZE];
|
||||||
|
char charset[TINYBUFSIZE];
|
||||||
|
|
||||||
char attachment_name_buf[SMALLBUFSIZE];
|
char attachment_name_buf[SMALLBUFSIZE];
|
||||||
int anamepos;
|
int anamepos;
|
||||||
|
38
src/html.h
38
src/html.h
@ -1,38 +0,0 @@
|
|||||||
|
|
||||||
struct html_tag {
|
|
||||||
unsigned char length;
|
|
||||||
char *entity;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define NUM_OF_SKIP_TAGS2 10
|
|
||||||
|
|
||||||
struct html_tag skip_html_tags2[] = {
|
|
||||||
{ 4, "html" },
|
|
||||||
{ 5, "/html" },
|
|
||||||
{ 5, "/body" },
|
|
||||||
{ 4, "meta" },
|
|
||||||
{ 4, "head" },
|
|
||||||
{ 5, "/head" },
|
|
||||||
{ 5, "style" },
|
|
||||||
{ 6, "/style" },
|
|
||||||
{ 3, "div" },
|
|
||||||
{ 4, "/div" }
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#define NUM_OF_SKIP_TAGS 11
|
|
||||||
|
|
||||||
struct html_tag skip_html_tags[] = {
|
|
||||||
{ 5, "style" },
|
|
||||||
{ 4, "dir=" },
|
|
||||||
{ 8, "content=" },
|
|
||||||
{ 5, "name=" },
|
|
||||||
{ 3, "id=" },
|
|
||||||
{ 2, "v:" },
|
|
||||||
{ 6, "class=" },
|
|
||||||
{ 5, "xmlns" },
|
|
||||||
{ 10, "http-equiv" },
|
|
||||||
{ 7, "spidmax" },
|
|
||||||
{ 5, "data=" }
|
|
||||||
};
|
|
||||||
|
|
@ -548,7 +548,9 @@ int read_from_stdin(struct session_data *sdata){
|
|||||||
|
|
||||||
|
|
||||||
void strtolower(char *s){
|
void strtolower(char *s){
|
||||||
for(; *s; s++) *s = tolower(*s);
|
for(; *s; s++){
|
||||||
|
if(*s >= 65 && *s <= 90) *s = tolower(*s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
13
src/parser.c
13
src/parser.c
@ -173,6 +173,7 @@ void storno_attachment(struct _state *state){
|
|||||||
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __data *data, struct __config *cfg){
|
int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __data *data, struct __config *cfg){
|
||||||
char *p, *q, puf[SMALLBUFSIZE];
|
char *p, *q, puf[SMALLBUFSIZE];
|
||||||
unsigned char b64buffer[MAXBUFSIZE];
|
unsigned char b64buffer[MAXBUFSIZE];
|
||||||
|
char tmpbuf[MAXBUFSIZE];
|
||||||
int n64, len, writelen, boundary_line=0, result;
|
int n64, len, writelen, boundary_line=0, result;
|
||||||
|
|
||||||
if(cfg->debug == 1) printf("line: %s", buf);
|
if(cfg->debug == 1) printf("line: %s", buf);
|
||||||
@ -501,7 +502,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if(strcasestr(buf, "charset") && strcasestr(buf, "UTF-8")) state->utf8 = 1;
|
if(strcasestr(buf, "charset")) extractNameFromHeaderLine(buf, "charset", state->charset);
|
||||||
|
if(strcasestr(state->charset, "UTF-8")) state->utf8 = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -577,6 +579,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
|||||||
|
|
||||||
memset(state->filename, 0, TINYBUFSIZE);
|
memset(state->filename, 0, TINYBUFSIZE);
|
||||||
memset(state->type, 0, TINYBUFSIZE);
|
memset(state->type, 0, TINYBUFSIZE);
|
||||||
|
snprintf(state->charset, TINYBUFSIZE-1, "unknown");
|
||||||
|
|
||||||
memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
|
memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
|
||||||
state->anamepos = 0;
|
state->anamepos = 0;
|
||||||
@ -617,11 +620,13 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
|||||||
/* I believe that we can live without this function call */
|
/* I believe that we can live without this function call */
|
||||||
//decodeURL(buf);
|
//decodeURL(buf);
|
||||||
|
|
||||||
if(state->texthtml == 1) decodeHTML(buf);
|
if(state->texthtml == 1) decodeHTML(buf, state->utf8);
|
||||||
|
|
||||||
/* encode the body if it's not utf-8 encoded */
|
/* encode the body if it's not utf-8 encoded */
|
||||||
if(state->message_state == MSG_BODY && state->utf8 != 1) utf8_encode((unsigned char*)buf);
|
if(state->message_state == MSG_BODY && state->utf8 != 1){
|
||||||
|
result = utf8_encode(buf, strlen(buf), &tmpbuf[0], sizeof(tmpbuf), state->charset);
|
||||||
|
if(result == OK) snprintf(buf, MAXBUFSIZE-1, "%s", tmpbuf);
|
||||||
|
}
|
||||||
|
|
||||||
translateLine((unsigned char*)buf, state);
|
translateLine((unsigned char*)buf, state);
|
||||||
|
|
||||||
|
@ -15,10 +15,8 @@
|
|||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <iconv.h>
|
|
||||||
#include <piler.h>
|
#include <piler.h>
|
||||||
#include "trans.h"
|
#include "trans.h"
|
||||||
#include "html.h"
|
|
||||||
|
|
||||||
|
|
||||||
void init_state(struct _state *state){
|
void init_state(struct _state *state){
|
||||||
@ -328,10 +326,7 @@ int extract_boundary(char *p, struct _state *state){
|
|||||||
void fixupEncodedHeaderLine(char *buf, int buflen){
|
void fixupEncodedHeaderLine(char *buf, int buflen){
|
||||||
char *sb, *sq, *p, *q, *r, *s, *e, *start, *end;
|
char *sb, *sq, *p, *q, *r, *s, *e, *start, *end;
|
||||||
char v[SMALLBUFSIZE], puf[MAXBUFSIZE], encoding[SMALLBUFSIZE], tmpbuf[2*SMALLBUFSIZE];
|
char v[SMALLBUFSIZE], puf[MAXBUFSIZE], encoding[SMALLBUFSIZE], tmpbuf[2*SMALLBUFSIZE];
|
||||||
iconv_t cd;
|
int need_encoding, ret;
|
||||||
size_t size, inbytesleft, outbytesleft;
|
|
||||||
char *inbuf, *outbuf;
|
|
||||||
int need_encoding;
|
|
||||||
|
|
||||||
if(buflen < 5) return;
|
if(buflen < 5) return;
|
||||||
|
|
||||||
@ -376,29 +371,16 @@ void fixupEncodedHeaderLine(char *buf, int buflen){
|
|||||||
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
|
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
|
||||||
|
|
||||||
/* encode everything if it's not utf-8 encoded */
|
/* encode everything if it's not utf-8 encoded */
|
||||||
//if(strncasecmp(start+1, "utf-8", 5)) utf8_encode((unsigned char*)s+3);
|
|
||||||
//strncat(puf, s+3, sizeof(puf)-1);
|
|
||||||
|
|
||||||
size = need_encoding = 0;
|
need_encoding = 0;
|
||||||
|
ret = ERR;
|
||||||
|
|
||||||
if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
|
if(strlen(encoding) > 2 && strcasecmp(encoding, "utf-8")){
|
||||||
need_encoding = 1;
|
need_encoding = 1;
|
||||||
memset(tmpbuf, 0, sizeof(tmpbuf));
|
ret = utf8_encode(s+3, strlen(s+3), &tmpbuf[0], sizeof(tmpbuf), encoding);
|
||||||
|
|
||||||
cd = iconv_open("utf-8", encoding);
|
|
||||||
|
|
||||||
if(cd != (iconv_t)-1){
|
|
||||||
inbuf = s+3;
|
|
||||||
outbuf = &tmpbuf[0];
|
|
||||||
inbytesleft = strlen(s+3);
|
|
||||||
outbytesleft = sizeof(tmpbuf)-1;
|
|
||||||
size = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
|
||||||
iconv_close(cd);
|
|
||||||
}
|
|
||||||
else { syslog(LOG_PRIORITY, "unsupported encoding: '%s'", encoding); }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(need_encoding == 1 && size >= 0)
|
if(need_encoding == 1 && ret == OK)
|
||||||
strncat(puf, tmpbuf, sizeof(puf)-1);
|
strncat(puf, tmpbuf, sizeof(puf)-1);
|
||||||
else
|
else
|
||||||
strncat(puf, s+3, sizeof(puf)-1);
|
strncat(puf, s+3, sizeof(puf)-1);
|
||||||
|
Loading…
Reference in New Issue
Block a user