From 33f0a8867081ba77486a91ea3012a752efc036b8 Mon Sep 17 00:00:00 2001 From: SJ Date: Fri, 7 Sep 2012 15:08:50 +0200 Subject: [PATCH] added attachment support --- configure | 17 +++- configure.in | 13 ++- piler-config.h.in | 1 + src/config.h | 2 +- src/decoder.c | 85 +++++++++++++------ src/decoder.h | 1 + src/defs.h | 3 + src/extract.c | 33 +++++++ src/extract.h | 7 ++ src/parser.c | 54 ++++++++++-- src/parser.h | 2 +- src/parser_utils.c | 17 ++++ src/piler.h | 1 + util/db-upgrade-0.20-vs-0.21.sql | 22 ++++- .../theme/default/templates/search/popup.tpl | 2 +- 15 files changed, 221 insertions(+), 39 deletions(-) create mode 100644 src/extract.c create mode 100644 src/extract.h diff --git a/configure b/configure index c05e224b..bb11075e 100755 --- a/configure +++ b/configure @@ -3414,6 +3414,8 @@ have_mysql="no" have_tre="no" have_zlib="no" +pdftotext="no" + have_static_build="no" @@ -4151,6 +4153,19 @@ if test "$have_icc_guide" = "yes" && test "$have_mysql" = "yes"; then fi +if test z`which pdftotext 2>/dev/null` != "z"; then + pdftotext=`which pdftotext` + +cat >>confdefs.h <<_ACEOF +#define HAVE_PDFTOTEXT "$pdftotext" +_ACEOF + +fi + + +echo "pdftotext: $pdftotext" + + id -u $RUNNING_USER 2>/dev/null 1>/dev/null if test $? -eq 1; then echo "the user \"$RUNNING_USER\" does not exists, please create it, first with adduser..."; exit 1; fi @@ -4168,7 +4183,7 @@ echo; echo CFLAGS="$static -O2 -Wall -g" LIBS="$antispam_libs $sunos_libs " -OBJS="dirs.o base64.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o import.o imap.o $objs" +OBJS="dirs.o base64.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o import.o imap.o extract.o $objs" ac_config_files="$ac_config_files Makefile src/Makefile etc/Makefile util/Makefile init.d/Makefile test/Makefile" diff --git a/configure.in b/configure.in index e81a5703..9bde0229 100644 --- a/configure.in +++ b/configure.in @@ -40,6 +40,8 @@ have_mysql="no" have_tre="no" have_zlib="no" +pdftotext="no" + have_static_build="no" @@ -273,6 +275,15 @@ if test "$have_icc_guide" = "yes" && test "$have_mysql" = "yes"; then fi +if test z`which pdftotext 2>/dev/null` != "z"; then + pdftotext=`which pdftotext` + AC_DEFINE_UNQUOTED(HAVE_PDFTOTEXT, "$pdftotext", [path to pdftotext]) +fi + + +echo "pdftotext: $pdftotext" + + id -u $RUNNING_USER 2>/dev/null 1>/dev/null if test $? -eq 1; then echo "the user \"$RUNNING_USER\" does not exists, please create it, first with adduser..."; exit 1; fi @@ -290,7 +301,7 @@ echo; echo CFLAGS="$static -O2 -Wall -g" LIBS="$antispam_libs $sunos_libs " -OBJS="dirs.o base64.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o import.o imap.o $objs" +OBJS="dirs.o base64.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o rules.o session.o message.o attachment.o digest.o store.o archive.o tai.o import.o imap.o extract.o $objs" AC_CONFIG_FILES([Makefile src/Makefile etc/Makefile util/Makefile init.d/Makefile test/Makefile]) AC_OUTPUT diff --git a/piler-config.h.in b/piler-config.h.in index eba91524..cedace2f 100644 --- a/piler-config.h.in +++ b/piler-config.h.in @@ -9,3 +9,4 @@ #define HAVE_DAEMON 1 +#undef HAVE_PDFTOTEXT diff --git a/src/config.h b/src/config.h index e65f49a9..9fa69c49 100644 --- a/src/config.h +++ b/src/config.h @@ -30,7 +30,7 @@ #define SESSION_TIMEOUT 420 #define MAXBUFSIZE 8192 #define SMALLBUFSIZE 512 -#define BIGBUFSIZE 65536 +#define BIGBUFSIZE 131072 #define REALLYBIGBUFSIZE 524288 #define TINYBUFSIZE 128 #define MAXVAL 256 diff --git a/src/decoder.c b/src/decoder.c index 2d3d902c..fa0f4470 100644 --- a/src/decoder.c +++ b/src/decoder.c @@ -80,8 +80,38 @@ void sanitiseBase64(char *s){ } +inline void pack_4_into_3(char *s, char *s2){ + int j, n[4], k1, k2; + + memset(s2, 0, 3); + + if(strlen(s) != 4) return; + + for(j=0; j<4; j++){ + k1 = s[j]; + n[j] = b64[k1]; + } + + k1 = n[0]; k1 = k1 << 2; + k2 = n[1]; k2 = k2 >> 4; + + s2[0] = k1 | k2; + + k1 = (n[1] & 0x0F) << 4; + k2 = n[2]; k2 = k2 >> 2; + + s2[1] = k1 | k2; + + k1 = n[2] << 6; + k2 = n[3] >> 0; + + + s2[2] = k1 | k2; +} + + int decodeBase64(char *p){ - int i, j, n[4], k1, k2, len=0; + int i, len=0; char s[5], s2[3], puf[MAXBUFSIZE]; if(strlen(p) < 4 || strlen(p) > MAXBUFSIZE/2) @@ -98,29 +128,7 @@ int decodeBase64(char *p){ if(len + 3 > sizeof(puf)-1) break; if(strlen(s) == 4){ - memset(s2, 0, 3); - - for(j=0; j<4; j++){ - k1 = s[j]; - n[j] = b64[k1]; - } - - k1 = n[0]; k1 = k1 << 2; - k2 = n[1]; k2 = k2 >> 4; - - s2[0] = k1 | k2; - - k1 = (n[1] & 0x0F) << 4; - k2 = n[2]; k2 = k2 >> 2; - - s2[1] = k1 | k2; - - k1 = n[2] << 6; - k2 = n[3] >> 0; - - - s2[2] = k1 | k2; - + pack_4_into_3(s, s2); memcpy(puf+len, s2, 3); len += 3; @@ -133,7 +141,36 @@ int decodeBase64(char *p){ snprintf(p, MAXBUFSIZE-1, "%s", puf); return len; +} + +int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen){ + int i, len=0; + char s[5], s2[3]; + + if(plen < 4 || plen > blen) + return 0; + + for(i=0; i blen-1) break; + + if(strlen(s) == 4){ + pack_4_into_3(s, s2); + memcpy(b+len, s2, 3); + + len += 3; + } + + } + + return len; } diff --git a/src/decoder.h b/src/decoder.h index 48f33f3e..c582cd90 100644 --- a/src/decoder.h +++ b/src/decoder.h @@ -9,6 +9,7 @@ void base64_encode(unsigned char *in, int inlen, char *out, int outlen); void sanitiseBase64(char *s); int decodeBase64(char *p); +int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen); void decodeQP(char *p); void decodeHTML(char *p); void decodeURL(char *p); diff --git a/src/defs.h b/src/defs.h index 14e5779b..0655bcdc 100644 --- a/src/defs.h +++ b/src/defs.h @@ -68,9 +68,11 @@ struct child { struct attachment { int size; char type[TINYBUFSIZE]; + char aname[TINYBUFSIZE]; char filename[TINYBUFSIZE]; char internalname[TINYBUFSIZE]; char digest[2*DIGEST_LENGTH+1]; + char dumped; }; @@ -125,6 +127,7 @@ struct _state { int skip_html; int has_to_dump; int fd; + int b64fd; int mfd; int octetstream; int realbinary; diff --git a/src/extract.c b/src/extract.c new file mode 100644 index 00000000..f004887e --- /dev/null +++ b/src/extract.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include +#include + + +void extract_pdf(struct session_data *sdata, struct _state *state, char *filename, struct __config *cfg){ + int len; + char buf[MAXBUFSIZE]; + FILE *f; + + snprintf(buf, sizeof(buf)-1, "%s -enc UTF-8 %s -", HAVE_PDFTOTEXT, filename); + + f = popen(buf, "r"); + if(f){ + while(fgets(buf, sizeof(buf)-1, f)){ + len = strlen(buf); + + if(state->bodylen < BIGBUFSIZE-len-1){ + memcpy(&(state->b_body[state->bodylen]), buf, len); + state->bodylen += len; + } + else break; + } + + fclose(f); + } + else syslog(LOG_PRIORITY, "%s: popen(): %s", sdata->ttmpfile, buf); + +} + + diff --git a/src/extract.h b/src/extract.h new file mode 100644 index 00000000..d7e4d73d --- /dev/null +++ b/src/extract.h @@ -0,0 +1,7 @@ +#ifndef _EXTRACT_H + #define _EXTRACT_H + +void extract_pdf(struct session_data *sdata, struct _state *state, char *filename, struct __config *cfg); + + +#endif /* _EXTRACT_H */ diff --git a/src/parser.c b/src/parser.c index 7d08acf8..acd07752 100644 --- a/src/parser.c +++ b/src/parser.c @@ -129,6 +129,19 @@ void post_parse(struct session_data *sdata, struct _state *state, struct __confi len = strlen(p); if(strlen(sdata->attachments) < SMALLBUFSIZE-len-1 && !strstr(sdata->attachments, p)) memcpy(&(sdata->attachments[strlen(sdata->attachments)]), p, len); + + if(state->attachments[i].dumped == 1){ + + #ifdef HAVE_PDFTOTEXT + if( + strcmp(p, "pdf,") == 0 || + (strcmp(p, "other,") == 0 && strcasestr(state->attachments[i].filename, ".pdf")) + ) extract_pdf(sdata, state, state->attachments[i].aname, cfg); + #endif + + unlink(state->attachments[i].aname); + } + } @@ -138,18 +151,13 @@ void post_parse(struct session_data *sdata, struct _state *state, struct __confi else snprintf(state->message_id, SMALLBUFSIZE-1, "null"); } - //len = strlen(state->b_from); - //if(state->b_from[len-1] == ' ') state->b_from[len-1] = '\0'; - - //len = strlen(state->b_to); - //if(state->b_to[len-1] == ' ') state->b_to[len-1] = '\0'; - } int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __config *cfg){ char *p, *q, puf[SMALLBUFSIZE]; - int x, n, len, writelen, b64_len, boundary_line=0; + unsigned char b64buffer[MAXBUFSIZE]; + int x, n, n64, len, writelen, b64_len, boundary_line=0; if(cfg->debug == 1) printf("line: %s", buf); @@ -192,7 +200,15 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int if(state->message_state == MSG_BODY && state->fd != -1 && is_item_on_string(state->boundaries, buf) == 0){ //n = write(state->fd, buf, len); // WRITE if(len + state->abufpos > abuffersize-1){ - n = write(state->fd, abuffer, state->abufpos); state->abufpos = 0; memset(abuffer, 0, abuffersize); + n = write(state->fd, abuffer, state->abufpos); + + if(state->b64fd != -1){ + abuffer[state->abufpos] = '\0'; + n64 = base64_decode_attachment_buffer(abuffer, state->abufpos, &b64buffer[0], sizeof(b64buffer)); + n64 = write(state->b64fd, b64buffer, n64); + } + + state->abufpos = 0; memset(abuffer, 0, abuffersize); } memcpy(abuffer+state->abufpos, buf, len); state->abufpos += len; @@ -222,14 +238,24 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int snprintf(state->attachments[state->n_attachments].filename, TINYBUFSIZE-1, "%s", state->filename); snprintf(state->attachments[state->n_attachments].type, TINYBUFSIZE-1, "%s", state->type); snprintf(state->attachments[state->n_attachments].internalname, TINYBUFSIZE-1, "%s.a%d", sdata->ttmpfile, state->n_attachments); + snprintf(state->attachments[state->n_attachments].aname, TINYBUFSIZE-1, "%s.a%d.bin", sdata->ttmpfile, state->n_attachments); //printf("DUMP FILE: %s\n", state->attachments[state->n_attachments].internalname); if(take_into_pieces == 1){ state->fd = open(state->attachments[state->n_attachments].internalname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); + + p = determine_attachment_type(state->attachments[state->n_attachments].filename, state->attachments[state->n_attachments].type); + + if(strcmp("pdf,", p) == 0 || strcmp("other,", p) == 0){ + state->b64fd = open(state->attachments[state->n_attachments].aname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); + state->attachments[state->n_attachments].dumped = 1; + } + if(state->fd == -1){ state->attachments[state->n_attachments].size = 0; + state->attachments[state->n_attachments].dumped = 0; memset(state->attachments[state->n_attachments].type, 0, TINYBUFSIZE); memset(state->attachments[state->n_attachments].filename, 0, TINYBUFSIZE); memset(state->attachments[state->n_attachments].internalname, 0, TINYBUFSIZE); @@ -425,11 +451,21 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int if(state->has_to_dump == 1){ if(take_into_pieces == 1 && state->fd != -1){ if(state->abufpos > 0){ - n = write(state->fd, abuffer, state->abufpos); state->abufpos = 0; memset(abuffer, 0, abuffersize); + n = write(state->fd, abuffer, state->abufpos); + + if(state->b64fd != -1){ + abuffer[state->abufpos] = '\0'; + n64 = base64_decode_attachment_buffer(abuffer, state->abufpos, &b64buffer[0], sizeof(b64buffer)); + n64 = write(state->b64fd, b64buffer, n64); + } + + state->abufpos = 0; memset(abuffer, 0, abuffersize); } close(state->fd); + close(state->b64fd); } state->fd = -1; + state->b64fd = -1; } diff --git a/src/parser.h b/src/parser.h index aa88f9aa..19d70b41 100644 --- a/src/parser.h +++ b/src/parser.h @@ -11,7 +11,6 @@ struct _state parse_message(struct session_data *sdata, int take_into_pieces, struct __config *cfg); void post_parse(struct session_data *sdata, struct _state *state, struct __config *cfg); -//int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, struct __config *cfg); int parse_line(char *buf, struct _state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, int writebuffersize, char *abuffer, int abuffersize, struct __config *cfg); void init_state(struct _state *state); @@ -33,5 +32,6 @@ void fixURL(char *url); int extractNameFromHeaderLine(char *s, char *name, char *resultbuf); char *determine_attachment_type(char *filename, char *type); void parse_reference(struct _state *state, char *s); +int base64_decode_attachment_buffer(char *p, int plen, unsigned char *b, int blen); #endif /* _PARSER_H */ diff --git a/src/parser_utils.c b/src/parser_utils.c index 6d040d40..efd64324 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -56,6 +56,7 @@ void init_state(struct _state *state){ state->has_to_dump = 0; state->fd = -1; + state->b64fd = -1; state->mfd = -1; state->realbinary = 0; state->octetstream = 0; @@ -73,7 +74,9 @@ void init_state(struct _state *state){ for(i=0; iattachments[i].size = 0; + state->attachments[i].dumped = 0; memset(state->attachments[i].type, 0, TINYBUFSIZE); + memset(state->attachments[i].aname, 0, TINYBUFSIZE); memset(state->attachments[i].filename, 0, TINYBUFSIZE); memset(state->attachments[i].internalname, 0, TINYBUFSIZE); memset(state->attachments[i].digest, 0, 2*DIGEST_LENGTH+1); @@ -748,3 +751,17 @@ void parse_reference(struct _state *state, char *s){ } + +int base64_decode_attachment_buffer(char *p, int plen, unsigned char *b, int blen){ + int b64len=0; + char puf[2*SMALLBUFSIZE]; + + do { + p = split_str(p, "\n", puf, sizeof(puf)-1); + trimBuffer(puf); + b64len += decode_base64_to_buffer(puf, strlen(puf), b+b64len, blen); + } while(p); + + return b64len; +} + diff --git a/src/piler.h b/src/piler.h index 10181656..c8b5a25a 100644 --- a/src/piler.h +++ b/src/piler.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/util/db-upgrade-0.20-vs-0.21.sql b/util/db-upgrade-0.20-vs-0.21.sql index 5e949071..c5da5774 100644 --- a/util/db-upgrade-0.20-vs-0.21.sql +++ b/util/db-upgrade-0.20-vs-0.21.sql @@ -1,5 +1,15 @@ alter table `sph_index` add column `folder` int default 0; +drop table if exists `tag`; +create table if not exists `tag` ( + `_id` bigint unsigned auto_increment not null, + `id` bigint not null, + `uid` int not null, + `tag` char(255) default null, + unique(`id`, `uid`), + key (`_id`) +) ENGINE=InnoDB; + create table if not exists `folder` ( `id` int not null auto_increment, `parent_id` int default 0, @@ -8,11 +18,21 @@ create table if not exists `folder` ( ) Engine=InnoDB; +create table if not exists `folder_user` ( + `id` bigint unsigned not null, + `uid` int unsigned not null, + key `folder_user_idx` (`id`), + key `folder_user_idx2` (`uid`) +) ENGINE=InnoDB; + + create table if not exists `note` ( + `_id` bigint unsigned auto_increment not null, `id` bigint unsigned not null, `uid` int not null, `note` text default null, unique(`id`, `uid`), - key (`id`) + key (`_id`) ) ENGINE=InnoDB; + diff --git a/webui/view/theme/default/templates/search/popup.tpl b/webui/view/theme/default/templates/search/popup.tpl index 0b3848c5..3655b9d3 100644 --- a/webui/view/theme/default/templates/search/popup.tpl +++ b/webui/view/theme/default/templates/search/popup.tpl @@ -47,7 +47,7 @@
:
-
checked="checked" />
+
checked="checked" />