From 152351e3730be9a5737a60fc5128a330915b4d64 Mon Sep 17 00:00:00 2001 From: SJ Date: Tue, 11 Sep 2012 14:11:17 +0200 Subject: [PATCH] added zip support --- src/config.h | 3 +- src/extract.c | 70 +++++++- src/extract.h | 9 - src/piler.h | 3 +- test/Makefile.in | 7 +- test/ptest.c | 449 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 525 insertions(+), 16 deletions(-) delete mode 100644 src/extract.h create mode 100644 test/ptest.c diff --git a/src/config.h b/src/config.h index 9fa69c49..3625eaec 100644 --- a/src/config.h +++ b/src/config.h @@ -13,7 +13,7 @@ #define VERSION "0.1.21" -#define BUILD 701 +#define BUILD 704 #define HOSTID "mailarchiver" @@ -70,6 +70,7 @@ #define DELIMITER ' ' #define BOUNDARY_LEN 255 #define MAX_ATTACHMENTS 16 +#define MAX_ZIP_RECURSION_LEVEL 2 /* SQL stuff */ diff --git a/src/extract.c b/src/extract.c index 3bda0b82..34ce6b99 100644 --- a/src/extract.c +++ b/src/extract.c @@ -2,6 +2,10 @@ #include #include #include +#include +#include +#include +#include #include #ifdef HAVE_ZIP @@ -38,6 +42,7 @@ void remove_xml(char *buf, int *html){ } +#ifdef HAVE_ZIP int extract_opendocument(struct session_data *sdata, struct _state *state, char *filename, char *prefix){ int errorp, i=0, len=0, html=0; char buf[MAXBUFSIZE]; @@ -69,6 +74,7 @@ int extract_opendocument(struct session_data *sdata, struct _state *state, char } zip_fclose(zf); } + else syslog(LOG_PRIORITY, "%s: cannot extract '%s' from '%s'", sdata->ttmpfile, sb.name, filename); if(state->bodylen > BIGBUFSIZE-1024) break; } @@ -83,6 +89,60 @@ int extract_opendocument(struct session_data *sdata, struct _state *state, char } +int unzip_file(struct session_data *sdata, struct _state *state, char *filename, int *rec){ + int errorp, i=0, len=0, fd; + char *p, extracted_filename[SMALLBUFSIZE], buf[MAXBUFSIZE]; + struct zip *z; + struct zip_stat sb; + struct zip_file *zf; + + (*rec)++; + + z = zip_open(filename, 0, &errorp); + if(!z) return 1; + + while(zip_stat_index(z, i, 0, &sb) == 0){ + p = strrchr(sb.name, '.'); + + if(p && strcmp(get_attachment_extractor_by_filename((char*)sb.name), "other")){ + + snprintf(extracted_filename, sizeof(extracted_filename)-1, "%s-%d-%d%s", sdata->ttmpfile, *rec, i, p); + + fd = open(extracted_filename, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); + if(fd != -1){ + zf = zip_fopen_index(z, i, 0); + if(zf){ + while((len = zip_fread(zf, buf, sizeof(buf))) > 0){ + write(fd, buf, len); + } + zip_fclose(zf); + } + else syslog(LOG_PRIORITY, "%s: cannot extract '%s' from '%s'", sdata->ttmpfile, sb.name, extracted_filename); + + close(fd); + + extract_attachment_content(sdata, state, extracted_filename, get_attachment_extractor_by_filename(extracted_filename), rec); + + unlink(extracted_filename); + + } + else { + syslog(LOG_PRIORITY, "%s: cannot open '%s'", sdata->ttmpfile, extracted_filename); + } + + } + + i++; + } + + + zip_close(z); + + return 0; +} + +#endif + void read_content_with_popen(struct session_data *sdata, struct _state *state, char *cmd){ int len; char buf[MAXBUFSIZE]; @@ -157,9 +217,13 @@ void extract_attachment_content(struct session_data *sdata, struct _state *state return; } - if(strcmp(type, "zip") == 0 && *rec == 0){ - (*rec)++; - + if(strcmp(type, "zip") == 0){ + if(*rec < MAX_ZIP_RECURSION_LEVEL){ + unzip_file(sdata, state, filename, rec); + } + else { + syslog(LOG_PRIORITY, "%s: multiple recursion level zip attachment, skipping %s", sdata->ttmpfile, filename); + } } #endif diff --git a/src/extract.h b/src/extract.h deleted file mode 100644 index d184a320..00000000 --- a/src/extract.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _EXTRACT_H - #define _EXTRACT_H - -#include "defs.h" - -void extract_attachment_content(struct session_data *sdata, struct _state *state, char *filename, char *type, int *rec); - - -#endif /* _EXTRACT_H */ diff --git a/src/piler.h b/src/piler.h index c8b5a25a..b393e07d 100644 --- a/src/piler.h +++ b/src/piler.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -58,5 +57,7 @@ unsigned long add_new_folder(struct session_data *sdata, char *foldername, int p int store_index_data(struct session_data *sdata, struct _state *state, struct __data *data, uint64 id, struct __config *cfg); +void extract_attachment_content(struct session_data *sdata, struct _state *state, char *filename, char *type, int *rec); + #endif /* _PILER_H */ diff --git a/test/Makefile.in b/test/Makefile.in index e0dee36a..c571a813 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -25,11 +25,14 @@ OBJS = import_helper.o INSTALL = @INSTALL@ -all: $(OBJS) parser debug import +all: $(OBJS) parser debug import ptest parser: parser.c ../src/libpiler.a $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $< -lpiler $(LIBS) $(LIBDIR) +ptest: ptest.c ../src/libpiler.a + $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $< -lpiler $(LIBS) $(LIBDIR) + debug: debug.c ../src/libpiler.a $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $< -lpiler $(LIBS) $(LIBDIR) @@ -44,7 +47,7 @@ import: import.c ../src/libpiler.a install: clean: - rm -f parser debug import + rm -f parser debug import ptest distclean: clean rm -f Makefile diff --git a/test/ptest.c b/test/ptest.c new file mode 100644 index 00000000..b512fc88 --- /dev/null +++ b/test/ptest.c @@ -0,0 +1,449 @@ +/* + * pilerimport.c, SJ + */ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define SKIPLIST "junk,trash,spam,draft" +#define MBOX_ARGS 1024 + +extern char *optarg; +extern int optind; + +int quiet=0; + +int connect_to_imap_server(int sd, int *seq, char *imapserver, char *username, char *password); +int list_folders(int sd, int *seq, char *folders, int foldersize); +int process_imap_folder(int sd, int *seq, char *folder, struct session_data *sdata, struct __data *data, struct __config *cfg); + + +int parse_it(char *filename, struct session_data *sdata, struct __data *data, struct __config *cfg){ + int i, rc=ERR, fd; + struct stat st; + struct _state state; + + + init_session_data(sdata); + + if(stat(filename, &st) != 0){ + printf("cannot stat() %s\n", filename); + return rc; + } + + if(S_ISREG(st.st_mode) == 0){ + printf("%s is not a file\n", filename); + return rc; + } + + fd = open(filename, O_RDONLY); + if(fd == -1){ + printf("cannot open %s\n", filename); + return rc; + } + close(fd); + + snprintf(sdata->filename, SMALLBUFSIZE-1, "%s", filename); + + sdata->tot_len = st.st_size; + + + + sdata->sent = 0; + + state = parse_message(sdata, 1, cfg); + post_parse(sdata, &state, cfg); + + unlink(sdata->tmpframe); + + for(i=1; i<=state.n_attachments; i++) unlink(state.attachments[i].internalname); + + return 0; +} + + +int import_from_mailbox(char *mailbox, struct session_data *sdata, struct __data *data, struct __config *cfg){ + FILE *F, *f=NULL; + int rc=ERR, tot_msgs=0, ret=OK; + char buf[MAXBUFSIZE], fname[SMALLBUFSIZE]; + time_t t; + + + F = fopen(mailbox, "r"); + if(!F){ + printf("cannot open mailbox: %s\n", mailbox); + return rc; + } + + t = time(NULL); + + while(fgets(buf, sizeof(buf)-1, F)){ + + if(buf[0] == 'F' && buf[1] == 'r' && buf[2] == 'o' && buf[3] == 'm' && buf[4] == ' '){ + tot_msgs++; + if(f){ + fclose(f); + rc = parse_it(fname, sdata, data, cfg); + if(rc == ERR) ret = ERR; + unlink(fname); + + if(quiet == 0) printf("processed: %7d\r", tot_msgs); fflush(stdout); + } + + snprintf(fname, sizeof(fname)-1, "%ld-%d", t, tot_msgs); + f = fopen(fname, "w+"); + continue; + } + + if(f) fprintf(f, "%s", buf); + } + + if(f){ + fclose(f); + rc = parse_it(fname, sdata, data, cfg); + if(rc == ERR) ret = ERR; + unlink(fname); + + if(quiet == 0) printf("processed: %7d\r", ++tot_msgs); fflush(stdout); + } + + fclose(F); + + return ret; +} + + +int import_mbox_from_dir(char *directory, struct session_data *sdata, struct __data *data, int *tot_msgs, struct __config *cfg){ + DIR *dir; + struct dirent *de; + int rc=ERR, ret=OK; + char fname[SMALLBUFSIZE]; + struct stat st; + + dir = opendir(directory); + if(!dir){ + printf("cannot open directory: %s\n", directory); + return ERR; + } + + + while((de = readdir(dir))){ + if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; + + snprintf(fname, sizeof(fname)-1, "%s/%s", directory, de->d_name); + + if(stat(fname, &st) == 0){ + if(S_ISDIR(st.st_mode)){ + rc = import_mbox_from_dir(fname, sdata, data, tot_msgs, cfg); + if(rc == ERR) ret = ERR; + } + else { + + if(S_ISREG(st.st_mode)){ + rc = import_from_mailbox(fname, sdata, data, cfg); + if(rc == OK) (*tot_msgs)++; + else ret = ERR; + + } + else { + printf("%s is not a file\n", fname); + } + + } + } + else { + printf("cannot stat() %s\n", fname); + } + + } + closedir(dir); + + return ret; +} + + +int import_from_maildir(char *directory, struct session_data *sdata, struct __data *data, int *tot_msgs, struct __config *cfg){ + DIR *dir; + struct dirent *de; + int rc=ERR, ret=OK; + char fname[SMALLBUFSIZE]; + struct stat st; + + dir = opendir(directory); + if(!dir){ + printf("cannot open directory: %s\n", directory); + return ERR; + } + + + while((de = readdir(dir))){ + if(strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; + + snprintf(fname, sizeof(fname)-1, "%s/%s", directory, de->d_name); + + if(stat(fname, &st) == 0){ + if(S_ISDIR(st.st_mode)){ + rc = import_from_maildir(fname, sdata, data, tot_msgs, cfg); + if(rc == ERR) ret = ERR; + } + else { + + if(S_ISREG(st.st_mode)){ + rc = parse_it(fname, sdata, data, cfg); + if(rc == OK) (*tot_msgs)++; + else ret = ERR; + + if(quiet == 0) printf("processed: %7d\r", *tot_msgs); fflush(stdout); + } + else { + printf("%s is not a file\n", fname); + } + + } + } + else { + printf("cannot stat() %s\n", fname); + } + + } + closedir(dir); + + return ret; +} + + +int import_from_imap_server(char *imapserver, char *username, char *password, struct session_data *sdata, struct __data *data, char *skiplist, struct __config *cfg){ + int rc=ERR, ret=OK, sd, seq=1, skipmatch; + char *p, puf[SMALLBUFSIZE]; + char *q, muf[SMALLBUFSIZE]; + char folders[MAXBUFSIZE]; + + if((sd = socket(AF_INET, SOCK_STREAM, 0)) == -1){ + printf("cannot create socket\n"); + return ERR; + } + + if(connect_to_imap_server(sd, &seq, imapserver, username, password) == ERR){ + close(sd); + return ERR; + } + + + list_folders(sd, &seq, &folders[0], sizeof(folders)); + + + p = &folders[0]; + do { + memset(puf, 0, sizeof(puf)); + p = split(p, '\n', puf, sizeof(puf)-1); + + if(strlen(puf) < 1) continue; + + skipmatch = 0; + + if(skiplist && strlen(skiplist) > 0){ + q = skiplist; + do { + memset(muf, 0, sizeof(muf)); + q = split(q, ',', muf, sizeof(muf)-1); + if(strncasecmp(puf, muf, strlen(muf)) == 0){ + skipmatch = 1; + break; + } + } while(q); + } + + if(skipmatch == 1){ + if(quiet == 0) printf("SKIPPING FOLDER: %s\n", puf); + continue; + } + + if(quiet == 0) printf("processing folder: %s... ", puf); + + rc = process_imap_folder(sd, &seq, puf, sdata, data, cfg); + if(rc == ERR) ret = ERR; + + } while(p); + + + close(sd); + + return ret; +} + + +void usage(){ + printf("usage: pilerimport [-c ] -e | -m | -M | -d | -i -u -p \n"); + exit(0); +} + + +int main(int argc, char **argv){ + int i, c, rc=0, n_mbox=0, tot_msgs=0; + char *configfile=CONFIG_FILE, *emlfile=NULL, *mboxdir=NULL, *mbox[MBOX_ARGS], *directory=NULL; + char *imapserver=NULL, *username=NULL, *password=NULL, *skiplist=SKIPLIST, *folder=NULL; + struct session_data sdata; + struct __config cfg; + struct __data data; + + for(i=0; i arguments: %s\n", optarg); + } + + break; + + case 'M' : + mboxdir = optarg; + break; + + case 'i' : + imapserver = optarg; + break; + + case 'u' : + username = optarg; + break; + + case 'p' : + password = optarg; + break; + + case 'x' : + skiplist = optarg; + break; + + case 'F' : + folder = optarg; + break; + + case 'h' : + case '?' : + usage(); + break; + + + default : + break; + } + } + + + + if(!mbox[0] && !mboxdir && !emlfile && !directory && !imapserver) usage(); + + + cfg = read_config(configfile); + + if(read_key(&cfg)){ + printf("%s\n", ERR_READING_KEY); + return ERR; + } + + mysql_init(&(sdata.mysql)); + mysql_options(&(sdata.mysql), MYSQL_OPT_CONNECT_TIMEOUT, (const char*)&cfg.mysql_connect_timeout); + if(mysql_real_connect(&(sdata.mysql), cfg.mysqlhost, cfg.mysqluser, cfg.mysqlpwd, cfg.mysqldb, cfg.mysqlport, cfg.mysqlsocket, 0) == 0){ + printf("cant connect to mysql server\n"); + return ERR; + } + + mysql_real_query(&(sdata.mysql), "SET NAMES utf8", strlen("SET NAMES utf8")); + mysql_real_query(&(sdata.mysql), "SET CHARACTER SET utf8", strlen("SET CHARACTER SET utf8")); + + setlocale(LC_CTYPE, cfg.locale); + + (void) openlog("pilerimport", LOG_PID, LOG_MAIL); + + load_rules(&sdata, &(data.archiving_rules), SQL_ARCHIVING_RULE_TABLE); + load_rules(&sdata, &(data.retention_rules), SQL_RETENTION_RULE_TABLE); + + if(emlfile) rc = import_message(emlfile, &sdata, &data, &cfg); + if(mbox[0]){ + for(i=0; i