From 1b57459f89f21c23ed9e7298d838b52fd41a57db Mon Sep 17 00:00:00 2001 From: SJ Date: Fri, 30 Dec 2011 15:52:59 +0100 Subject: [PATCH] added an import utility --- src/Makefile.in | 7 ++- src/attachment.c | 2 +- src/defs.h | 2 +- src/message.c | 11 +++- src/parser.c | 7 ++- src/parser.h | 1 + src/parser_utils.c | 45 ++++++++++++++ src/pilerimport.c | 142 +++++++++++++++++++++++++++++++++++++++++++++ src/session.c | 3 +- src/test.c | 2 + util/db-mysql.sql | 7 ++- 11 files changed, 216 insertions(+), 13 deletions(-) create mode 100644 src/pilerimport.c diff --git a/src/Makefile.in b/src/Makefile.in index 1a4b12f5..e8703169 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -33,7 +33,7 @@ MAKE = `which make` INSTALL = @INSTALL@ -all: libpiler.a piler pilerconf pilerget test +all: libpiler.a piler pilerconf pilerget pilerimport test install: install-piler @@ -51,6 +51,9 @@ libpiler.a: $(OBJS) $(MYSQL_OBJS) pilerget: pilerget.c cfg.o misc.o tai.o store.o attachment.o digest.o $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $^ $(LIBS) $(LIBDIR) +pilerimport: pilerimport.c libpiler.a + $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $^ -lpiler $(LIBS) $(LIBDIR) + pilerconf: pilerconf.c cfg.o misc.o tai.o $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $^ $(LIBDIR) @@ -79,7 +82,7 @@ install-piler: $(INSTALL) -m 0755 pilerget $(DESTDIR)$(bindir) clean: - rm -f *.o *.a libpiler.so* piler pilerconf pilerget pilertest + rm -f *.o *.a libpiler.so* piler pilerconf pilerget pilerimport pilertest distclean: clean rm -f Makefile diff --git a/src/attachment.c b/src/attachment.c index be138205..99e94328 100644 --- a/src/attachment.c +++ b/src/attachment.c @@ -98,7 +98,7 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _ bind[4].buffer_type = MYSQL_TYPE_STRING; bind[4].buffer = state->attachments[i].type; bind[4].is_null = 0; - len[4] = strlen(state->attachments[i].digest); bind[4].length = &len[4]; + len[4] = strlen(state->attachments[i].type); bind[4].length = &len[4]; 
bind[5].buffer_type = MYSQL_TYPE_LONG; bind[5].buffer = (char *)&(state->attachments[i].size); diff --git a/src/defs.h b/src/defs.h index f59231a4..1c160449 100644 --- a/src/defs.h +++ b/src/defs.h @@ -151,7 +151,7 @@ struct session_data { char ttmpfile[SMALLBUFSIZE], tmpframe[SMALLBUFSIZE], tre, restored_copy; char mailfrom[SMALLBUFSIZE], rcptto[MAX_RCPT_TO][SMALLBUFSIZE], client_addr[SMALLBUFSIZE]; char acceptbuf[SMALLBUFSIZE]; - char whitelist[MAXBUFSIZE], blacklist[MAXBUFSIZE]; + char attachments[SMALLBUFSIZE]; int fd, hdr_len, tot_len, num_of_rcpt_to, rav; int need_scan; float __acquire, __parsed, __av, __store, __compress, __encrypt; diff --git a/src/message.c b/src/message.c index 6bf0714b..144686e9 100644 --- a/src/message.c +++ b/src/message.c @@ -117,8 +117,8 @@ int store_index_data(struct session_data *sdata, struct _state *state, uint64 id char *subj, s[SMALLBUFSIZE]; MYSQL_STMT *stmt; - MYSQL_BIND bind[4]; - unsigned long len[4]; + MYSQL_BIND bind[5]; + unsigned long len[5]; subj = state->b_subject; if(*subj == ' ') subj++; @@ -131,7 +131,7 @@ int store_index_data(struct session_data *sdata, struct _state *state, uint64 id } - snprintf(s, sizeof(s)-1, "INSERT INTO %s (`id`, `from`, `to`, `subject`, `body`, `arrived`, `sent`, `size`, `attachments`) values(%llu,?,?,?,?,%ld,%ld,%d,%d)", SQL_SPHINX_TABLE, id, sdata->now, sdata->sent, sdata->tot_len, state->n_attachments); + snprintf(s, sizeof(s)-1, "INSERT INTO %s (`id`, `from`, `to`, `subject`, `body`, `arrived`, `sent`, `size`, `attachments`, `attachment_types`) values(%llu,?,?,?,?,%ld,%ld,%d,%d,?)", SQL_SPHINX_TABLE, id, sdata->now, sdata->sent, sdata->tot_len, state->n_attachments); if(mysql_stmt_prepare(stmt, s, strlen(s))){ @@ -166,6 +166,11 @@ int store_index_data(struct session_data *sdata, struct _state *state, uint64 id bind[3].is_null = 0; len[3] = strlen(state->b_body); bind[3].length = &len[3]; + bind[4].buffer_type = MYSQL_TYPE_STRING; + bind[4].buffer = sdata->attachments; + bind[4].is_null 
/*
 * Case-insensitive substring test.
 *
 * Returns 1 if needle occurs anywhere in haystack, 0 otherwise.
 * Used instead of strcasestr(), which is a GNU extension.
 */

static int contains_nocase(const char *haystack, const char *needle){
   size_t n = strlen(needle);

   for(; *haystack; haystack++){
      if(strncasecmp(haystack, needle, n) == 0) return 1;
   }

   return 0;
}


/*
 * Map an attachment to a coarse category label.
 *
 * filename: attachment file name, only consulted for generic
 *           "application/octet-stream" parts (may be NULL)
 * type:     MIME content type of the part (may be NULL)
 *
 * Returns a pointer to a static, comma terminated label such as
 * "image," or "word,", so that callers can concatenate the labels
 * directly. The returned string must not be modified or freed.
 * Unknown or missing types yield "other,".
 */

char *determine_attachment_type(char *filename, char *type){
   /*
    * Matched in order as case-insensitive prefixes of the MIME type.
    * The vcard entries must precede the generic "text/" entry,
    * otherwise they can never match.
    */
   static const struct { const char *prefix; char *label; } by_mime[] = {
      { "text/x-card",                   "vcard," },
      { "text/x-vcard",                  "vcard," },
      { "text/vcard",                    "vcard," },

      { "text/",                         "text," },
      { "image/",                        "image," },
      { "audio/",                        "audio," },
      { "video/",                        "video," },

      { "application/pdf",               "pdf," },

      { "application/msword",            "word," },
      { "application/vnd.ms-excel",      "excel," },
      { "application/vnd.ms-powerpoint", "powerpoint," },

      { "application/x-shockwave-flash", "flash," }
   };

   /*
    * Matched as case-insensitive prefixes of the file name extension,
    * so "doc" also covers "docx", "xls" covers "xlsx", etc.
    */
   static const struct { const char *ext; char *label; } by_ext[] = {
      { "zip",  "compressed," },
      { "rar",  "compressed," },

      { "doc",  "word," },
      { "xls",  "excel," },
      { "ppt",  "powerpoint," },

      { "png",  "image," },
      { "gif",  "image," },
      { "jpg",  "image," },
      { "jpeg", "image," },
      { "tiff", "image," }
   };

   size_t i;
   char *p;

   if(type == NULL) return "other,";

   for(i=0; i<sizeof(by_mime)/sizeof(by_mime[0]); i++){
      if(strncasecmp(type, by_mime[i].prefix, strlen(by_mime[i].prefix)) == 0) return by_mime[i].label;
   }

   if(contains_nocase(type, "opendocument")) return "odf,";

   /*
    * A generic binary part tells nothing by its MIME type, so fall back
    * to the extension of the file name. (The extension has to be looked
    * up in the file name: the MIME type itself contains no '.')
    */
   if(filename && strcasecmp(type, "application/octet-stream") == 0){

      p = strrchr(filename, '.');
      if(p){
         p++;

         for(i=0; i<sizeof(by_ext)/sizeof(by_ext[0]); i++){
            if(strncasecmp(p, by_ext[i].ext, strlen(by_ext[i].ext)) == 0) return by_ext[i].label;
         }
      }
   }

   return "other,";
}
+#include +#include +#include +#include +#include +#include +#include + + +int import_message(char *filename, struct session_data *sdata, struct __data *data, struct __config *cfg){ + int i, rc=ERR; + char *rule; + struct stat st; + struct _state state; + + if(stat(filename, &st) != 0){ + printf("cannot read: %s\n", filename); + return rc; + } + + + create_id(&(sdata->ttmpfile[0])); + + printf("a: %s\n", sdata->ttmpfile); + + link(filename, sdata->ttmpfile); + + sdata->num_of_rcpt_to = -1; + memset(sdata->rcptto[0], 0, SMALLBUFSIZE); + + time(&(sdata->now)); + sdata->sent = 0; + sdata->hdr_len = 0; + sdata->tot_len = st.st_size; + memset(sdata->attachments, 0, SMALLBUFSIZE); + + snprintf(sdata->tmpframe, SMALLBUFSIZE-1, "%s.m", sdata->ttmpfile); + + state = parse_message(sdata, cfg); + + rule = check_againt_ruleset(data->rules, &state, st.st_size); + + if(rule){ + printf("discarding %s by archiving policy: %s\n", filename, rule); + rc = OK; + goto ENDE; + } + + + printf("message-id: %s\n", state.message_id); + printf("from: *%s*\n", state.b_from); + printf("to: *%s*\n", state.b_to); + printf("subject: *%s*\n", state.b_subject); + + make_digests(sdata, cfg); + + printf("hdr len: %d\n", sdata->hdr_len); + + printf("body digest: %s\n", sdata->bodydigest); + + for(i=1; i<=state.n_attachments; i++){ + printf("i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest); + } + + printf("attachments:%s\n", sdata->attachments); + + printf("\n\n"); + + rc = processMessage(sdata, &state, cfg); + +ENDE: + unlink(sdata->ttmpfile); + unlink(sdata->tmpframe); + + if(rc == ERR) return rc; + + if(rc == ERR_EXISTS) printf("discarding duplicate message: %s\n", sdata->ttmpfile); + + + return OK; +} + + +int main(int argc, char **argv){ + int rc; + struct session_data sdata; + struct __config cfg; + struct __data data; + + + 
if(argc < 2){ + printf("usage: %s \n", argv[0]); + exit(1); + } + + cfg = read_config(CONFIG_FILE); + + if(read_key(&cfg)){ + printf("%s\n", ERR_READING_KEY); + return 1; + } + + mysql_init(&(sdata.mysql)); + mysql_options(&(sdata.mysql), MYSQL_OPT_CONNECT_TIMEOUT, (const char*)&cfg.mysql_connect_timeout); + if(mysql_real_connect(&(sdata.mysql), cfg.mysqlhost, cfg.mysqluser, cfg.mysqlpwd, cfg.mysqldb, cfg.mysqlport, cfg.mysqlsocket, 0) == 0){ + printf("cant connect to mysql server\n"); + return 0; + } + + mysql_real_query(&(sdata.mysql), "SET NAMES utf8", strlen("SET NAMES utf8")); + mysql_real_query(&(sdata.mysql), "SET CHARACTER SET utf8", strlen("SET CHARACTER SET utf8")); + + printf("locale: %s\n", setlocale(LC_MESSAGES, cfg.locale)); + setlocale(LC_CTYPE, cfg.locale); + + data.rules = NULL; + + load_archiving_rules(&sdata, &(data.rules)); + + + + rc = import_message(argv[1], &sdata, &data, &cfg); + + + + free_rule(data.rules); + + mysql_close(&(sdata.mysql)); + + return 0; +} + + diff --git a/src/session.c b/src/session.c index 86479659..582cbf87 100644 --- a/src/session.c +++ b/src/session.c @@ -502,8 +502,7 @@ void initSessionData(struct session_data *sdata){ memset(sdata->mailfrom, 0, SMALLBUFSIZE); snprintf(sdata->client_addr, SMALLBUFSIZE-1, "null"); - memset(sdata->whitelist, 0, MAXBUFSIZE); - memset(sdata->blacklist, 0, MAXBUFSIZE); + memset(sdata->attachments, 0, SMALLBUFSIZE); sdata->restored_copy = 0; diff --git a/src/test.c b/src/test.c index 9d4e716d..250ac051 100644 --- a/src/test.c +++ b/src/test.c @@ -62,6 +62,7 @@ int main(int argc, char **argv){ sdata.sent = 0; sdata.tot_len = st.st_size; memset(sdata.rcptto[0], 0, SMALLBUFSIZE); + memset(sdata.attachments, 0, SMALLBUFSIZE); snprintf(sdata.ttmpfile, SMALLBUFSIZE-1, "%s", argv[1]); snprintf(sdata.tmpframe, SMALLBUFSIZE-1, "%s.m", argv[1]); @@ -90,6 +91,7 @@ int main(int argc, char **argv){ printf("i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, 
state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest); } + printf("attachments:%s\n", sdata.attachments); printf("\n\n"); diff --git a/util/db-mysql.sql b/util/db-mysql.sql index 7031420e..569e8190 100644 --- a/util/db-mysql.sql +++ b/util/db-mysql.sql @@ -1,4 +1,4 @@ -create database `piler` character set 'utf8'; +create database if not exists `piler` character set 'utf8'; use `piler`; @@ -21,6 +21,7 @@ create table if not exists `sph_index` ( `body` text, `size` int default '0', `attachments` int default 0, + `attachment_types` text(512) default null, primary key (`id`) ) Engine=InnoDB; @@ -121,11 +122,11 @@ drop table if exists `counter`; create table if not exists `counter` ( `rcvd` bigint unsigned default 0, `virus` bigint unsigned default 0, - `duplicate` bigint unsigned default 0 + `duplicate` bigint unsigned default 0, `ignore` bigint unsigned default 0 ) Engine=InnoDB; -insert into `counter` values(0, 0, 0); +insert into `counter` values(0, 0, 0, 0); drop table if exists `search`;