From 29f312a8d2b5dcb6262fb04df624d06fad38b798 Mon Sep 17 00:00:00 2001 From: SJ Date: Wed, 16 Nov 2011 14:47:47 +0100 Subject: [PATCH] release of v0.1.3 --- configure | 2 +- configure.in | 2 +- src/Makefile | 82 ------- src/boundary.c | 98 --------- src/boundary.h | 14 -- src/config.h | 17 +- src/defs.h | 37 +--- src/digest.c | 15 +- src/ijc.h | 27 --- src/list.c | 57 ++--- src/list.h | 9 +- src/message.c | 6 +- src/misc.c | 68 ------ src/parser.c | 255 ++++++++-------------- src/parser.h | 23 +- src/parser_utils.c | 520 ++++++++++----------------------------------- src/piler.c | 3 - src/piler.h | 5 +- src/session.c | 7 +- src/test.c | 26 ++- 20 files changed, 293 insertions(+), 980 deletions(-) delete mode 100644 src/Makefile delete mode 100644 src/boundary.c delete mode 100644 src/boundary.h delete mode 100644 src/ijc.h diff --git a/configure b/configure index c4153b3d..1f1fafae 100755 --- a/configure +++ b/configure @@ -4285,7 +4285,7 @@ echo; echo CFLAGS="$static -O2 -Wall -g" LIBS="$antispam_libs $sunos_libs $sqlite3_libs" -OBJS="dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o boundary.o parser.o parser_utils.o session.o message.o digest.o store.o tai.o $objs" +OBJS="dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o session.o message.o digest.o store.o tai.o $objs" ac_config_files="$ac_config_files Makefile src/Makefile" diff --git a/configure.in b/configure.in index 7a73a506..71fbe20d 100644 --- a/configure.in +++ b/configure.in @@ -317,7 +317,7 @@ echo; echo CFLAGS="$static -O2 -Wall -g" LIBS="$antispam_libs $sunos_libs $sqlite3_libs" -OBJS="dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o boundary.o parser.o parser_utils.o session.o message.o digest.o store.o tai.o $objs" +OBJS="dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o parser.o parser_utils.o session.o message.o digest.o store.o tai.o $objs" AC_CONFIG_FILES([Makefile src/Makefile]) AC_OUTPUT diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 455e365e..00000000 --- a/src/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -SHELL = /bin/sh - -prefix = /usr/local -exec_prefix = ${prefix} -bindir = ${exec_prefix}/bin -sbindir = ${exec_prefix}/sbin -includedir = ${prefix}/include -libdir = ${exec_prefix}/lib -libexecdir = ${exec_prefix}/libexec -srcdir = . -sysconfdir = ${prefix}/etc -mandir = ${datarootdir}/man -datarootdir = ${prefix}/share -localstatedir = /var - -CC = gcc -CFLAGS = -O2 -Wall -g -DEFS = -D_GNU_SOURCE -DHAVE_ANTIVIRUS -DHAVE_CLAMD -DHAVE_TRE -DNEED_MYSQL -DNEED_SQLITE3 -INCDIR = -I. -I.. -I/usr/local/mysql/include -fPIC -g -static-libgcc -fno-omit-frame-pointer -m32 -fPIC -g -static-libgcc -fno-omit-frame-pointer -fno-strict-aliasing -DMY_PTHREAD_FASTMUTEX=1 -LIBDIR = -L. -LIBS = -lz -lm -ldl -lcrypto -ltre -lsqlite3 -lpthread -L/usr/local/mysql/lib -lmysqlclient_r -lpthread -lm -lrt -ldl -lguide -OBJS = dirs.o misc.o counters.o cfg.o sig.o decoder.o list.o boundary.o parser.o parser_utils.o session.o message.o digest.o store.o tai.o avir.o clamd.o -MYSQL_OBJS = -RUNNING_USER = piler -RUNNING_GROUP = `id -gn $(RUNNING_USER)` - -PILER_VERSION=0 -PILER_REVISION=1 -PILER_RELEASE=1 -LIBPILER_VERSION=$(PILER_VERSION).$(PILER_REVISION).$(PILER_RELEASE) - -MAKE = `which make` - -INSTALL = /bin/ginstall -c - -all: libpiler.a piler pilerconf test -install: install-piler - - -piler: piler.c libpiler.a - $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ piler.c -lpiler $(LIBS) $(LDAP_LIBS) $(LIBDIR) - -libpiler.a: $(OBJS) $(MYSQL_OBJS) - ar cr libpiler.a $(OBJS) $(MYSQL_OBJS) - ranlib libpiler.a - $(CC) -shared -Wl -o libpiler.so.$(LIBPILER_VERSION) $(OBJS) $(MYSQL_OBJS) $(LIBS) $(LDAP_LIBS) - ln -sf libpiler.so.$(LIBPILER_VERSION) libpiler.so - ln -sf libpiler.so.$(LIBPILER_VERSION) libpiler.so.$(PILER_VERSION) - - -pilerconf: pilerconf.c cfg.o misc.o tai.o - $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o $@ $^ $(LIBS) $(LIBDIR) - -test: - $(CC) $(CFLAGS) $(INCDIR) $(DEFS) -o piletest $(srcdir)/test.c -lpiler $(LIBS) $(LDAP_LIBS) $(LIBDIR) - -%.o: $(srcdir)/%.c - $(CC) $(CFLAGS) -fPIC $(INCDIR) $(DEFS) -c $< -o $@ - - -install-piler: - $(INSTALL) -d $(DESTDIR)$(bindir) - $(INSTALL) -d $(DESTDIR)$(sbindir) - $(INSTALL) -d $(DESTDIR)$(libdir) - $(INSTALL) -m 0644 libpiler.a $(DESTDIR)$(libdir) - $(INSTALL) -m 0755 libpiler.so.$(LIBPILER_VERSION) $(DESTDIR)$(libdir) - (cd $(DESTDIR)$(libdir) && ln -sf libpiler.so.$(LIBPILER_VERSION) libpiler.so) - (cd $(DESTDIR)$(libdir) && ln -sf libpiler.so.$(LIBPILER_VERSION) libpiler.so.$(PILER_VERSION)) - - $(INSTALL) -d $(DESTDIR)$(libexecdir)/piler - - $(INSTALL) -d $(DESTDIR)$(datarootdir)/piler - - $(INSTALL) -m 0755 piler $(DESTDIR)$(sbindir) - $(INSTALL) -m 0755 pilerconf $(DESTDIR)$(sbindir) - -clean: - rm -f *.o *.a libpiler.so* piler pilerconf piletest - -distclean: clean - rm -f Makefile - diff --git a/src/boundary.c b/src/boundary.c deleted file mode 100644 index a13e7f4c..00000000 --- a/src/boundary.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * boundary.c, SJ - */ - -#include -#include -#include -#include -#include - - -/* - * append something to the boundary list if we have to - */ - -int append_boundary(struct boundary **boundaries, char *p){ - struct boundary *q, *t, *u=NULL; - - q = *boundaries; - - while(q){ - if(strcmp(q->boundary_str, p) == 0) - return 0; - - u = q; - q = q->r; - } - - t = new_boundary(p); - if(t){ - if(*boundaries == NULL) - *boundaries = t; - else if(u) - u->r = t; - - return 1; - } - - return -1; -} - - - -/* - * create a new boundary structure - */ - -struct boundary *new_boundary(char *s){ - struct boundary *h=NULL; - - if((h = malloc(sizeof(struct boundary))) == NULL) - return NULL; - - strncpy(h->boundary_str, s, BOUNDARY_LEN-1); - h->r = NULL; - - return h; -} - - -/* - * is this a boundary string? - */ - -int is_boundary(struct boundary *boundaries, char *s){ - struct boundary *p; - - p = boundaries; - - while(p != NULL){ - if(strstr(s, p->boundary_str)) return 1; - p = p->r; - } - - return 0; -} - - -/* - * free boundary list - */ - -void free_boundary(struct boundary *b){ - struct boundary *p; - - while(b){ - p = b->r; - - //printf("free boundary: %s\n", b->boundary_str); - if(b) - free(b); - - b = p; - } -} - - - diff --git a/src/boundary.h b/src/boundary.h deleted file mode 100644 index 42c0d5c7..00000000 --- a/src/boundary.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * boundary.h, SJ - */ - -#ifndef _BOUNDARY_H - #define _BOUNDARY_H - -int append_boundary(struct boundary **boundaries, char *p); -struct boundary *new_boundary(char *s); -int is_boundary(struct boundary *boundaries, char *s); -void free_boundary(struct boundary *b); - -#endif /* _LIST_H */ - diff --git a/src/config.h b/src/config.h index 7c068180..00d0261c 100644 --- a/src/config.h +++ b/src/config.h @@ -11,7 +11,7 @@ #define PROGNAME "piler" -#define VERSION "0.1.2" +#define VERSION "0.1.3" #define PROGINFO VERSION ", Janos SUTO \n\n" CONFIGURE_PARAMS "\n\nSend bugs/issues to https://jira.acts.hu:8443/\n" @@ -19,7 +19,7 @@ #define CONFIG_FILE CONFDIR "/piler.conf" #define WORK_DIR DATADIR "/spool/piler/tmp" -#define QUEUE_DIR DATADIR "/spool/piler/new" +#define QUEUE_DIR DATADIR "/piler/new" #define DEFER_DIR DATADIR "/spool/piler/deferred" #define CLAMD_SOCKET "/tmp/clamd" @@ -32,6 +32,7 @@ #define MAXBUFSIZE 8192 #define SMALLBUFSIZE 512 #define BIGBUFSIZE 65535 +#define TINYBUFSIZE 128 #define MAXVAL 256 #define RANDOM_POOL "/dev/urandom" #define RND_STR_LEN 36 @@ -61,22 +62,12 @@ #define MAX_RCPT_TO 128 -#ifdef HAVE_SQLITE3 - #define MAX_KEY_VAL 9223372036854775807ULL -#else - #define MAX_KEY_VAL 18446744073709551615ULL -#endif - - #define MIN_WORD_LEN 3 #define MAX_WORD_LEN 25 #define MAX_TOKEN_LEN 4*MAX_WORD_LEN -#define URL_LEN 48 #define DELIMITER ' ' -#define SPAMINESS_HEADER_FIELD "X-Clapf-spamicity: " #define BOUNDARY_LEN 255 -#define JUNK_REPLACEMENT_CHAR 'j' -#define MAX_ATTACHMENTS 8 +#define MAX_ATTACHMENTS 16 /* SQL stuff */ diff --git a/src/defs.h b/src/defs.h index b4975753..16f16516 100644 --- a/src/defs.h +++ b/src/defs.h @@ -52,26 +52,22 @@ struct attachment { int size; - char type[SMALLBUFSIZE]; - char filename[SMALLBUFSIZE]; + char type[TINYBUFSIZE]; + char filename[TINYBUFSIZE]; }; -struct url { - char url_str[URL_LEN]; - struct url *r; +struct list { + char s[SMALLBUFSIZE]; + struct list *r; }; -struct boundary { - char boundary_str[BOUNDARY_LEN]; - struct boundary *r; -}; struct _state { + int line_num; int message_state; int is_header; int textplain; int texthtml; - int octetstream; int message_rfc822; int base64; int has_base64; @@ -80,33 +76,20 @@ struct _state { int htmltag; int style; int skip_html; - int ipcnt; int has_to_dump; int fd; - int num_of_msword; - int num_of_images; + int octetstream; int realbinary; int content_type_is_set; - int train_mode; - unsigned long c_shit; - unsigned long l_shit; - unsigned long line_num; - char ip[SMALLBUFSIZE]; - char hostname[SMALLBUFSIZE]; - char miscbuf[MAX_TOKEN_LEN]; - char qpbuf[MAX_TOKEN_LEN]; char attachedfile[RND_STR_LEN+SMALLBUFSIZE]; - char from[SMALLBUFSIZE]; char message_id[SMALLBUFSIZE]; + char miscbuf[MAX_TOKEN_LEN]; unsigned long n_token; unsigned long n_subject_token; unsigned long n_body_token; unsigned long n_chain_token; - struct url *urls; - int found_our_signo; - - struct boundary *boundaries; + struct list *boundaries; int n_attachments; struct attachment attachments[MAX_ATTACHMENTS]; @@ -123,8 +106,6 @@ struct session_data { int fd, hdr_len, tot_len, num_of_rcpt_to, rav; int need_scan; float __acquire, __parsed, __av, __store, __compress, __encrypt; - SHA256_CTX context; - unsigned char md[DIGEST_LENGTH]; char bodydigest[2*DIGEST_LENGTH+1]; time_t now, sent; #ifdef NEED_MYSQL diff --git a/src/digest.c b/src/digest.c index 618c2cf2..456a8b83 100644 --- a/src/digest.c +++ b/src/digest.c @@ -14,13 +14,16 @@ #include -int make_body_digest(struct session_data *sdata){ +int make_body_digest(struct session_data *sdata, struct __config *cfg){ int i=0, n, fd; char *p, *body=NULL; - unsigned char buf[MAXBUFSIZE]; + unsigned char buf[MAXBUFSIZE], md[DIGEST_LENGTH]; + SHA256_CTX context; + + if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: digesting", sdata->ttmpfile); memset(sdata->bodydigest, 0, 2*DIGEST_LENGTH+1); - SHA256_Init(&(sdata->context)); + SHA256_Init(&context); fd = open(sdata->ttmpfile, O_RDONLY); if(fd == -1) return -1; @@ -43,16 +46,16 @@ int make_body_digest(struct session_data *sdata){ } } - SHA256_Update(&(sdata->context), body, n); + SHA256_Update(&context, body, n); } close(fd); - SHA256_Final(sdata->md, &(sdata->context)); + SHA256_Final(md, &context); for(i=0;ibodydigest + i*2, 2*DIGEST_LENGTH, "%02x", sdata->md[i]); + snprintf(sdata->bodydigest + i*2, 2*DIGEST_LENGTH, "%02x", md[i]); return 0; } diff --git a/src/ijc.h b/src/ijc.h deleted file mode 100644 index a51d88d6..00000000 --- a/src/ijc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * ijc.h, SJ - */ - -static char invalid_junk_characters[] = { - - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ','', ' ','',' ',' ', - 'x',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', ' ',' ',' ',' ', - ' ','¡','¢',' ', '¤','¥','¦',' ', '¨',' ','ª','«', '¬','­','®','¯', - '°','±','²','³', ' ','µ','¶',' ', '¸','¹','º','»', ' ','½','¾','¿', - - ' ',' ','Â','Ã', 'Ä','Å','Æ','Ç', 'È',' ',' ','Ë', 'Ì',' ',' ','Ï', - 'Ð','Ñ','Ò',' ', 'Ô',' ',' ','×', 'Ø',' ',' ',' ', ' ','Ý',' ',' ', - 'à',' ',' ','ã', ' ','å','æ','ç', 'è',' ','ê','ë', 'ì',' ','î',' ', - 'ð','ñ','ò',' ', 'ô',' ',' ',' ', ' ','ù',' ',' ', ' ','ý',' ','ÿ' -}; - diff --git a/src/list.c b/src/list.c index cb9dfd33..74a7b81c 100644 --- a/src/list.c +++ b/src/list.c @@ -10,23 +10,23 @@ #include "config.h" -int append_list(struct url **urls, char *p){ - struct url *q, *t, *u=NULL; +int append_list(struct list **list, char *p){ + struct list *q, *t, *u=NULL; - q = *urls; + q = *list; while(q){ - if(strcmp(q->url_str, p) == 0) + if(strcmp(q->s, p) == 0) return 0; u = q; q = q->r; } - t = createListItem(p); + t = create_list_item(p); if(t){ - if(*urls == NULL) - *urls = t; + if(*list == NULL) + *list = t; else if(u) u->r = t; @@ -37,42 +37,51 @@ int append_list(struct url **urls, char *p){ } -struct url *createListItem(char *s){ - struct url *h=NULL; +struct list *create_list_item(char *s){ + struct list *h=NULL; - if((h = malloc(sizeof(struct url))) == NULL) + if((h = malloc(sizeof(struct list))) == NULL) return NULL; - strncpy(h->url_str, s, URL_LEN-1); + snprintf(h->s, SMALLBUFSIZE-1, "%s", s); h->r = NULL; return h; } -int isOnList(struct url *u, char *item){ - struct url *p, *q; +int is_string_on_list(struct list *list, char *s){ + struct list *p; - p = u; + p = list; while(p != NULL){ - q = p->r; - - if(p){ - if(strcmp(p->url_str, item) == 0) return 1; - } - - p = q; + if(strcmp(p->s, s) == 0) return 1; + p = p->r; } return 0; } -void freeList(struct url *u){ - struct url *p, *q; +int is_item_on_string(struct list *list, char *s){ + struct list *p; - p = u; + p = list; + + while(p != NULL){ + if(strstr(s, p->s)) return 1; + p = p->r; + } + + return 0; +} + + +void free_list(struct list *list){ + struct list *p, *q; + + p = list; while(p != NULL){ q = p->r; diff --git a/src/list.h b/src/list.h index c2c9d12f..6e0ab6d7 100644 --- a/src/list.h +++ b/src/list.h @@ -7,10 +7,11 @@ #include "defs.h" -int append_list(struct url **urls, char *p); -struct url *createListItem(char *s); -int isOnList(struct url *u, char *item); -void freeList(struct url *u); +int append_list(struct list **list, char *p); +struct list *create_list_item(char *s); +int is_string_on_list(struct list *list, char *s); +int is_item_on_string(struct list *list, char *s); +void free_list(struct list *list); #endif /* _LIST_H */ diff --git a/src/message.c b/src/message.c index 9b2709b1..7fc2b7f5 100644 --- a/src/message.c +++ b/src/message.c @@ -133,7 +133,7 @@ int hand_to_sphinx(struct session_data *sdata, struct _state *state, struct __co int store_meta_data(struct session_data *sdata, struct _state *state, struct __config *cfg){ int i=0, rc, ret=ERR; char *p, s[MAXBUFSIZE], s2[SMALLBUFSIZE]; - struct url *list = NULL; + struct list *list = NULL; MYSQL_STMT *stmt; MYSQL_BIND bind[4]; @@ -168,7 +168,7 @@ int store_meta_data(struct session_data *sdata, struct _state *state, struct __c if(strlen(s2) > 5){ LABEL1: - if(isOnList(list, s2) == 1) continue; + if(is_string_on_list(list, s2) == 1) continue; append_list(&list, s2); i++; @@ -222,7 +222,7 @@ LABEL1: ENDE_META: - freeList(list); + free_list(list); return ret; } diff --git a/src/misc.c b/src/misc.c index 1dfb4d6c..77b889c5 100644 --- a/src/misc.c +++ b/src/misc.c @@ -192,10 +192,6 @@ void trimBuffer(char *s){ } -/* - * extract email - */ - int extractEmail(char *rawmail, char *email){ char *p; @@ -213,10 +209,6 @@ int extractEmail(char *rawmail, char *email){ } -/* - * create an ID - */ - void create_id(char *id){ int i; unsigned char buf[RND_STR_LEN/2]; @@ -306,28 +298,6 @@ int recvtimeout(int s, char *buf, int len, int timeout){ } -/* - * check if it's a valid ID - */ - -int isValidClapfID(char *p){ - - if(strlen(p) != 30 && strlen(p) != 31) - return 0; - - for(; *p; p++){ - /* 0-9: 0x30-0x39, a-f: 0x61-0x66 */ - - if(! ((*p >= 0x30 && *p <= 0x39) || (*p >= 0x61 && *p <= 0x66) || *p == 0x0d) ){ - //printf("%c*\n", *p); - return 0; - } - } - - return 1; -} - - /* * is it a valid dotted IPv4 address */ @@ -341,44 +311,6 @@ int isDottedIPv4Address(char *s){ } -/* - * whitelist check - */ - -int isEmailAddressOnList(char *list, char *tmpfile, char *email, struct __config *cfg){ - char *p, *q, w[SMALLBUFSIZE]; - - if(email == NULL) return 0; - - p = list; - - if(cfg->verbosity >= _LOG_INFO) syslog(LOG_PRIORITY, "%s: list: %s", tmpfile, list); - - do { - p = split(p, '\n', w, SMALLBUFSIZE-1); - - trimBuffer(w); - - if(strlen(w) > 2){ - - if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: matching '%s' on '%s'", tmpfile, w, email); - - if(w[strlen(w)-1] == '$'){ - q = email + strlen(email) - strlen(w) + 1; - if(strncasecmp(q, w, strlen(w)-1) == 0) - return 1; - } - else if(strcasestr(email, w)) - return 1; - - } - - } while(p); - - return 0; -} - - void write_pid_file(char *pidfile){ FILE *f; diff --git a/src/parser.c b/src/parser.c index da2f25c6..d4e3a19b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,12 +16,13 @@ #include -struct _state parseMessage(struct session_data *sdata, struct __config *cfg){ +struct _state parse_message(struct session_data *sdata, struct __config *cfg){ FILE *f; char buf[MAXBUFSIZE]; struct _state state; + int len; - initState(&state); + init_state(&state); f = fopen(sdata->ttmpfile, "r"); if(!f){ @@ -30,20 +31,21 @@ struct _state parseMessage(struct session_data *sdata, struct __config *cfg){ } while(fgets(buf, MAXBUFSIZE-1, f)){ - parseLine(buf, &state, sdata, cfg); + parse_line(buf, &state, sdata, cfg); } + fclose(f); - free_boundary(state.boundaries); + free_list(state.boundaries); if(state.message_id[0] == 0) snprintf(state.message_id, SMALLBUFSIZE-1, "null"); - if(state.b_from[strlen(state.b_from)-1] == ' ') state.b_from[strlen(state.b_from)-1] = '\0'; - if(state.b_to[strlen(state.b_to)-1] == ' ') state.b_to[strlen(state.b_to)-1] = '\0'; - if(state.b_subject[strlen(state.b_subject)-1] == ' ') state.b_subject[strlen(state.b_subject)-1] = '\0'; + len = strlen(state.b_from); + if(state.b_from[len-1] == ' ') state.b_from[len-1] = '\0'; - make_body_digest(sdata); + len = strlen(state.b_to); + if(state.b_to[len-1] == ' ') state.b_to[len-1] = '\0'; syslog(LOG_PRIORITY, "%s: from=%s, to=%s, subj=%s, message-id=%s", sdata->ttmpfile, state.b_from, state.b_to, state.b_subject, state.message_id); @@ -51,11 +53,10 @@ struct _state parseMessage(struct session_data *sdata, struct __config *cfg){ } -int parseLine(char *buf, struct _state *state, struct session_data *sdata, struct __config *cfg){ - char *p, *q, puf[MAXBUFSIZE], muf[MAXBUFSIZE], u[SMALLBUFSIZE], token[MAX_TOKEN_LEN]; - int x, b64_len, boundary_line=0; +int parse_line(char *buf, struct _state *state, struct session_data *sdata, struct __config *cfg){ + char *p, *r, puf[SMALLBUFSIZE]; + int x, len, b64_len, boundary_line=0; - memset(token, 0, MAX_TOKEN_LEN); state->line_num++; @@ -74,14 +75,12 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc return 0; } - trimBuffer(buf); /* skip the first line, if it's a "From date" format */ if(state->line_num == 1 && strncmp(buf, "From ", 5) == 0) return 0; - if(state->is_header == 0 && buf[0] != ' ' && buf[0] != '\t') state->message_state = MSG_BODY; if((state->content_type_is_set == 0 || state->is_header == 1) && strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0) state->message_state = MSG_CONTENT_TYPE; @@ -96,20 +95,13 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc if(state->is_header == 1){ - if(strncasecmp(buf, "Received: from ", strlen("Received: from ")) == 0) state->message_state = MSG_RECEIVED; - else if(strncasecmp(buf, "From:", strlen("From:")) == 0) state->message_state = MSG_FROM; + if(strncasecmp(buf, "From:", strlen("From:")) == 0) state->message_state = MSG_FROM; else if(strncasecmp(buf, "To:", 3) == 0) state->message_state = MSG_TO; else if(strncasecmp(buf, "Cc:", 3) == 0) state->message_state = MSG_CC; else if(strncasecmp(buf, "Message-Id:", 11) == 0) state->message_state = MSG_MESSAGE_ID; else if(strncasecmp(buf, "Subject:", strlen("Subject:")) == 0) state->message_state = MSG_SUBJECT; else if(strncasecmp(buf, "Date:", strlen("Date:")) == 0 && sdata->sent == 0) sdata->sent = parse_date_header(buf); - if(state->message_state == MSG_SUBJECT){ - p = &buf[0]; - if(strncmp(buf, "Subject:", strlen("Subject:")) == 0) p = &buf[8]; - if(*p == ' ') p++; - } - if(state->message_state == MSG_MESSAGE_ID && state->message_id[0] == 0){ p = strchr(buf+11, ' '); if(p) p = buf + 12; @@ -118,27 +110,17 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc snprintf(state->message_id, SMALLBUFSIZE-1, "%s", p); } - if(state->message_state == MSG_FROM){ - p = strchr(buf+5, ' '); - if(p) p = buf + 6; - else p = buf + 5; - - snprintf(state->from, SMALLBUFSIZE-1, "FROM*%s", p); - } - - /* we are interested in only From:, To:, Subject:, Received:, Content-*: header lines */ if(state->message_state <= 0) return 0; } - - if((p = strcasestr(buf, "boundary"))){ x = extract_boundary(p, state); } + /* Content-type: checking */ if(state->message_state == MSG_CONTENT_TYPE){ @@ -150,16 +132,13 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc if(p){ p++; if(*p == ' ' || *p == '\t') p++; - snprintf(state->attachments[state->n_attachments].type, SMALLBUFSIZE-1, "%s", p); + snprintf(state->attachments[state->n_attachments].type, TINYBUFSIZE-1, "%s", p); state->content_type_is_set = 1; p = strchr(state->attachments[state->n_attachments].type, ';'); if(p) *p = '\0'; } - p = strstr(buf, "name="); - if(p){ - snprintf(state->attachments[state->n_attachments].filename, SMALLBUFSIZE-1, "%s", p); - } + extractNameFromHeaderLine(buf, "name", state->attachments[state->n_attachments].filename); if(strcasestr(buf, "text/plain") || @@ -171,15 +150,13 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc strcasestr(buf, "message/rfc822") || strcasestr(buf, "application/ms-tnef") ){ - - state->textplain = 1; + state->textplain = 1; } else if(strcasestr(buf, "text/html")){ - state->texthtml = 1; + state->texthtml = 1; } - /* switch (back) to header mode if we encounterd an attachment with - "message/rfc822" content-type, 2010.05.16, SJ */ + /* switch (back) to header mode if we encounterd an attachment with "message/rfc822" content-type */ if(strcasestr(buf, "message/rfc822")){ state->message_rfc822 = 1; @@ -187,30 +164,20 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc } - if(strcasestr(buf, "application/octet-stream")) state->octetstream = 1; - if(strcasestr(buf, "charset") && strcasestr(buf, "UTF-8")) state->utf8 = 1; - extractNameFromHeaderLine(buf, "name", state->attachments[state->n_attachments].filename); - /*if(strlen(state->attachments[state->n_attachments].filename) > 5){ + if(strlen(state->attachments[state->n_attachments].filename) > 4){ state->has_to_dump = 1; - snprintf(u, sizeof(u)-1, "%s.%d", sdata->ttmpfile, state->n_attachments); - state->fd = open(u, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); - }*/ + snprintf(puf, sizeof(puf)-1, "%s.%d", sdata->ttmpfile, state->n_attachments); + printf("dump file: %s\n", puf); + + //state->fd = open(u, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); + } } - if(state->message_state == MSG_CONTENT_DISPOSITION && state->attachments[state->n_attachments].filename[0] == 0) - extractNameFromHeaderLine(buf, "name", state->attachments[state->n_attachments].filename); - - - if(state->message_state > 0 && state->message_state <= MSG_SUBJECT && state->message_rfc822 == 1) state->message_rfc822 = 0; - - - /* check for textual base64 encoded part, 2005.03.25, SJ */ - if(state->message_state == MSG_CONTENT_TRANSFER_ENCODING){ if(strcasestr(buf, "base64")){ @@ -219,30 +186,30 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc } if(strcasestr(buf, "quoted-printable")) state->qp = 1; - - - if(strcasestr(buf, "image")) - state->num_of_images++; - - if(strcasestr(buf, "msword")) - state->num_of_msword++; } + /* skip the boundary itself */ - boundary_line = is_boundary(state->boundaries, buf); + boundary_line = is_item_on_string(state->boundaries, buf); if(!strstr(buf, "boundary=") && !strstr(buf, "boundary =") && boundary_line == 1){ state->content_type_is_set = 0; - /*if(state->has_to_dump == 1){ + if(state->has_to_dump == 1){ if(state->fd != -1) close(state->fd); state->fd = -1; - }*/ + } + if(state->n_attachments < MAX_ATTACHMENTS-1) state->n_attachments++; + /* use the previous attachment slot if it was not an attached file */ + if(state->n_attachments > 0 && strlen(state->attachments[state->n_attachments-1].filename) < 5){ + state->n_attachments--; + } + state->has_to_dump = 0; state->base64 = 0; state->textplain = 0; state->texthtml = state->octetstream = 0; @@ -261,68 +228,35 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc /* end of boundary check */ - /* skip non textual stuff */ + + if(state->is_header == 1){ + /* skip irrelevant headers */ + if(state->message_state != MSG_SUBJECT && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) return 0; + + if(state->message_state == MSG_SUBJECT) fixupEncodedHeaderLine(buf); + } + if(state->message_state == MSG_BODY){ - /*if(state->has_to_dump == 1 && state->fd != -1){ - write(state->fd, buf, strlen(buf)); - }*/ - if(state->base64 == 1) state->attachments[state->n_attachments].size += strlen(buf) / BASE64_RATIO; - else state->attachments[state->n_attachments].size += strlen(buf); + if(state->has_to_dump == 1){ + //printf("dumping: %s *%s*\n", state->attachments[state->n_attachments].filename, buf); + state->attachments[state->n_attachments].size += strlen(buf); + } + + /* don't process body if it's not a text or html part */ + if(state->textplain == 0 && state->texthtml == 0) return 0; } - if(state->message_state == MSG_BODY && strlen(buf) < 2) return 0; - - - /* - * sometimes spammers screw up their junk messages, and - * use "application/octet-stream" type for textual parts. - * Now clapf checks whether the attachment is really - * binary. If it has no non-printable characters in a - * base64 encoded line, then let's tokenize it. - * - * Note: in this case we cannot expect fully compliant - * message part. However this case should be very rare - * since legitim messages use proper mime types. - * - * 2010.10.23, SJ - */ - - if(state->message_state == MSG_BODY && state->realbinary == 0 && state->octetstream == 1){ - snprintf(puf, MAXBUFSIZE-1, "%s", buf); - if(state->base64 == 1) decodeBase64(puf); - if(state->qp == 1) decodeQP(puf); - state->realbinary += countNonPrintableCharacters(puf); + if(state->base64 == 1 && state->message_state == MSG_BODY){ + b64_len = decodeBase64(buf); + fixupBase64EncodedLine(buf, state); } - if(state->is_header == 0 && state->textplain == 0 && state->texthtml == 0 && (state->message_state == MSG_BODY || state->message_state == MSG_CONTENT_DISPOSITION) && (state->octetstream == 0 || state->realbinary > 0) ) return 0; - - - /* base64 decode buffer */ - - if(state->base64 == 1 && state->message_state == MSG_BODY) b64_len = decodeBase64(buf); - - - /* fix encoded From:, To: and Subject: lines, 2008.11.24, SJ */ - - if(state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC || state->message_state == MSG_SUBJECT) fixupEncodedHeaderLine(buf); - - - /* fix soft breaks with quoted-printable decoded stuff, 2006.03.01, SJ */ - - if(state->qp == 1) fixupSoftBreakInQuotedPritableLine(buf, state); - - - /* fix base64 stuff if the line does not end with a line break, 2006.03.01, SJ */ - - if(state->base64 == 1 && state->message_state == MSG_BODY) fixupBase64EncodedLine(buf, state); - if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state); - if(state->message_state == MSG_BODY){ if(state->qp == 1) decodeQP(buf); if(state->utf8 == 1) decodeUTF8(buf); @@ -333,7 +267,6 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc if(state->texthtml == 1) decodeHTML(buf); - translateLine((unsigned char*)buf, state); reassembleToken(buf); @@ -342,62 +275,48 @@ int parseLine(char *buf, struct _state *state, struct session_data *sdata, struc if(state->is_header == 1) p = strchr(buf, ' '); else p = buf; - - //if(strlen(buf) > 3) printf("%d original: %s\n", state->message_state, buf); - - do { - p = split(p, DELIMITER, puf, MAXBUFSIZE-1); + memset(puf, 0, sizeof(puf)); + p = split(p, ' ', puf, sizeof(puf)-1); - if(strcasestr(puf, "http://") || strcasestr(puf, "https://")){ - q = puf; - do { - q = split_str(q, "http://", u, SMALLBUFSIZE-1); - - if(u[strlen(u)-1] == '.') u[strlen(u)-1] = '\0'; - - if(strlen(u) > 2 && strncasecmp(u, "www.w3.org", 10) && strchr(u, '.') ){ - - snprintf(muf, MAXBUFSIZE-1, "http://%s", u); - fixURL(muf); - - strncat(muf, " ", MAXBUFSIZE-1); - strncat(state->b_body, muf, BIGBUFSIZE-1); - - } - } while(q); - - continue; - } - - - if(state->message_state != MSG_SUBJECT && (strlen(puf) < MIN_WORD_LEN || (strlen(puf) > MAX_WORD_LEN && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) || isHexNumber(puf))) - continue; - - if(strlen(puf) < 2 || strncmp(puf, "HTML*", 5) == 0) continue; - - if(state->message_state == MSG_CONTENT_TYPE && strncmp(puf, "content-type", 12) == 0) continue; - if(state->message_state == MSG_CONTENT_DISPOSITION && strncmp(puf, "content-disposition", 19) == 0) continue; - if(state->message_state == MSG_CONTENT_TRANSFER_ENCODING && strncmp(puf, "content-transfer-encoding", 25) == 0) continue; + if(puf[0] == '\0') continue; degenerateToken((unsigned char*)puf); - strncat(puf, " ", MAXBUFSIZE-1); + if(puf[0] == '\0') continue; - if(state->message_state == MSG_SUBJECT) - strncat(state->b_subject, puf, MAXBUFSIZE-1); - else if(state->message_state == MSG_FROM && strchr(puf, '@')) - strncat(state->b_from, puf, SMALLBUFSIZE-1); - else if(state->message_state == MSG_TO && strchr(puf, '@')) - strncat(state->b_to, puf, SMALLBUFSIZE-1); - else if(state->message_state == MSG_CC && strchr(puf, '@')) - strncat(state->b_to, puf, SMALLBUFSIZE-1); - else if(state->is_header == 0) - strncat(state->b_body, puf, BIGBUFSIZE-1); + if(state->message_state == MSG_SUBJECT){ + r = &puf[0]; for(; *r; r++){ if(*r == '_') *r = ' '; } + } + + if(state->qp == 1 && puf[strlen(puf)-1] == '='){ + puf[strlen(puf)-1] = '\0'; + } + else if(state->message_state != MSG_SUBJECT || (p && strchr(p, ' ')) ){ + strncat(puf, " ", sizeof(puf)-1); + } + + if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf); + + if(state->is_header == 0 && strncmp(puf, "URL*", 4) && (puf[0] == ' ' || strlen(puf) > MAX_WORD_LEN || isHexNumber(puf)) ) continue; + + + len = strlen(puf); + + if(state->message_state == MSG_SUBJECT && strlen(state->b_subject) < MAXBUFSIZE-len-1) + memcpy(&(state->b_subject[strlen(state->b_subject)]), puf, len); + + else if(state->message_state == MSG_FROM && strchr(puf, '@') && strlen(state->b_from) < SMALLBUFSIZE-len-1) + memcpy(&(state->b_from[strlen(state->b_from)]), puf, len); + + else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && strchr(puf, '@') && strlen(state->b_to) < SMALLBUFSIZE-len-1) + memcpy(&(state->b_to[strlen(state->b_to)]), puf, len); + + else if(state->message_state == MSG_BODY && strlen(state->b_body) < BIGBUFSIZE-len-1) + memcpy(&(state->b_body[strlen(state->b_body)]), puf, len); } while(p); - return 0; } diff --git a/src/parser.h b/src/parser.h index e2990779..e4ffd47d 100644 --- a/src/parser.h +++ b/src/parser.h @@ -9,33 +9,22 @@ #include "config.h" #include "defs.h" -struct _state parseMessage(struct session_data *sdata, struct __config *cfg); -struct _state parseBuffer(struct session_data *sdata, struct __config *cfg); -int parseLine(char *buf, struct _state *state, struct session_data *sdata, struct __config *cfg); +struct _state parse_message(struct session_data *sdata, struct __config *cfg); +int parse_line(char *buf, struct _state *state, struct session_data *sdata, struct __config *cfg); -void initState(struct _state *state); -void freeState(struct _state *state); +void init_state(struct _state *state); +unsigned long parse_date_header(char *s); +int isHexNumber(char *p); int extract_boundary(char *p, struct _state *state); -int extractNameFromHeaderLine(char *s, char *name, char *resultbuf); -int attachment_by_type(struct _state *state, char *type); void fixupEncodedHeaderLine(char *buf); void fixupSoftBreakInQuotedPritableLine(char *buf, struct _state *state); void fixupBase64EncodedLine(char *buf, struct _state *state); void markHTML(char *buf, struct _state *state); int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state); -void fixupHTML(char *buf, struct _state *state, struct __config *cfg); -int isSkipHTMLTag(char *s); void translateLine(unsigned char *p, struct _state *state); void reassembleToken(char *p); void degenerateToken(unsigned char *p); -int countInvalidJunkLines(char *p); -int countInvalidJunkCharacters(char *p, int replace_junk); -int countNonPrintableCharacters(char *p); -int isHexNumber(char *p); void fixURL(char *url); -void fixFQDN(char *fqdn); -void getTLDFromName(char *name); -int isItemOnList(char *item, char *list, char *extralist); -unsigned long parse_date_header(char *s); +int extractNameFromHeaderLine(char *s, char *name, char *resultbuf); #endif /* _PARSER_H */ diff --git a/src/parser_utils.c b/src/parser_utils.c index e4a0264a..f05f3a35 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -1,5 +1,5 @@ /* - * parser_utils.c, SJ + * parser_utils.c */ #include @@ -16,23 +16,21 @@ #include #include #include "trans.h" -#include "ijc.h" #include "html.h" -void initState(struct _state *state){ +void init_state(struct _state *state){ int i; state->message_state = MSG_UNDEF; + state->line_num = 0; + state->is_header = 1; - /* by default we are a text/plain message */ - - state->textplain = 1; + state->textplain = 1; /* by default we are a text/plain message */ state->texthtml = 0; state->message_rfc822 = 0; - state->octetstream = 0; state->base64 = 0; state->utf8 = 0; @@ -44,36 +42,15 @@ void initState(struct _state *state){ state->skip_html = 0; - state->n_token = 0; - state->n_body_token = 0; - state->n_chain_token = 0; - state->n_subject_token = 0; - state->content_type_is_set = 0; - state->c_shit = 0; - state->l_shit = 0; - - state->line_num = 0; - - state->ipcnt = 0; - - memset(state->ip, 0, SMALLBUFSIZE); - memset(state->hostname, 0, SMALLBUFSIZE); - memset(state->miscbuf, 0, MAX_TOKEN_LEN); - memset(state->qpbuf, 0, MAX_TOKEN_LEN); - memset(state->from, 0, SMALLBUFSIZE); memset(state->message_id, 0, SMALLBUFSIZE); - - state->urls = NULL; - - state->found_our_signo = 0; + memset(state->miscbuf, 0, MAX_TOKEN_LEN); state->has_to_dump = 0; state->fd = -1; - state->num_of_images = 0; - state->num_of_msword = 0; state->realbinary = 0; + state->octetstream = 0; state->boundaries = NULL; @@ -82,8 +59,8 @@ void initState(struct _state *state){ for(i=0; iattachments[i].size = 0; - memset(state->attachments[i].type, 0, SMALLBUFSIZE); - memset(state->attachments[i].filename, 0, SMALLBUFSIZE); + memset(state->attachments[i].type, 0, TINYBUFSIZE); + memset(state->attachments[i].filename, 0, TINYBUFSIZE); } memset(state->b_from, 0, SMALLBUFSIZE); @@ -93,27 +70,79 @@ void initState(struct _state *state){ } -void freeState(struct _state *state){ - freeList(state->urls); +unsigned long parse_date_header(char *s){ + char *p; + unsigned long ts=0; + struct tm tm; + + s += 5; + p = s; + + if(*p == ' '){ p++; s++; } + + p = strchr(s, ','); + if(!p) goto ENDE; + + *p = '\0'; + if(strcmp(s, "Mon") == 0) tm.tm_wday = 1; + else if(strcmp(s, "Tue") == 0) tm.tm_wday = 2; + else if(strcmp(s, "Wed") == 0) tm.tm_wday = 3; + else if(strcmp(s, "Thu") == 0) tm.tm_wday = 4; + else if(strcmp(s, "Fri") == 0) tm.tm_wday = 5; + else if(strcmp(s, "Sat") == 0) tm.tm_wday = 6; + else if(strcmp(s, "Sun") == 0) tm.tm_wday = 0; + s += 5; + + p = strchr(s, ' '); if(!p) goto ENDE; + *p = '\0'; tm.tm_mday = atoi(s); s += 3; + + p = strchr(s, ' '); if(!p) goto ENDE; + *p = '\0'; + if(strcmp(s, "Jan") == 0) tm.tm_mon = 0; + else if(strcmp(s, "Feb") == 0) tm.tm_mon = 1; + else if(strcmp(s, "Mar") == 0) tm.tm_mon = 2; + else if(strcmp(s, "Apr") == 0) tm.tm_mon = 3; + else if(strcmp(s, "May") == 0) tm.tm_mon = 4; + else if(strcmp(s, "Jun") == 0) tm.tm_mon = 5; + else if(strcmp(s, "Jul") == 0) tm.tm_mon = 6; + else if(strcmp(s, "Aug") == 0) tm.tm_mon = 7; + else if(strcmp(s, "Sep") == 0) tm.tm_mon = 8; + else if(strcmp(s, "Oct") == 0) tm.tm_mon = 9; + else if(strcmp(s, "Nov") == 0) tm.tm_mon = 10; + else if(strcmp(s, "Dec") == 0) tm.tm_mon = 11; + s = p+1; + + p = strchr(s, ' '); if(!p) goto ENDE; + tm.tm_year = atoi(s) - 1900; s = p+1; + + p = strchr(s, ':'); if(!p) goto ENDE; + *p = '\0'; tm.tm_hour = atoi(s); s = p+1; + + p = strchr(s, ':'); if(!p) goto ENDE; + *p = '\0'; tm.tm_min = atoi(s); s = p+1; + + p = strchr(s, ' '); if(!p) goto ENDE; + *p = '\0'; tm.tm_sec = atoi(s); s = p+1; + + tm.tm_isdst = -1; + + ts = mktime(&tm); + +ENDE: + return ts; } -int attachment_by_type(struct _state *state, char *type){ - int i; - - for(i=0; iattachments[i].type, type)) - return 1; +int isHexNumber(char *p){ + for(; *p; p++){ + if(!(*p == '-' || (*p >= 0x30 && *p <= 0x39) || (*p >= 0x41 && *p <= 0x46) || (*p >= 0x61 && *p <= 0x66)) ) + return 0; } - return 0; + return 1; } -/* - * extract bondary - */ - int extract_boundary(char *p, struct _state *state){ char *q; @@ -150,7 +179,7 @@ int extract_boundary(char *p, struct _state *state){ q = strrchr(p, '\n'); if(q) *q = '\0'; - append_boundary(&(state->boundaries), p); + append_list(&(state->boundaries), p); return 1; } @@ -159,42 +188,10 @@ int extract_boundary(char *p, struct _state *state){ } -int extractNameFromHeaderLine(char *s, char *name, char *resultbuf){ - int rc=0; - char buf[SMALLBUFSIZE], *p, *q; - - snprintf(buf, SMALLBUFSIZE-1, "%s", s); - - p = strstr(buf, name); - if(p){ - p += strlen(name); - p = strchr(p, '='); - if(p){ - p++; - q = strrchr(p, ';'); - if(q) *q = '\0'; - q = strrchr(p, '"'); - if(q){ - *q = '\0'; - p = strchr(p, '"'); - if(p){ - p++; - } - } - snprintf(resultbuf, SMALLBUFSIZE-1, "%s", p); - rc = 1; - } - } - - return rc; -} - - void fixupEncodedHeaderLine(char *buf){ char *p, *q, *r, *s, u[SMALLBUFSIZE], puf[MAXBUFSIZE]; char *start, *end; - memset(puf, 0, MAXBUFSIZE); q = buf; @@ -228,12 +225,6 @@ void fixupEncodedHeaderLine(char *buf){ } else if((s = strcasestr(start+2, "?Q?"))){ *s = '\0'; - - r = s + 3; - for(; *r; r++){ - if(*r == '_') *r = ' '; - } - decodeQP(s+3); } @@ -262,47 +253,13 @@ void fixupEncodedHeaderLine(char *buf){ } -void fixupSoftBreakInQuotedPritableLine(char *buf, struct _state *state){ - int i=0; - char *p, puf[MAXBUFSIZE]; - - if(strlen(state->qpbuf) > 0){ - memset(puf, 0, MAXBUFSIZE); - strncpy(puf, state->qpbuf, MAXBUFSIZE-1); - strncat(puf, buf, MAXBUFSIZE-1); - - memset(buf, 0, MAXBUFSIZE); - memcpy(buf, puf, MAXBUFSIZE); - - memset(state->qpbuf, 0, MAX_TOKEN_LEN); - } - - if(buf[strlen(buf)-1] == '='){ - buf[strlen(buf)-1] = '\0'; - i = 1; - } - - if(i == 1){ - p = strrchr(buf, ' '); - if(p){ - memset(state->qpbuf, 0, MAX_TOKEN_LEN); - if(strlen(p) < MAX_TOKEN_LEN-1){ - snprintf(state->qpbuf, MAXBUFSIZE-1, "%s", p); - *p = '\0'; - } - - } - } -} - - void fixupBase64EncodedLine(char *buf, struct _state *state){ char *p, puf[MAXBUFSIZE]; if(strlen(state->miscbuf) > 0){ - memset(puf, 0, MAXBUFSIZE); - strncpy(puf, state->miscbuf, MAXBUFSIZE-1); - strncat(puf, buf, MAXBUFSIZE-1); + memset(puf, 0, sizeof(puf)); + strncpy(puf, state->miscbuf, sizeof(puf)-1); + strncat(puf, buf, sizeof(puf)-1); memset(buf, 0, MAXBUFSIZE); memcpy(buf, puf, MAXBUFSIZE); @@ -313,7 +270,9 @@ void fixupBase64EncodedLine(char *buf, struct _state *state){ if(buf[strlen(buf)-1] != '\n'){ p = strrchr(buf, ' '); if(p){ - strncpy(state->miscbuf, p+1, MAX_TOKEN_LEN-1); + //strncpy(state->miscbuf, p+1, MAX_TOKEN_LEN-1); + memcpy(&(state->miscbuf[0]), p+1, MAX_TOKEN_LEN-1); + *p = '\0'; } } @@ -404,6 +363,8 @@ int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state){ if(pos == 0 && strncmp(htmlbuf, "style ", 6) == 0) state->style = 1; if(pos == 0 && strncmp(htmlbuf, "/style ", 7) == 0) state->style = 0; + return 0; + //printf("appendHTML: pos:%d, +%s+\n", pos, htmlbuf); if(state->style == 1) return 0; @@ -413,8 +374,6 @@ int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state){ snprintf(html, SMALLBUFSIZE-1, "HTML*%s", htmlbuf); len = strlen(html); - if(isSkipHTMLTag(html) == 1) return 0; - if(len > 8 && strchr(html, '=')){ p = strstr(html, "cid:"); if(p){ @@ -435,18 +394,6 @@ int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state){ } -int isSkipHTMLTag(char *s){ - int i=0; - - for(i=0; imessage_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC) && *p == '@'){ continue; } - if(state->message_state == MSG_SUBJECT && *p == '%'){ continue; } + if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_') ){ continue; } if(state->message_state == MSG_CONTENT_TYPE && *p == '_' ){ continue; } - if(state->message_state != MSG_BODY && (*p == '.' || *p == '-') ){ continue; } + if(*p == '.' || *p == '-'){ continue; } if(strncasecmp((char *)p, "http://", 7) == 0){ p += 7; url = 1; continue; } if(strncasecmp((char *)p, "https://", 8) == 0){ p += 8; url = 1; continue; } @@ -488,7 +435,7 @@ void translateLine(unsigned char *p, struct _state *state){ } - /* restore the soft break in quoted-printable parts, 2006.01.05, SJ */ + /* restore the soft break in quoted-printable parts */ if(state->qp == 1 && q && (q > P + strlen((char*)P) - 3)) *q = '='; @@ -522,11 +469,6 @@ void reassembleToken(char *p){ } -/* - * degenerate a token - */ - - void degenerateToken(unsigned char *p){ int i=1, d=0, dp=0; unsigned char *s; @@ -557,280 +499,54 @@ void degenerateToken(unsigned char *p){ } -/* - * count the invalid characters (ie. garbage on your display) in the buffer - */ - -int countInvalidJunkCharacters(char *p, int replace_junk){ - int i=0; - - for(; *p; p++){ - if(invalid_junk_characters[(unsigned char)*p] == *p){ - i++; - if(replace_junk == 1) *p = JUNK_REPLACEMENT_CHAR; - } - } - - return i; -} - - -/* - * detect Chinese, Japan, Korean, ... lines - */ - -int countInvalidJunkLines(char *p){ - int i=0; - - if(*p == '' && *(p+1) == '$' && *(p+2) == 'B'){ - for(; *p; p++){ - if(*p == '' && *(p+1) == '(' && *(p+2) == 'B') - i++; - } - } - - return i; -} - - -int countNonPrintableCharacters(char *p){ - int n = 0; - - for(; *p; p++){ - if(!isprint(*p) && !isspace(*p)) n++; - } - - return n; -} - - -/* - * is this a hexadecimal numeric string? - */ - -int isHexNumber(char *p){ - for(; *p; p++){ - if(!(*p == '-' || (*p >= 0x30 && *p <= 0x39) || (*p >= 0x41 && *p <= 0x46) || (*p >= 0x61 && *p <= 0x66)) ) - return 0; - } - - return 1; -} - - void fixURL(char *url){ - char *p, *q, m[MAX_TOKEN_LEN], fixed_url[MAXBUFSIZE]; - int i, dots=0; - struct in_addr addr; + char *p, *q, fixed_url[SMALLBUFSIZE]; - /* chop trailing dot */ + memset(fixed_url, 0, sizeof(fixed_url)); - if(url[strlen(url)-1] == '.') - url[strlen(url)-1] = '\0'; + p = url; - memset(fixed_url, 0, MAXBUFSIZE); + if(strncasecmp(url, "http://", 7) == 0) p += 7; + if(strncasecmp(url, "https://", 8) == 0) p += 8; - if((strncasecmp(url, "http://", 7) == 0 || strncasecmp(url, "https://", 8) == 0) ){ - p = url; + q = strchr(p, '/'); + if(q) *q = '\0'; - if(strncasecmp(p, "http://", 7) == 0) p += 7; - if(strncasecmp(p, "https://", 8) == 0) p += 8; - - /* skip anything after the host part, 2006.12.11, SJ */ - q = strchr(p, '/'); - if(q) - *q = '\0'; - - /* - http://www.ajandekkaracsonyra.hu/email.php?page=email&cmd=unsubscribe&email=yy@xxxx.kom is - chopped to www.ajandekkaracsonyra.hu at this point, 2006.12.15, SJ - */ - - dots = countCharacterInBuffer(p, '.'); - if(dots < 1) - return; - - strncpy(fixed_url, "URL*", MAXBUFSIZE-1); - - /* is it a numeric IP-address? */ - - if(inet_aton(p, &addr)){ - addr.s_addr = ntohl(addr.s_addr); - strncat(fixed_url, inet_ntoa(addr), MAXBUFSIZE-1); - strcpy(url, fixed_url); - } - else { - for(i=0; i<=dots; i++){ - q = split(p, '.', m, MAX_TOKEN_LEN-1); - if(i>dots-2){ - strncat(fixed_url, m, MAXBUFSIZE-1); - if(i < dots) - strncat(fixed_url, ".", MAXBUFSIZE-1); - } - p = q; - } - - /* if it does not contain a single dot, the rest of the URL may be - in the next line or it is a fake one, anyway skip, 2006.04.06, SJ - */ - - if(countCharacterInBuffer(fixed_url, '.') != 1) - memset(url, 0, MAXBUFSIZE); - else { - for(i=4; idots-2){ - strncat(fixed_fqdn, m, MAXBUFSIZE-1); - if(i < dots) - strncat(fixed_fqdn, ".", MAXBUFSIZE-1); - } - p = q; - } - - strcpy(fqdn, fixed_fqdn); -} - - -/* - * extract the .tld from a name (URL, FQDN, ...) - */ - -void getTLDFromName(char *name){ - char *p, fixed_name[SMALLBUFSIZE];; - - p = strrchr(name, '.'); + snprintf(buf, sizeof(buf)-1, "%s", s); + p = strstr(buf, name); if(p){ - snprintf(fixed_name, SMALLBUFSIZE-1, "URL*%s", p+1); - strcpy(name, fixed_name); + p += strlen(name); + p = strchr(p, '='); + if(p){ + p++; + q = strrchr(p, ';'); + if(q) *q = '\0'; + q = strrchr(p, '"'); + if(q){ + *q = '\0'; + p = strchr(p, '"'); + if(p){ + p++; + } + } + snprintf(resultbuf, TINYBUFSIZE-1, "%s", p); + rc = 1; + } } + return rc; } -int isItemOnList(char *item, char *list, char *extralist){ - char *p, *q, w[SMALLBUFSIZE], my_list[SMALLBUFSIZE]; - - if(!item) return 0; - - snprintf(my_list, SMALLBUFSIZE-1, "%s,%s", extralist, list); - - p = my_list; - - do { - p = split(p, ',', w, SMALLBUFSIZE-1); - - trimBuffer(w); - - if(strlen(w) > 2){ - - if(w[strlen(w)-1] == '$'){ - q = item + strlen(item) - strlen(w) + 1; - if(strncasecmp(q, w, strlen(w)-1) == 0) - return 1; - } - else if(strcasestr(item, w)) - return 1; - - } - - } while(p); - - return 0; -} - - -unsigned long parse_date_header(char *s){ - char *p; - unsigned long ts=0; - struct tm tm; - - s += 5; - p = s; - - if(*p == ' '){ p++; s++; } - - p = strchr(s, ','); - if(!p) goto ENDE; - - *p = '\0'; - if(strcmp(s, "Mon") == 0) tm.tm_wday = 1; - else if(strcmp(s, "Tue") == 0) tm.tm_wday = 2; - else if(strcmp(s, "Wed") == 0) tm.tm_wday = 3; - else if(strcmp(s, "Thu") == 0) tm.tm_wday = 4; - else if(strcmp(s, "Fri") == 0) tm.tm_wday = 5; - else if(strcmp(s, "Sat") == 0) tm.tm_wday = 6; - else if(strcmp(s, "Sun") == 0) tm.tm_wday = 0; - s += 5; - - p = strchr(s, ' '); if(!p) goto ENDE; - *p = '\0'; tm.tm_mday = atoi(s); s += 3; - - p = strchr(s, ' '); if(!p) goto ENDE; - *p = '\0'; - if(strcmp(s, "Jan") == 0) tm.tm_mon = 0; - else if(strcmp(s, "Feb") == 0) tm.tm_mon = 1; - else if(strcmp(s, "Mar") == 0) tm.tm_mon = 2; - else if(strcmp(s, "Apr") == 0) tm.tm_mon = 3; - else if(strcmp(s, "May") == 0) tm.tm_mon = 4; - else if(strcmp(s, "Jun") == 0) tm.tm_mon = 5; - else if(strcmp(s, "Jul") == 0) tm.tm_mon = 6; - else if(strcmp(s, "Aug") == 0) tm.tm_mon = 7; - else if(strcmp(s, "Sep") == 0) tm.tm_mon = 8; - else if(strcmp(s, "Oct") == 0) tm.tm_mon = 9; - else if(strcmp(s, "Nov") == 0) tm.tm_mon = 10; - else if(strcmp(s, "Dec") == 0) tm.tm_mon = 11; - s = p+1; - - p = strchr(s, ' '); if(!p) goto ENDE; - tm.tm_year = atoi(s) - 1900; s = p+1; - - p = strchr(s, ':'); if(!p) goto ENDE; - *p = '\0'; tm.tm_hour = atoi(s); s = p+1; - - p = strchr(s, ':'); if(!p) goto ENDE; - *p = '\0'; tm.tm_min = atoi(s); s = p+1; - - p = strchr(s, ' '); if(!p) goto ENDE; - *p = '\0'; tm.tm_sec = atoi(s); s = p+1; - - tm.tm_isdst = -1; - - ts = mktime(&tm); - -ENDE: - return ts; -} diff --git a/src/piler.c b/src/piler.c index 96f4a788..2726efe7 100644 --- a/src/piler.c +++ b/src/piler.c @@ -38,8 +38,6 @@ struct passwd *pwd; void clean_exit(){ if(sd != -1) close(sd); - freeList(data.blackhole); - syslog(LOG_PRIORITY, "%s has been terminated", PROGNAME); unlink(cfg.pidfile); @@ -86,7 +84,6 @@ void initialiseConfiguration(){ setlocale(LC_MESSAGES, cfg.locale); setlocale(LC_CTYPE, cfg.locale); - freeList(data.blackhole); data.blackhole = NULL; syslog(LOG_PRIORITY, "reloaded config: %s", configfile); diff --git a/src/piler.h b/src/piler.h index 664e3e89..475b3291 100644 --- a/src/piler.h +++ b/src/piler.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,8 +25,7 @@ int do_av_check(struct session_data *sdata, char *rcpttoemail, char *fromemail, char *virusinfo, struct __data *data, struct __config *cfg); -int make_header_digest(struct session_data *sdata, struct _state *state); -int make_body_digest(struct session_data *sdata); +int make_body_digest(struct session_data *sdata, struct __config *cfg); int processMessage(struct session_data *sdata, struct _state *sstate, struct __config *cfg); int store_message(struct session_data *sdata, struct _state *state, int stored, struct __config *cfg); diff --git a/src/session.c b/src/session.c index a9bf851e..1d07f41f 100644 --- a/src/session.c +++ b/src/session.c @@ -137,15 +137,15 @@ void handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){ gettimeofday(&tv1, &tz); - sstate = parseMessage(&sdata, cfg); + sstate = parse_message(&sdata, cfg); gettimeofday(&tv2, &tz); sdata.__parsed = tvdiff(tv2, tv1); if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: parsed message", sdata.ttmpfile); - sdata.need_scan = 1; + make_body_digest(&sdata, cfg); #ifdef HAVE_ANTIVIRUS if(cfg->use_antivirus == 1){ @@ -221,7 +221,6 @@ void handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){ #endif unlink(sdata.ttmpfile); - freeState(&sstate); alarm(cfg->session_timeout); @@ -430,7 +429,7 @@ AFTER_PERIOD: * ie. we have timed out than send back 421 error message */ - if(state < SMTP_STATE_QUIT && inj != OK){ + if(state < SMTP_STATE_QUIT && inj == ERR){ snprintf(buf, MAXBUFSIZE-1, SMTP_RESP_421_ERR, cfg->hostid); send(new_sd, buf, strlen(buf), 0); if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: sent: %s", sdata.ttmpfile, buf); diff --git a/src/test.c b/src/test.c index 3545f8be..8e3f76f8 100644 --- a/src/test.c +++ b/src/test.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -15,12 +17,10 @@ int main(int argc, char **argv){ int i, rc; - struct timezone tz; - struct timeval tv_spam_start, tv_spam_stop; + struct stat st; struct session_data sdata; struct _state state; struct __config cfg; - struct stat st; if(argc < 2){ fprintf(stderr, "usage: %s \n", argv[0]); @@ -38,21 +38,23 @@ int main(int argc, char **argv){ setlocale(LC_CTYPE, cfg.locale); rc = 0; - + sdata.num_of_rcpt_to = -1; time(&(sdata.now)); sdata.sent = 0; + sdata.tot_len = st.st_size; memset(sdata.rcptto[0], 0, SMALLBUFSIZE); snprintf(sdata.ttmpfile, SMALLBUFSIZE-1, "%s", argv[1]); - state = parseMessage(&sdata, &cfg); + state = parse_message(&sdata, &cfg); - printf("from: %s\n", state.b_from); - printf("to: %s\n", state.b_to); - printf("subject: %s\n", state.b_subject); printf("message-id: %s\n", state.message_id); - printf("body: %s\n", state.b_body); + printf("from: *%s*\n", state.b_from); + printf("to: *%s*\n", state.b_to); + printf("subject: *%s*\n", state.b_subject); + printf("body: *%s*\n", state.b_body); + make_body_digest(&sdata, &cfg); printf("body digest: %s\n", sdata.bodydigest); @@ -60,11 +62,7 @@ int main(int argc, char **argv){ printf("i:%d, name=*%s*, type: *%s*, size: %d\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size); } - gettimeofday(&tv_spam_start, &tz); - - freeState(&state); - - gettimeofday(&tv_spam_stop, &tz); + printf("\n\n"); return 0; }