diff --git a/etc/sphinx.conf b/etc/sphinx.conf index b3c09570..d8a88e8e 100644 --- a/etc/sphinx.conf +++ b/etc/sphinx.conf @@ -11,14 +11,14 @@ source main sql_pass = sphinx sql_query_pre = SET NAMES utf8 - sql_query = SELECT id, `from`, `to`, `subject`, `date`, `body`, `piler_id`, `header_id`, `body_id`, `size` FROM sph_index \ + sql_query = SELECT id, `from`, `to`, `subject`, `arrived`, `sent`, `body`, `size`, `attachments`, `piler_id` FROM sph_index \ WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 ) sql_attr_uint = size - sql_attr_uint = date + sql_attr_uint = arrived + sql_attr_uint = sent + sql_attr_uint = attachments sql_attr_string = piler_id - sql_attr_string = header_id - sql_attr_string = body_id } source delta @@ -32,14 +32,15 @@ source delta sql_query_pre = SET NAMES utf8 sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM sph_index sql_query_post_index = DELETE FROM sph_index WHERE id<=(SELECT max_doc_id FROM sph_counter WHERE counter_id=1) - sql_query = SELECT id, `from`, `to`, `subject`, `date`, `body`, `piler_id`, `header_id`, `body_id`, `size` FROM sph_index \ + sql_query = SELECT id, `from`, `to`, `subject`, `arrived`, `sent`, `body`, `size`, `attachments`, `piler_id` FROM sph_index \ WHERE id <= (SELECT max_doc_id FROM sph_counter WHERE counter_id=1) sql_attr_uint = size - sql_attr_uint = date + sql_attr_uint = arrived + sql_attr_uint = sent + sql_attr_uint = attachments sql_attr_string = piler_id - sql_attr_string = header_id - sql_attr_string = body_id + } @@ -64,7 +65,7 @@ index delta1 indexer { - mem_limit = 32M + mem_limit = 64M } diff --git a/src/attachment.c b/src/attachment.c index 483c16d5..5dd3e6a6 100644 --- a/src/attachment.c +++ b/src/attachment.c @@ -24,8 +24,8 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _ MYSQL_ROW row; MYSQL_STMT *stmt; - MYSQL_BIND bind[6]; - unsigned long len[6]; + MYSQL_BIND bind[7]; + unsigned long len[7]; stmt = 
mysql_stmt_init(&(sdata->mysql)); @@ -34,7 +34,7 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _ return 1; } - snprintf(s, sizeof(s)-1, "INSERT INTO %s (`piler_id`,`attachment_id`,`sig`,`type`,`size`,`ptr`) VALUES(?,?,?,?,?,?)", SQL_ATTACHMENT_TABLE); + snprintf(s, sizeof(s)-1, "INSERT INTO %s (`piler_id`,`attachment_id`,`sig`,`name`,`type`,`size`,`ptr`) VALUES(?,?,?,?,?,?,?)", SQL_ATTACHMENT_TABLE); if(mysql_stmt_prepare(stmt, s, strlen(s))){ syslog(LOG_PRIORITY, "%s: %s.mysql_stmt_prepare() error: %s", sdata->ttmpfile, SQL_ATTACHMENT_TABLE, mysql_stmt_error(stmt)); @@ -91,20 +91,25 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _ len[2] = strlen(state->attachments[i].digest); bind[2].length = &len[2]; bind[3].buffer_type = MYSQL_TYPE_STRING; - bind[3].buffer = state->attachments[i].type; + bind[3].buffer = state->attachments[i].filename; bind[3].is_null = 0; - len[3] = strlen(state->attachments[i].digest); bind[3].length = &len[3]; + len[3] = strlen(state->attachments[i].filename); bind[3].length = &len[3]; - bind[4].buffer_type = MYSQL_TYPE_LONG; - bind[4].buffer = (char *)&(state->attachments[i].size); + bind[4].buffer_type = MYSQL_TYPE_STRING; + bind[4].buffer = state->attachments[i].type; bind[4].is_null = 0; - bind[4].length = 0; + len[4] = strlen(state->attachments[i].type); bind[4].length = &len[4]; /* length must match the type buffer bound above, not the digest */ - bind[5].buffer_type = MYSQL_TYPE_LONGLONG; - bind[5].buffer = (char *)&id; + bind[5].buffer_type = MYSQL_TYPE_LONG; + bind[5].buffer = (char *)&(state->attachments[i].size); bind[5].is_null = 0; bind[5].length = 0; + bind[6].buffer_type = MYSQL_TYPE_LONGLONG; + bind[6].buffer = (char *)&id; + bind[6].is_null = 0; + bind[6].length = 0; + if(mysql_stmt_bind_param(stmt, bind)){ syslog(LOG_PRIORITY, "%s: %s.mysql_stmt_bind_param() error: %s", sdata->ttmpfile, SQL_ATTACHMENT_TABLE, mysql_stmt_error(stmt)); diff --git a/src/config.h b/src/config.h index c434b7fa..6d277b83 100644 --- 
a/src/config.h +++ b/src/config.h @@ -11,7 +11,7 @@ #define PROGNAME "piler" -#define VERSION "0.1.7" +#define VERSION "0.1.8" #define PROGINFO VERSION ", Janos SUTO \n\n" CONFIGURE_PARAMS "\n\nSend bugs/issues to https://jira.acts.hu:8443/\n" diff --git a/src/defs.h b/src/defs.h index 077cd493..b595fe9b 100644 --- a/src/defs.h +++ b/src/defs.h @@ -117,6 +117,7 @@ struct _state { char type[TINYBUFSIZE]; struct list *boundaries; + struct list *rcpt; int n_attachments; struct attachment attachments[MAX_ATTACHMENTS]; diff --git a/src/digest.c b/src/digest.c index 5ab0f4f4..e99c2eb4 100644 --- a/src/digest.c +++ b/src/digest.c @@ -17,7 +17,7 @@ int make_body_digest(struct session_data *sdata, struct __config *cfg){ int i=0, n, fd, hdr_len=0, offset=3; char *body=NULL; - unsigned char buf[MAXBUFSIZE], md[DIGEST_LENGTH]; + unsigned char buf[BIGBUFSIZE], md[DIGEST_LENGTH]; SHA256_CTX context; //if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: digesting", sdata->ttmpfile); @@ -28,14 +28,14 @@ int make_body_digest(struct session_data *sdata, struct __config *cfg){ fd = open(sdata->ttmpfile, O_RDONLY); if(fd == -1) return -1; - while((n = read(fd, buf, MAXBUFSIZE)) > 0){ + while((n = read(fd, buf, sizeof(buf))) > 0){ body = (char *)&buf[0]; if(i == 0){ - hdr_len = searchStringInBuffer(body, MAXBUFSIZE, "\n\r\n", 3); + hdr_len = searchStringInBuffer(body, n, "\n\r\n", 3); if(hdr_len == 0){ - searchStringInBuffer(body, 2*MAXBUFSIZE+1, "\n\n", 2); + hdr_len = searchStringInBuffer(body, n, "\n\n", 2); /* keep the LF-only match; scan only the n bytes read() returned */ offset = 2; } @@ -80,7 +80,7 @@ void digest_file(char *filename, char *digest){ SHA256_Init(&context); - while((n = read(fd, buf, MAXBUFSIZE)) > 0){ + while((n = read(fd, buf, sizeof(buf))) > 0){ SHA256_Update(&context, buf, n); } diff --git a/src/message.c b/src/message.c index 1cfb2e7b..a59f17ca 100644 --- a/src/message.c +++ b/src/message.c @@ -116,9 +116,12 @@ int is_body_digest_already_stored(struct session_data *sdata, struct _state *sta int 
hand_to_sphinx(struct session_data *sdata, struct _state *state, struct __config *cfg){ int rc; - char s[BIGBUFSIZE+2*MAXBUFSIZE]; + char *subj, s[BIGBUFSIZE+2*MAXBUFSIZE]; - snprintf(s, sizeof(s)-1, "INSERT INTO %s (`from`, `to`, `subject`, `body`, `arrived`, `sent`, `size`, `piler_id`) values('%s','%s','%s','%s',%ld,%ld,%d,'%s')", SQL_SPHINX_TABLE, state->b_from, state->b_to, state->b_subject, state->b_body, sdata->now, sdata->sent, sdata->tot_len, sdata->ttmpfile); + subj = state->b_subject; + if(*subj == ' ') subj++; + + snprintf(s, sizeof(s)-1, "INSERT INTO %s (`from`, `to`, `subject`, `body`, `arrived`, `sent`, `size`, `attachments`, `piler_id`) values('%s','%s','%s','%s',%ld,%ld,%d,%d,'%s')", SQL_SPHINX_TABLE, state->b_from, state->b_to, subj, state->b_body, sdata->now, sdata->sent, sdata->tot_len, state->n_attachments, sdata->ttmpfile); rc = mysql_real_query(&(sdata->mysql), s, strlen(s)); @@ -132,8 +135,7 @@ int hand_to_sphinx(struct session_data *sdata, struct _state *state, struct __co int store_meta_data(struct session_data *sdata, struct _state *state, struct __config *cfg){ int i=0, rc, ret=ERR; - char *p, s[MAXBUFSIZE], s2[SMALLBUFSIZE]; - struct list *list = NULL; + char *p, *subj, s[MAXBUFSIZE], s2[SMALLBUFSIZE]; MYSQL_STMT *stmt; MYSQL_BIND bind[4]; @@ -145,6 +147,9 @@ int store_meta_data(struct session_data *sdata, struct _state *state, struct __c goto ENDE_META; } + subj = state->b_subject; + if(*subj == ' ') subj++; + snprintf(s, MAXBUFSIZE-1, "INSERT INTO %s (`from`,`to`,`subject`,`arrived`,`sent`,`size`,`hlen`,`attachments`,`piler_id`,`message_id`,`digest`,`bodydigest`) VALUES(?,?,?,%ld,%ld,%d,%d,%d,'%s',?,'%s','%s')", SQL_METADATA_TABLE, sdata->now, sdata->sent, sdata->tot_len, sdata->hdr_len, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest); if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: meta sql: *%s*", sdata->ttmpfile, s); @@ -168,12 +173,8 @@ int store_meta_data(struct session_data *sdata, struct 
_state *state, struct __c if(strlen(s2) > 5){ LABEL1: - if(is_string_on_list(list, s2) == 1) continue; - - append_list(&list, s2); i++; - memset(bind, 0, sizeof(bind)); bind[0].buffer_type = MYSQL_TYPE_STRING; @@ -187,9 +188,9 @@ LABEL1: len[1] = strlen(s2); bind[1].length = &len[1]; bind[2].buffer_type = MYSQL_TYPE_STRING; - bind[2].buffer = state->b_subject; + bind[2].buffer = subj; bind[2].is_null = 0; - len[2] = strlen(state->b_subject); bind[2].length = &len[2]; + len[2] = strlen(subj); bind[2].length = &len[2]; bind[3].buffer_type = MYSQL_TYPE_STRING; bind[3].buffer = state->message_id; @@ -221,7 +222,6 @@ LABEL1: ENDE_META: - free_list(list); return ret; } diff --git a/src/parser.c b/src/parser.c index cdb98955..d75cc6a2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -47,12 +47,18 @@ struct _state parse_message(struct session_data *sdata, struct __config *cfg){ free_list(state.boundaries); + free_list(state.rcpt); + + trimBuffer(state.b_subject); + fixupEncodedHeaderLine(state.b_subject); + translateLine((unsigned char*)&state.b_subject, &state); for(i=1; i<=state.n_attachments; i++){ digest_file(state.attachments[i].internalname, &(state.attachments[i].digest[0])); + fixupEncodedHeaderLine(state.attachments[i].filename); + if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s", sdata->ttmpfile, i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest); - //printf("attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest); } @@ -71,7 +77,7 @@ struct _state parse_message(struct session_data *sdata, struct __config *cfg){ int parse_line(char *buf, struct _state *state, struct session_data *sdata, struct 
__config *cfg){ - char *p, *r, puf[SMALLBUFSIZE]; + char *p, puf[SMALLBUFSIZE]; int x, len, b64_len, boundary_line=0; state->line_num++; @@ -199,6 +205,23 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru } + if(state->is_1st_header == 1 && state->message_state == MSG_SUBJECT && strlen(state->b_subject) + strlen(buf) < MAXBUFSIZE-1){ + + if(state->b_subject[0] == '\0'){ + strncat(state->b_subject, buf+strlen("Subject:"), MAXBUFSIZE-1); + } + else { + + p = strrchr(state->b_subject, ' '); + if(p && ( strcasestr(p+1, "?Q?") || strcasestr(p+1, "?B?") ) ){ + strncat(state->b_subject, buf+1, MAXBUFSIZE-1); + } + else strncat(state->b_subject, buf, MAXBUFSIZE-1); + + } + + } + /* Content-type: checking */ @@ -295,13 +318,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru /* end of boundary check */ - - if(state->is_header == 1){ - /* skip irrelevant headers */ - if(state->message_state != MSG_SUBJECT && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) return 0; - - if(state->message_state == MSG_SUBJECT) fixupEncodedHeaderLine(buf); - } + /* skip irrelevant headers */ + if(state->is_header == 1 && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) return 0; /* don't process body if it's not a text or html part */ @@ -334,6 +352,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru if(state->is_header == 1) p = strchr(buf, ' '); else p = buf; + //printf("a: *%s*\n", buf); + do { memset(puf, 0, sizeof(puf)); p = split(p, ' ', puf, sizeof(puf)-1); @@ -344,16 +364,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru if(puf[0] == '\0') continue; - if(state->message_state == MSG_SUBJECT){ - r = &puf[0]; for(; *r; r++){ if(*r == '_') *r = ' '; } - } - - if(state->qp == 1 && puf[strlen(puf)-1] == '='){ - puf[strlen(puf)-1] = '\0'; - } - else 
if(state->message_state != MSG_SUBJECT || (p && strchr(p, ' ')) ){ - strncat(puf, " ", sizeof(puf)-1); - } + strncat(puf, " ", sizeof(puf)-1); if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf); @@ -362,15 +373,16 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru len = strlen(puf); - if(state->message_state == MSG_SUBJECT && state->is_1st_header == 1 && strlen(state->b_subject) < MAXBUFSIZE-len-1) - memcpy(&(state->b_subject[strlen(state->b_subject)]), puf, len); - - else if(state->message_state == MSG_FROM && strchr(puf, '@') && state->is_1st_header == 1 && state->b_from[0] == '\0' && strlen(state->b_from) < SMALLBUFSIZE-len-1) + if(state->message_state == MSG_FROM && strchr(puf, '@') && state->is_1st_header == 1 && state->b_from[0] == '\0' && strlen(state->b_from) < SMALLBUFSIZE-len-1) memcpy(&(state->b_from[strlen(state->b_from)]), puf, len); - else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && strchr(puf, '@') && strlen(state->b_to) < SMALLBUFSIZE-len-1) - memcpy(&(state->b_to[strlen(state->b_to)]), puf, len); + else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && strchr(puf, '@') && strlen(state->b_to) < SMALLBUFSIZE-len-1){ + if(is_string_on_list(state->rcpt, puf) == 0){ + append_list(&(state->rcpt), puf); + memcpy(&(state->b_to[strlen(state->b_to)]), puf, len); + } + } else if(state->message_state == MSG_BODY && strlen(state->b_body) < BIGBUFSIZE-len-1) memcpy(&(state->b_body[strlen(state->b_body)]), puf, len); diff --git a/src/parser_utils.c b/src/parser_utils.c index 38590843..14ba5aea 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -60,6 +60,7 @@ void init_state(struct _state *state){ state->saved_size = 0; state->boundaries = NULL; + state->rcpt = NULL; state->n_attachments = 0; @@ -197,66 +198,78 @@ int extract_boundary(char *p, struct _state *state){ void 
fixupEncodedHeaderLine(char *buf){ - char *p, *q, *r, *s, u[SMALLBUFSIZE], puf[MAXBUFSIZE]; + char *sb, *sq, *p, *q, *r, *s, v[SMALLBUFSIZE], puf[MAXBUFSIZE]; char *start, *end; - memset(puf, 0, MAXBUFSIZE); + memset(puf, 0, sizeof(puf)); + + //printf("hdr: *%s*\n", buf); q = buf; do { - q = split_str(q, " ", u, SMALLBUFSIZE-1); + q = split_str(q, " ", v, sizeof(v)-1); + + //printf("v: %s\n", v); + + p = v; - p = u; do { start = strstr(p, "=?"); if(start){ - if(start != p){ - *start = '\0'; - strncat(puf, p, MAXBUFSIZE-1); - *start = '='; + *start = '\0'; + if(strlen(p) > 0){ + //printf("flushed, no decode: *%s*\n", p); + strncat(puf, p, sizeof(puf)-1); } - /* find the trailing '?=' sequence */ + start++; - end = strrchr(p, '?'); r = strrchr(p, '='); + s = NULL; + sb = strcasestr(start, "?B?"); if(sb) s = sb; + sq = strcasestr(start, "?Q?"); if(sq) s = sq; - if(end && r && r == end+1){ - *end = '\0'; - p = end + 2; + if(s){ + end = strstr(s+3, "?="); + if(end){ + *end = '\0'; + //printf("ez az: *%s*\n", s+3); + if(sb){ decodeBase64(s+3); } + if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } } - s = NULL; - if((s = strcasestr(start+2, "?B?"))){ - *s = '\0'; - decodeBase64(s+3); + //printf("dekodolva: *%s*\n", s+3); + + //printf("start: %s\n", start+1); + if(strncasecmp(start+1, "utf-8", 5) == 0) decodeUTF8(s+3); + + strncat(puf, s+3, sizeof(puf)-1); + + p = end + 2; + //printf("maradek: +%s+\n", p); } - else if((s = strcasestr(start+2, "?Q?"))){ - *s = '\0'; - decodeQP(s+3); - } - - if(s && strncasecmp(start, "=?utf-8", 5) == 0){ - decodeUTF8(s+3); - } - - if(s) strncat(puf, s+3, MAXBUFSIZE-1); } else { - start = NULL; + //printf("aaaa: *%s*\n", start); + strncat(puf, start, sizeof(puf)-1); + + break; } } - - if(!start){ - strncat(puf, p, MAXBUFSIZE-1); + else { + //printf("keiene dekod: +%s+\n", p); + strncat(puf, p, sizeof(puf)-1); + break; } - } while(start); + } while(p); - strncat(puf, " ", MAXBUFSIZE-1); + if(q) strncat(puf, " ", 
sizeof(puf)-1); } while(q); + //printf("=> *%s*\n", puf); + snprintf(buf, MAXBUFSIZE-1, "%s", puf); } @@ -416,7 +429,7 @@ void translateLine(unsigned char *p, struct _state *state){ if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC) && *p == '@'){ continue; } - if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_') ){ continue; } + if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; } if(state->message_state == MSG_CONTENT_TYPE && *p == '_' ){ continue; } diff --git a/util/db-mysql.sql b/util/db-mysql.sql index 059466af..591630ac 100644 --- a/util/db-mysql.sql +++ b/util/db-mysql.sql @@ -15,12 +15,12 @@ create table `sph_index` ( `sent` int not null, `body` text, `size` int default '0', + `attachments` int default 0, `piler_id` char(36) not null, - `header_id` char(16) default null, - `body_id` char(16) default null, primary key (`id`) ) Engine=InnoDB; + drop table if exists `metadata`; create table `metadata` ( `id` bigint unsigned not null auto_increment, @@ -49,7 +49,8 @@ create table `attachment` ( `id` bigint unsigned not null auto_increment, `piler_id` char(36) not null, `attachment_id` int not null, - `type` char(64) default null, + `name` char(64) default null, + `type` char(72) default null, `sig` char(64) not null, `size` int default 0, `ptr` int default 0,