This commit is contained in:
SJ 2011-11-28 14:21:14 +01:00
parent 0f5ae4fb2d
commit b806c3548f
9 changed files with 131 additions and 98 deletions

View File

@ -11,14 +11,14 @@ source main
sql_pass = sphinx
sql_query_pre = SET NAMES utf8
sql_query = SELECT id, `from`, `to`, `subject`, `date`, `body`, `piler_id`, `header_id`, `body_id`, `size` FROM sph_index \
sql_query = SELECT id, `from`, `to`, `subject`, `arrived`, `sent`, `body`, `size`, `attachments`, `piler_id` FROM sph_index \
WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
sql_attr_uint = size
sql_attr_uint = date
sql_attr_uint = arrived
sql_attr_uint = sent
sql_attr_uint = attachments
sql_attr_string = piler_id
sql_attr_string = header_id
sql_attr_string = body_id
}
source delta
@ -32,14 +32,15 @@ source delta
sql_query_pre = SET NAMES utf8
sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM sph_index
sql_query_post_index = DELETE FROM sph_index WHERE id<=(SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
sql_query = SELECT id, `from`, `to`, `subject`, `date`, `body`, `piler_id`, `header_id`, `body_id`, `size` FROM sph_index \
sql_query = SELECT id, `from`, `to`, `subject`, `arrived`, `sent`, `body`, `size`, `attachments`, `piler_id` FROM sph_index \
WHERE id <= (SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
sql_attr_uint = size
sql_attr_uint = date
sql_attr_uint = arrived
sql_attr_uint = sent
sql_attr_uint = attachments
sql_attr_string = piler_id
sql_attr_string = header_id
sql_attr_string = body_id
}
@ -64,7 +65,7 @@ index delta1
indexer
{
mem_limit = 32M
mem_limit = 64M
}

View File

@ -24,8 +24,8 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _
MYSQL_ROW row;
MYSQL_STMT *stmt;
MYSQL_BIND bind[6];
unsigned long len[6];
MYSQL_BIND bind[7];
unsigned long len[7];
stmt = mysql_stmt_init(&(sdata->mysql));
@ -34,7 +34,7 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _
return 1;
}
snprintf(s, sizeof(s)-1, "INSERT INTO %s (`piler_id`,`attachment_id`,`sig`,`type`,`size`,`ptr`) VALUES(?,?,?,?,?,?)", SQL_ATTACHMENT_TABLE);
snprintf(s, sizeof(s)-1, "INSERT INTO %s (`piler_id`,`attachment_id`,`sig`,`name`,`type`,`size`,`ptr`) VALUES(?,?,?,?,?,?,?)", SQL_ATTACHMENT_TABLE);
if(mysql_stmt_prepare(stmt, s, strlen(s))){
syslog(LOG_PRIORITY, "%s: %s.mysql_stmt_prepare() error: %s", sdata->ttmpfile, SQL_ATTACHMENT_TABLE, mysql_stmt_error(stmt));
@ -91,20 +91,25 @@ int store_attachments(struct session_data *sdata, struct _state *state, struct _
len[2] = strlen(state->attachments[i].digest); bind[2].length = &len[2];
bind[3].buffer_type = MYSQL_TYPE_STRING;
bind[3].buffer = state->attachments[i].type;
bind[3].buffer = state->attachments[i].filename;
bind[3].is_null = 0;
len[3] = strlen(state->attachments[i].digest); bind[3].length = &len[3];
len[3] = strlen(state->attachments[i].filename); bind[3].length = &len[3];
bind[4].buffer_type = MYSQL_TYPE_LONG;
bind[4].buffer = (char *)&(state->attachments[i].size);
bind[4].buffer_type = MYSQL_TYPE_STRING;
bind[4].buffer = state->attachments[i].type;
bind[4].is_null = 0;
bind[4].length = 0;
len[4] = strlen(state->attachments[i].digest); bind[4].length = &len[4];
bind[5].buffer_type = MYSQL_TYPE_LONGLONG;
bind[5].buffer = (char *)&id;
bind[5].buffer_type = MYSQL_TYPE_LONG;
bind[5].buffer = (char *)&(state->attachments[i].size);
bind[5].is_null = 0;
bind[5].length = 0;
bind[6].buffer_type = MYSQL_TYPE_LONGLONG;
bind[6].buffer = (char *)&id;
bind[6].is_null = 0;
bind[6].length = 0;
if(mysql_stmt_bind_param(stmt, bind)){
syslog(LOG_PRIORITY, "%s: %s.mysql_stmt_bind_param() error: %s", sdata->ttmpfile, SQL_ATTACHMENT_TABLE, mysql_stmt_error(stmt));

View File

@ -11,7 +11,7 @@
#define PROGNAME "piler"
#define VERSION "0.1.7"
#define VERSION "0.1.8"
#define PROGINFO VERSION ", Janos SUTO <sj@acts.hu>\n\n" CONFIGURE_PARAMS "\n\nSend bugs/issues to https://jira.acts.hu:8443/\n"

View File

@ -117,6 +117,7 @@ struct _state {
char type[TINYBUFSIZE];
struct list *boundaries;
struct list *rcpt;
int n_attachments;
struct attachment attachments[MAX_ATTACHMENTS];

View File

@ -17,7 +17,7 @@
int make_body_digest(struct session_data *sdata, struct __config *cfg){
int i=0, n, fd, hdr_len=0, offset=3;
char *body=NULL;
unsigned char buf[MAXBUFSIZE], md[DIGEST_LENGTH];
unsigned char buf[BIGBUFSIZE], md[DIGEST_LENGTH];
SHA256_CTX context;
//if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: digesting", sdata->ttmpfile);
@ -28,14 +28,14 @@ int make_body_digest(struct session_data *sdata, struct __config *cfg){
fd = open(sdata->ttmpfile, O_RDONLY);
if(fd == -1) return -1;
while((n = read(fd, buf, MAXBUFSIZE)) > 0){
while((n = read(fd, buf, sizeof(buf))) > 0){
body = (char *)&buf[0];
if(i == 0){
hdr_len = searchStringInBuffer(body, MAXBUFSIZE, "\n\r\n", 3);
hdr_len = searchStringInBuffer(body, sizeof(buf), "\n\r\n", 3);
if(hdr_len == 0){
searchStringInBuffer(body, 2*MAXBUFSIZE+1, "\n\n", 2);
searchStringInBuffer(body, sizeof(buf), "\n\n", 2);
offset = 2;
}
@ -80,7 +80,7 @@ void digest_file(char *filename, char *digest){
SHA256_Init(&context);
while((n = read(fd, buf, MAXBUFSIZE)) > 0){
while((n = read(fd, buf, sizeof(buf))) > 0){
SHA256_Update(&context, buf, n);
}

View File

@ -116,9 +116,12 @@ int is_body_digest_already_stored(struct session_data *sdata, struct _state *sta
int hand_to_sphinx(struct session_data *sdata, struct _state *state, struct __config *cfg){
int rc;
char s[BIGBUFSIZE+2*MAXBUFSIZE];
char *subj, s[BIGBUFSIZE+2*MAXBUFSIZE];
snprintf(s, sizeof(s)-1, "INSERT INTO %s (`from`, `to`, `subject`, `body`, `arrived`, `sent`, `size`, `piler_id`) values('%s','%s','%s','%s',%ld,%ld,%d,'%s')", SQL_SPHINX_TABLE, state->b_from, state->b_to, state->b_subject, state->b_body, sdata->now, sdata->sent, sdata->tot_len, sdata->ttmpfile);
subj = state->b_subject;
if(*subj == ' ') subj++;
snprintf(s, sizeof(s)-1, "INSERT INTO %s (`from`, `to`, `subject`, `body`, `arrived`, `sent`, `size`, `attachments`, `piler_id`) values('%s','%s','%s','%s',%ld,%ld,%d,%d,'%s')", SQL_SPHINX_TABLE, state->b_from, state->b_to, subj, state->b_body, sdata->now, sdata->sent, sdata->tot_len, state->n_attachments, sdata->ttmpfile);
rc = mysql_real_query(&(sdata->mysql), s, strlen(s));
@ -132,8 +135,7 @@ int hand_to_sphinx(struct session_data *sdata, struct _state *state, struct __co
int store_meta_data(struct session_data *sdata, struct _state *state, struct __config *cfg){
int i=0, rc, ret=ERR;
char *p, s[MAXBUFSIZE], s2[SMALLBUFSIZE];
struct list *list = NULL;
char *p, *subj, s[MAXBUFSIZE], s2[SMALLBUFSIZE];
MYSQL_STMT *stmt;
MYSQL_BIND bind[4];
@ -145,6 +147,9 @@ int store_meta_data(struct session_data *sdata, struct _state *state, struct __c
goto ENDE_META;
}
subj = state->b_subject;
if(*subj == ' ') subj++;
snprintf(s, MAXBUFSIZE-1, "INSERT INTO %s (`from`,`to`,`subject`,`arrived`,`sent`,`size`,`hlen`,`attachments`,`piler_id`,`message_id`,`digest`,`bodydigest`) VALUES(?,?,?,%ld,%ld,%d,%d,%d,'%s',?,'%s','%s')", SQL_METADATA_TABLE, sdata->now, sdata->sent, sdata->tot_len, sdata->hdr_len, state->n_attachments, sdata->ttmpfile, sdata->digest, sdata->bodydigest);
if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: meta sql: *%s*", sdata->ttmpfile, s);
@ -168,12 +173,8 @@ int store_meta_data(struct session_data *sdata, struct _state *state, struct __c
if(strlen(s2) > 5){
LABEL1:
if(is_string_on_list(list, s2) == 1) continue;
append_list(&list, s2);
i++;
memset(bind, 0, sizeof(bind));
bind[0].buffer_type = MYSQL_TYPE_STRING;
@ -187,9 +188,9 @@ LABEL1:
len[1] = strlen(s2); bind[1].length = &len[1];
bind[2].buffer_type = MYSQL_TYPE_STRING;
bind[2].buffer = state->b_subject;
bind[2].buffer = subj;
bind[2].is_null = 0;
len[2] = strlen(state->b_subject); bind[2].length = &len[2];
len[2] = strlen(subj); bind[2].length = &len[2];
bind[3].buffer_type = MYSQL_TYPE_STRING;
bind[3].buffer = state->message_id;
@ -221,7 +222,6 @@ LABEL1:
ENDE_META:
free_list(list);
return ret;
}

View File

@ -47,12 +47,18 @@ struct _state parse_message(struct session_data *sdata, struct __config *cfg){
free_list(state.boundaries);
free_list(state.rcpt);
trimBuffer(state.b_subject);
fixupEncodedHeaderLine(state.b_subject);
translateLine((unsigned char*)&state.b_subject, &state);
for(i=1; i<=state.n_attachments; i++){
digest_file(state.attachments[i].internalname, &(state.attachments[i].digest[0]));
fixupEncodedHeaderLine(state.attachments[i].filename);
if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s", sdata->ttmpfile, i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest);
//printf("attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest);
}
@ -71,7 +77,7 @@ struct _state parse_message(struct session_data *sdata, struct __config *cfg){
int parse_line(char *buf, struct _state *state, struct session_data *sdata, struct __config *cfg){
char *p, *r, puf[SMALLBUFSIZE];
char *p, puf[SMALLBUFSIZE];
int x, len, b64_len, boundary_line=0;
state->line_num++;
@ -199,6 +205,23 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
}
if(state->is_1st_header == 1 && state->message_state == MSG_SUBJECT && strlen(state->b_subject) + strlen(buf) < MAXBUFSIZE-1){
if(state->b_subject[0] == '\0'){
strncat(state->b_subject, buf+strlen("Subject:"), MAXBUFSIZE-1);
}
else {
p = strrchr(state->b_subject, ' ');
if(p && ( strcasestr(p+1, "?Q?") || strcasestr(p+1, "?B?") ) ){
strncat(state->b_subject, buf+1, MAXBUFSIZE-1);
}
else strncat(state->b_subject, buf, MAXBUFSIZE-1);
}
}
/* Content-type: checking */
@ -295,13 +318,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
/* end of boundary check */
if(state->is_header == 1){
/* skip irrelevant headers */
if(state->message_state != MSG_SUBJECT && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) return 0;
if(state->message_state == MSG_SUBJECT) fixupEncodedHeaderLine(buf);
}
/* skip irrelevant headers */
if(state->is_header == 1 && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) return 0;
/* don't process body if it's not a text or html part */
@ -334,6 +352,8 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
if(state->is_header == 1) p = strchr(buf, ' ');
else p = buf;
//printf("a: *%s*\n", buf);
do {
memset(puf, 0, sizeof(puf));
p = split(p, ' ', puf, sizeof(puf)-1);
@ -344,16 +364,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
if(puf[0] == '\0') continue;
if(state->message_state == MSG_SUBJECT){
r = &puf[0]; for(; *r; r++){ if(*r == '_') *r = ' '; }
}
if(state->qp == 1 && puf[strlen(puf)-1] == '='){
puf[strlen(puf)-1] = '\0';
}
else if(state->message_state != MSG_SUBJECT || (p && strchr(p, ' ')) ){
strncat(puf, " ", sizeof(puf)-1);
}
strncat(puf, " ", sizeof(puf)-1);
if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf);
@ -362,15 +373,16 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
len = strlen(puf);
if(state->message_state == MSG_SUBJECT && state->is_1st_header == 1 && strlen(state->b_subject) < MAXBUFSIZE-len-1)
memcpy(&(state->b_subject[strlen(state->b_subject)]), puf, len);
else if(state->message_state == MSG_FROM && strchr(puf, '@') && state->is_1st_header == 1 && state->b_from[0] == '\0' && strlen(state->b_from) < SMALLBUFSIZE-len-1)
if(state->message_state == MSG_FROM && strchr(puf, '@') && state->is_1st_header == 1 && state->b_from[0] == '\0' && strlen(state->b_from) < SMALLBUFSIZE-len-1)
memcpy(&(state->b_from[strlen(state->b_from)]), puf, len);
else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && strchr(puf, '@') && strlen(state->b_to) < SMALLBUFSIZE-len-1)
memcpy(&(state->b_to[strlen(state->b_to)]), puf, len);
else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && strchr(puf, '@') && strlen(state->b_to) < SMALLBUFSIZE-len-1){
if(is_string_on_list(state->rcpt, puf) == 0){
append_list(&(state->rcpt), puf);
memcpy(&(state->b_to[strlen(state->b_to)]), puf, len);
}
}
else if(state->message_state == MSG_BODY && strlen(state->b_body) < BIGBUFSIZE-len-1)
memcpy(&(state->b_body[strlen(state->b_body)]), puf, len);

View File

@ -60,6 +60,7 @@ void init_state(struct _state *state){
state->saved_size = 0;
state->boundaries = NULL;
state->rcpt = NULL;
state->n_attachments = 0;
@ -197,66 +198,78 @@ int extract_boundary(char *p, struct _state *state){
void fixupEncodedHeaderLine(char *buf){
char *p, *q, *r, *s, u[SMALLBUFSIZE], puf[MAXBUFSIZE];
char *sb, *sq, *p, *q, *r, *s, v[SMALLBUFSIZE], puf[MAXBUFSIZE];
char *start, *end;
memset(puf, 0, MAXBUFSIZE);
memset(puf, 0, sizeof(puf));
//printf("hdr: *%s*\n", buf);
q = buf;
do {
q = split_str(q, " ", u, SMALLBUFSIZE-1);
q = split_str(q, " ", v, sizeof(v)-1);
//printf("v: %s\n", v);
p = v;
p = u;
do {
start = strstr(p, "=?");
if(start){
if(start != p){
*start = '\0';
strncat(puf, p, MAXBUFSIZE-1);
*start = '=';
*start = '\0';
if(strlen(p) > 0){
//printf("flushed, no decode: *%s*\n", p);
strncat(puf, p, sizeof(puf)-1);
}
/* find the trailing '?=' sequence */
start++;
end = strrchr(p, '?'); r = strrchr(p, '=');
s = NULL;
sb = strcasestr(start, "?B?"); if(sb) s = sb;
sq = strcasestr(start, "?Q?"); if(sq) s = sq;
if(end && r && r == end+1){
*end = '\0';
p = end + 2;
if(s){
end = strstr(s+3, "?=");
if(end){
*end = '\0';
//printf("ez az: *%s*\n", s+3);
if(sb){ decodeBase64(s+3); }
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
s = NULL;
if((s = strcasestr(start+2, "?B?"))){
*s = '\0';
decodeBase64(s+3);
//printf("dekodolva: *%s*\n", s+3);
//printf("start: %s\n", start+1);
if(strncasecmp(start+1, "utf-8", 5) == 0) decodeUTF8(s+3);
strncat(puf, s+3, sizeof(puf)-1);
p = end + 2;
//printf("maradek: +%s+\n", p);
}
else if((s = strcasestr(start+2, "?Q?"))){
*s = '\0';
decodeQP(s+3);
}
if(s && strncasecmp(start, "=?utf-8", 5) == 0){
decodeUTF8(s+3);
}
if(s) strncat(puf, s+3, MAXBUFSIZE-1);
}
else {
start = NULL;
//printf("aaaa: *%s*\n", start);
strncat(puf, start, sizeof(puf)-1);
break;
}
}
if(!start){
strncat(puf, p, MAXBUFSIZE-1);
else {
//printf("keiene dekod: +%s+\n", p);
strncat(puf, p, sizeof(puf)-1);
break;
}
} while(start);
} while(p);
strncat(puf, " ", MAXBUFSIZE-1);
if(q) strncat(puf, " ", sizeof(puf)-1);
} while(q);
//printf("=> *%s*\n", puf);
snprintf(buf, MAXBUFSIZE-1, "%s", puf);
}
@ -416,7 +429,7 @@ void translateLine(unsigned char *p, struct _state *state){
if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC) && *p == '@'){ continue; }
if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_') ){ continue; }
if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; }
if(state->message_state == MSG_CONTENT_TYPE && *p == '_' ){ continue; }

View File

@ -15,12 +15,12 @@ create table `sph_index` (
`sent` int not null,
`body` text,
`size` int default '0',
`attachments` int default 0,
`piler_id` char(36) not null,
`header_id` char(16) default null,
`body_id` char(16) default null,
primary key (`id`)
) Engine=InnoDB;
drop table if exists `metadata`;
create table `metadata` (
`id` bigint unsigned not null auto_increment,
@ -49,7 +49,8 @@ create table `attachment` (
`id` bigint unsigned not null auto_increment,
`piler_id` char(36) not null,
`attachment_id` int not null,
`type` char(64) default null,
`name` char(64) default null,
`type` char(72) default null,
`sig` char(64) not null,
`size` int default 0,
`ptr` int default 0,