From 67b6b44839da890f742ff29060536259663aed20 Mon Sep 17 00:00:00 2001 From: SJ Date: Mon, 5 Dec 2011 17:18:03 +0100 Subject: [PATCH] minor fixes ---
/*
 * Review notes. They sit in the free-form area between the "---" separator
 * and the first "diff --git" header, so they are not part of any hunk.
 *
 * Purpose of the patch, as far as the hunks show:
 *   - etc/sphinx.conf: adds a MySQL-backed source "tag" (query: SELECT id,
 *     `tag` FROM `tag`) and an index "tag1" stored under /var/data/tag1.
 *     The database user and password are written inline in the config.
 *   - src/parser_utils.c: adds fix_email_address_for_sphinx(char *s), which
 *     walks the string up to its NUL terminator and overwrites every '@',
 *     '.' and '+' with 'X'. The string is modified in place and s is
 *     dereferenced without a NULL check, so callers must pass a valid,
 *     writable, NUL-terminated buffer. The same file also loses the
 *     commented-out printf debugging lines in fixupEncodedHeaderLine().
 *   - src/parser.h: adds the prototype for the new function.
 *   - src/message.c: store_index_data() now calls the new function on
 *     state->b_from and state->b_to just before memset(bind, ...), so both
 *     buffers contain the rewritten text from that point on.
 *   - src/parser.c: parse_message() no longer assigns
 *     state.message_state = MSG_SUBJECT before calling translateLine() on
 *     the subject buffer.
 *     NOTE(review): whether translateLine() depends on message_state is not
 *     visible in this patch; confirm before relying on the removal.
 *   - util/db-mysql.sql: "create table" becomes "create table if not exists"
 *     for sph_counter, sph_index, metadata, rcpt, attachment, archiving_rule
 *     and search. Each of these statements directly follows a
 *     "drop table if exists" for the same table, so the added guard has no
 *     effect when the script runs top to bottom. The patch also adds the
 *     view `messages` (metadata joined to rcpt, matched on id in the where
 *     clause) and the table `tag` (id bigint not null unique, tag char(255)).
 *
 * Do not edit the hunks below by hand: the "@@ -a,b +c,d @@" counts, the
 * diffstat and the index lines all describe the exact hunk contents.
 */
etc/sphinx.conf | 21 +++++++++++++++++++++ src/message.c | 3 +++ src/parser.c | 2 -- src/parser.h | 1 + src/parser_utils.c | 23 ++++++++--------------- util/db-mysql.sql | 24 +++++++++++++++++------- 6 files changed, 50 insertions(+), 24 deletions(-) diff --git a/etc/sphinx.conf b/etc/sphinx.conf index 93f7f8ed..7962be82 100644 --- a/etc/sphinx.conf +++ b/etc/sphinx.conf @@ -41,6 +41,19 @@ source delta } +source tag +{ + type = mysql + sql_host = localhost + sql_db = sphinx + sql_user = sphinx + sql_pass = sphinx + + sql_query_pre = SET NAMES utf8 + sql_query = SELECT id, `tag` FROM `tag` + +} + index main1 { @@ -60,6 +73,14 @@ index delta1 } +index tag1 +{ + source = tag + path = /var/data/tag1 + docinfo = extern + charset_type = utf-8 +} + indexer { diff --git a/src/message.c b/src/message.c index 9e7fb9f4..6bf0714b 100644 --- a/src/message.c +++ b/src/message.c @@ -140,6 +140,9 @@ int store_index_data(struct session_data *sdata, struct _state *state, uint64 id } + fix_email_address_for_sphinx(state->b_from); + fix_email_address_for_sphinx(state->b_to); + memset(bind, 0, sizeof(bind)); diff --git a/src/parser.c b/src/parser.c index e17abab7..e0412672 100644 --- a/src/parser.c +++ b/src/parser.c @@ -51,8 +51,6 @@ struct _state parse_message(struct session_data *sdata, struct __config *cfg){ trimBuffer(state.b_subject); fixupEncodedHeaderLine(state.b_subject); - state.message_state = MSG_SUBJECT; - translateLine((unsigned char*)&state.b_subject, &state); for(i=1; i<=state.n_attachments; i++){ diff --git a/src/parser.h b/src/parser.h index e4ffd47d..156d7fc2 100644 --- a/src/parser.h +++ b/src/parser.h @@ -22,6 +22,7 @@ void fixupBase64EncodedLine(char *buf, struct _state *state); void markHTML(char *buf, struct _state *state); int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state); void 
translateLine(unsigned char *p, struct _state *state); +void fix_email_address_for_sphinx(char *s); void reassembleToken(char *p); void degenerateToken(unsigned char *p); void fixURL(char *url); diff --git a/src/parser_utils.c b/src/parser_utils.c index 14ba5aea..fd6df171 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -203,15 +203,11 @@ void fixupEncodedHeaderLine(char *buf){ memset(puf, 0, sizeof(puf)); - //printf("hdr: *%s*\n", buf); - q = buf; do { q = split_str(q, " ", v, sizeof(v)-1); - //printf("v: %s\n", v); - p = v; do { @@ -219,7 +215,6 @@ void fixupEncodedHeaderLine(char *buf){ if(start){ *start = '\0'; if(strlen(p) > 0){ - //printf("flushed, no decode: *%s*\n", p); strncat(puf, p, sizeof(puf)-1); } @@ -233,31 +228,24 @@ void fixupEncodedHeaderLine(char *buf){ end = strstr(s+3, "?="); if(end){ *end = '\0'; - //printf("ez az: *%s*\n", s+3); + if(sb){ decodeBase64(s+3); } if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } } - - //printf("dekodolva: *%s*\n", s+3); - - //printf("start: %s\n", start+1); if(strncasecmp(start+1, "utf-8", 5) == 0) decodeUTF8(s+3); strncat(puf, s+3, sizeof(puf)-1); p = end + 2; - //printf("maradek: +%s+\n", p); } } else { - //printf("aaaa: *%s*\n", start); strncat(puf, start, sizeof(puf)-1); break; } } else { - //printf("keiene dekod: +%s+\n", p); strncat(puf, p, sizeof(puf)-1); break; } @@ -268,8 +256,6 @@ void fixupEncodedHeaderLine(char *buf){ } while(q); - //printf("=> *%s*\n", puf); - snprintf(buf, MAXBUFSIZE-1, "%s", puf); } @@ -464,6 +450,13 @@ void translateLine(unsigned char *p, struct _state *state){ } +void fix_email_address_for_sphinx(char *s){ + for(; *s; s++){ + if(*s == '@' || *s == '.' 
|| *s == '+') *s = 'X'; + } +} + + /* * reassemble 'V i a g r a' to 'Viagra' */ diff --git a/util/db-mysql.sql b/util/db-mysql.sql index a5a7a539..38156d76 100644 --- a/util/db-mysql.sql +++ b/util/db-mysql.sql @@ -1,12 +1,12 @@ drop table if exists `sph_counter`; -create table `sph_counter` ( +create table if not exists `sph_counter` ( `counter_id` int not null, `max_doc_id` int not null, primary key (`counter_id`) ); drop table if exists `sph_index`; -create table `sph_index` ( +create table if not exists `sph_index` ( `id` bigint not null, `from` char(255) default null, `to` text(512) default null, @@ -21,7 +21,7 @@ create table `sph_index` ( drop table if exists `metadata`; -create table `metadata` ( +create table if not exists `metadata` ( `id` bigint unsigned not null auto_increment, `from` char(255) not null, `subject` text(512) default null, @@ -44,7 +44,7 @@ create index metadata_idx3 on metadata(`bodydigest`); drop table if exists `rcpt`; -create table `rcpt` ( +create table if not exists `rcpt` ( `id` bigint unsigned not null, `to` char(64) not null, unique(`id`,`to`) @@ -54,8 +54,11 @@ create index `rcpt_idx` on `rcpt`(`id`); create index `rcpt_idx2` on `rcpt`(`to`); +drop view if exists `messages`; +create view `messages` AS select `metadata`.`id` AS `id`,`metadata`.`piler_id` AS `piler_id`,`metadata`.`from` AS `from`,`rcpt`.`to` AS `to`,`metadata`.`subject` AS `subject` from (`metadata` join `rcpt`) where (`metadata`.`id` = `rcpt`.`id`); + drop table if exists `attachment`; -create table `attachment` ( +create table if not exists `attachment` ( `id` bigint unsigned not null auto_increment, `piler_id` char(36) not null, `attachment_id` int not null, @@ -71,8 +74,15 @@ create index `attachment_idx` on `attachment`(`piler_id`); create index `attachment_idx2` on `attachment`(`sig`); +drop table if exists `tag`; +create table if not exists `tag` ( + `id` bigint not null unique, + `tag` char(255) default null +); + + drop table if exists `archiving_rule`; 
-create table `archiving_rule` ( +create table if not exists `archiving_rule` ( `id` bigint unsigned not null auto_increment, `from` char(128) default null, `to` char(255) default null, @@ -95,7 +105,7 @@ create table if not exists `counter` ( insert into `counter` values(0, 0, 0); drop table if exists `search`; -create table `search` ( +create table if not exists `search` ( `email` char(128) not null, `ts` int default 0, `term` text(512) not null