From 572d258c2aafbcf2dfaae5490316bb1cfaa80627 Mon Sep 17 00:00:00 2001 From: SJ Date: Wed, 22 Aug 2012 08:15:53 +0200 Subject: [PATCH] added min_word_len option to discard/skip too short words in email body --- etc/example.conf | 3 ++- src/cfg.c | 1 + src/cfg.h | 2 ++ src/parser.c | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/etc/example.conf b/etc/example.conf index 6af079a1..0ed00ceb 100644 --- a/etc/example.conf +++ b/etc/example.conf @@ -58,7 +58,8 @@ piler_header_field=X-piler: piler already archived this email ; to messages without message-id. archive_emails_not_having_message_id=0 - +; minimum word length in mail body to index +min_word_len=1 ; comma separated list of your domains. piler uses this information to determine ; the direction of the given email diff --git a/src/cfg.c b/src/cfg.c index 4502b554..53b95844 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -75,6 +75,7 @@ struct _parse_rule config_parse_rules[] = { "memcached_servers", "string", (void*) string_parser, offsetof(struct __config, memcached_servers), "127.0.0.1", MAXVAL-1}, { "memcached_to_db_interval", "integer", (void*) int_parser, offsetof(struct __config, memcached_to_db_interval), "900", sizeof(int)}, { "memcached_ttl", "integer", (void*) int_parser, offsetof(struct __config, memcached_ttl), "86400", sizeof(int)}, + { "min_word_len", "integer", (void*) int_parser, offsetof(struct __config, min_word_len), "1", sizeof(int)}, { "mydomains", "string", (void*) string_parser, offsetof(struct __config, mydomains), "", MAXVAL-1}, { "mysqlhost", "string", (void*) string_parser, offsetof(struct __config, mysqlhost), "", MAXVAL-1}, { "mysqlport", "integer", (void*) int_parser, offsetof(struct __config, mysqlport), "", sizeof(int)}, diff --git a/src/cfg.h b/src/cfg.h index 24940e65..52e4f7c2 100644 --- a/src/cfg.h +++ b/src/cfg.h @@ -64,6 +64,8 @@ struct __config { int archive_emails_not_having_message_id; + int min_word_len; + int debug; }; diff --git a/src/parser.c b/src/parser.c index 0d208b63..eb3c8d68 100644 --- a/src/parser.c +++ b/src/parser.c @@ -534,7 +534,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int } } } - else if(state->message_state == MSG_BODY && state->bodylen < BIGBUFSIZE-len-1){ + else if(state->message_state == MSG_BODY && len >= cfg->min_word_len && state->bodylen < BIGBUFSIZE-len-1){ memcpy(&(state->b_body[state->bodylen]), puf, len); state->bodylen += len; }