Added min_word_len option to skip words in the email body that are shorter than the configured length

This commit is contained in:
SJ 2012-08-22 08:15:53 +02:00
parent 7e8c7f12b6
commit 572d258c2a
4 changed files with 6 additions and 2 deletions

View File

@ -58,7 +58,8 @@ piler_header_field=X-piler: piler already archived this email
; to messages without message-id.
archive_emails_not_having_message_id=0
; minimum length a word in the mail body must have to be indexed;
; shorter words are skipped
min_word_len=1
; comma-separated list of your domains. Piler uses this information to determine
; the direction of each email

View File

@ -75,6 +75,7 @@ struct _parse_rule config_parse_rules[] =
{ "memcached_servers", "string", (void*) string_parser, offsetof(struct __config, memcached_servers), "127.0.0.1", MAXVAL-1},
{ "memcached_to_db_interval", "integer", (void*) int_parser, offsetof(struct __config, memcached_to_db_interval), "900", sizeof(int)},
{ "memcached_ttl", "integer", (void*) int_parser, offsetof(struct __config, memcached_ttl), "86400", sizeof(int)},
{ "min_word_len", "integer", (void*) int_parser, offsetof(struct __config, min_word_len), "1", sizeof(int)},
{ "mydomains", "string", (void*) string_parser, offsetof(struct __config, mydomains), "", MAXVAL-1},
{ "mysqlhost", "string", (void*) string_parser, offsetof(struct __config, mysqlhost), "", MAXVAL-1},
{ "mysqlport", "integer", (void*) int_parser, offsetof(struct __config, mysqlport), "", sizeof(int)},

View File

@ -64,6 +64,8 @@ struct __config {
int archive_emails_not_having_message_id;
int min_word_len;
int debug;
};

View File

@ -534,7 +534,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
}
}
}
else if(state->message_state == MSG_BODY && state->bodylen < BIGBUFSIZE-len-1){
else if(state->message_state == MSG_BODY && len >= cfg->min_word_len && state->bodylen < BIGBUFSIZE-len-1){
memcpy(&(state->b_body[state->bodylen]), puf, len);
state->bodylen += len;
}