parser fixes

This commit is contained in:
SJ 2012-01-28 20:52:13 +01:00
parent a7df7986ae
commit b4d13833b8
6 changed files with 10 additions and 7 deletions

View File

@@ -11,7 +11,7 @@ source main
sql_pass = sphinx sql_pass = sphinx
sql_query_pre = SET NAMES utf8 sql_query_pre = SET NAMES utf8
sql_query = SELECT id, `from`, `to`, `fromdomain`, `todomain`, `subject`, `arrived`, `sent`, `body`, `size`, `direction`, `attachments` FROM sph_index \ sql_query = SELECT id, `from`, `to`, `fromdomain`, `todomain`, `subject`, `arrived`, `sent`, `body`, `size`, `direction`, `attachments`, `attachment_types` FROM sph_index \
WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 ) WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
sql_attr_uint = size sql_attr_uint = size
@@ -33,7 +33,7 @@ source delta
sql_query_pre = SET NAMES utf8 sql_query_pre = SET NAMES utf8
sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM sph_index sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM sph_index
sql_query_post_index = DELETE FROM sph_index WHERE id<=(SELECT max_doc_id FROM sph_counter WHERE counter_id=1) sql_query_post_index = DELETE FROM sph_index WHERE id<=(SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
sql_query = SELECT id, `from`, `to`, `fromdomain`, `todomain`, `subject`, `arrived`, `sent`, `body`, `size`, `direction`, `attachments` FROM sph_index \ sql_query = SELECT id, `from`, `to`, `fromdomain`, `todomain`, `subject`, `arrived`, `sent`, `body`, `size`, `direction`, `attachments`, `attachment_types` FROM sph_index \
WHERE id <= (SELECT max_doc_id FROM sph_counter WHERE counter_id=1) WHERE id <= (SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
sql_attr_uint = size sql_attr_uint = size

View File

@@ -11,7 +11,7 @@
#define PROGNAME "piler" #define PROGNAME "piler"
#define VERSION "0.1.12" #define VERSION "0.1.13"
#define PROGINFO VERSION ", Janos SUTO <sj@acts.hu>\n\n" CONFIGURE_PARAMS "\n" #define PROGINFO VERSION ", Janos SUTO <sj@acts.hu>\n\n" CONFIGURE_PARAMS "\n"

View File

@@ -398,7 +398,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf); if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf);
if(state->is_header == 0 && strncmp(puf, "URL*", 4) && (puf[0] == ' ' || strlen(puf) > MAX_WORD_LEN || isHexNumber(puf)) ) continue; if(state->is_header == 0 && strncmp(puf, "__URL__", 7) && (puf[0] == ' ' || strlen(puf) > MAX_WORD_LEN || isHexNumber(puf)) ) continue;
len = strlen(puf); len = strlen(puf);

View File

@@ -496,7 +496,7 @@ void translateLine(unsigned char *p, struct _state *state){
void fix_email_address_for_sphinx(char *s){ void fix_email_address_for_sphinx(char *s){
for(; *s; s++){ for(; *s; s++){
if(*s == '@' || *s == '.' || *s == '+') *s = 'X'; if(*s == '@' || *s == '.' || *s == '+' || *s == '-' || *s == '_') *s = 'X';
} }
} }
@@ -589,7 +589,7 @@ void fixURL(char *url){
if(q) *q = '\0'; if(q) *q = '\0';
snprintf(fixed_url, sizeof(fixed_url)-1, "__URL__%s ", p); snprintf(fixed_url, sizeof(fixed_url)-1, "__URL__%s ", p);
fix_email_address_for_sphinx(fixed_url); fix_email_address_for_sphinx(fixed_url+7);
strcpy(url, fixed_url); strcpy(url, fixed_url);
} }

View File

@@ -147,6 +147,8 @@ int check_attachment_rule(struct _state *state, struct rule *rule){
int i; int i;
size_t nmatch=0; size_t nmatch=0;
if(state->n_attachments == 0) return 1;
for(i=1; i<=state->n_attachments; i++){ for(i=1; i<=state->n_attachments; i++){
if( if(
regexec(&(rule->attachment_type), state->attachments[i].type, nmatch, NULL, 0) == 0 && regexec(&(rule->attachment_type), state->attachments[i].type, nmatch, NULL, 0) == 0 &&

View File

@@ -110,7 +110,8 @@ create table if not exists `archiving_rule` (
`attachment_type` char(128) default null, `attachment_type` char(128) default null,
`_attachment_size` char(2) default null, `_attachment_size` char(2) default null,
`attachment_size` int default 0, `attachment_size` int default 0,
primary key (`id`) primary key (`id`),
unique(`from`,`to`,`subject`,`_size`,`size`,`attachment_type`,`_attachment_size`,`attachment_size`)
) ENGINE=InnoDB; ) ENGINE=InnoDB;