better duplicate detection

This commit is contained in:
SJ 2014-04-25 21:17:01 +02:00
parent 89ba24c86b
commit a21f5a68e1
8 changed files with 17 additions and 4 deletions

View File

@@ -14,7 +14,7 @@
#define VERSION "0.1.25-master-branch" #define VERSION "0.1.25-master-branch"
#define BUILD 867 #define BUILD 868
#define HOSTID "mailarchiver" #define HOSTID "mailarchiver"

View File

@@ -148,6 +148,7 @@ struct _state {
int abufpos; int abufpos;
char attachedfile[RND_STR_LEN+SMALLBUFSIZE]; char attachedfile[RND_STR_LEN+SMALLBUFSIZE];
char message_id[SMALLBUFSIZE]; char message_id[SMALLBUFSIZE];
char message_id_hash[2*DIGEST_LENGTH+1];
char miscbuf[MAX_TOKEN_LEN]; char miscbuf[MAX_TOKEN_LEN];
char qpbuf[MAX_TOKEN_LEN]; char qpbuf[MAX_TOKEN_LEN];
unsigned long n_token; unsigned long n_token;

View File

@@ -98,6 +98,7 @@ int import_message(char *filename, struct session_data *sdata, struct __data *da
} }
rc = process_message(sdata, &state, data, cfg); rc = process_message(sdata, &state, data, cfg);
unlink(state.message_id_hash);
} }
unlink(sdata->tmpframe); unlink(sdata->tmpframe);

View File

@@ -240,7 +240,7 @@ void remove_stripped_attachments(struct _state *state){
int process_message(struct session_data *sdata, struct _state *state, struct __data *data, struct __config *cfg){ int process_message(struct session_data *sdata, struct _state *state, struct __data *data, struct __config *cfg){
int rc; int rc, fd;
/* discard if existing message_id */ /* discard if existing message_id */
@@ -257,6 +257,12 @@ int process_message(struct session_data *sdata, struct _state *state, struct __d
return ERR_EXISTS; return ERR_EXISTS;
} }
fd = open(state->message_id_hash, O_CREAT|O_EXCL|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR);
if(fd == -1){
remove_stripped_attachments(state);
return ERR_EXISTS;
}
/* store base64 encoded file attachments */ /* store base64 encoded file attachments */

View File

@@ -145,6 +145,9 @@ void post_parse(struct session_data *sdata, struct _state *state, struct __confi
else snprintf(state->message_id, SMALLBUFSIZE-1, "null"); else snprintf(state->message_id, SMALLBUFSIZE-1, "null");
} }
digest_string(state->message_id, &(state->message_id_hash[0]));
} }

View File

@@ -48,6 +48,7 @@ void init_state(struct _state *state){
state->content_type_is_set = 0; state->content_type_is_set = 0;
memset(state->message_id, 0, SMALLBUFSIZE); memset(state->message_id, 0, SMALLBUFSIZE);
memset(state->message_id_hash, 0, 2*DIGEST_LENGTH+1);
memset(state->miscbuf, 0, MAX_TOKEN_LEN); memset(state->miscbuf, 0, MAX_TOKEN_LEN);
memset(state->qpbuf, 0, MAX_TOKEN_LEN); memset(state->qpbuf, 0, MAX_TOKEN_LEN);

View File

@@ -231,6 +231,7 @@ int handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){
} }
else { else {
inj = process_message(&sdata, &sstate, data, cfg); inj = process_message(&sdata, &sstate, data, cfg);
unlink(sstate.message_id_hash);
counters.c_size += sdata.tot_len; counters.c_size += sdata.tot_len;
} }
@@ -258,7 +259,7 @@ int handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){
counters.c_rcvd++; counters.c_rcvd++;
if(inj == ERR_EXISTS){ if(inj == ERR_EXISTS){
syslog(LOG_PRIORITY, "%s: discarding: duplicate message, id: %llu", sdata.ttmpfile, sdata.duplicate_id); syslog(LOG_PRIORITY, "%s: discarding: duplicate message, id: %llu, message-id: %s", sdata.ttmpfile, sdata.duplicate_id, sstate.message_id);
counters.c_duplicate++; counters.c_duplicate++;
status = S_STATUS_DUPLICATE; status = S_STATUS_DUPLICATE;
} }

View File

@@ -78,7 +78,7 @@ int main(int argc, char **argv){
printf("post parsing...\n"); printf("post parsing...\n");
post_parse(&sdata, &state, &cfg); post_parse(&sdata, &state, &cfg);
printf("message-id: %s\n", state.message_id); printf("message-id: %s / %s\n", state.message_id, state.message_id_hash);
printf("from: *%s (%s)*\n", state.b_from, state.b_from_domain); printf("from: *%s (%s)*\n", state.b_from, state.b_from_domain);
printf("to: *%s (%s)*\n", state.b_to, state.b_to_domain); printf("to: *%s (%s)*\n", state.b_to, state.b_to_domain);
printf("reference: *%s*\n", state.reference); printf("reference: *%s*\n", state.reference);