Better duplicate detection: guard message processing with an exclusively-created file named after the message-id hash

This commit is contained in:
SJ 2014-04-25 21:17:01 +02:00
parent 89ba24c86b
commit a21f5a68e1
8 changed files with 17 additions and 4 deletions

View File

@ -14,7 +14,7 @@
#define VERSION "0.1.25-master-branch"
#define BUILD 867
#define BUILD 868
#define HOSTID "mailarchiver"

View File

@ -148,6 +148,7 @@ struct _state {
int abufpos;
char attachedfile[RND_STR_LEN+SMALLBUFSIZE];
char message_id[SMALLBUFSIZE];
char message_id_hash[2*DIGEST_LENGTH+1];
char miscbuf[MAX_TOKEN_LEN];
char qpbuf[MAX_TOKEN_LEN];
unsigned long n_token;

View File

@ -98,6 +98,7 @@ int import_message(char *filename, struct session_data *sdata, struct __data *da
}
rc = process_message(sdata, &state, data, cfg);
unlink(state.message_id_hash);
}
unlink(sdata->tmpframe);

View File

@ -240,7 +240,7 @@ void remove_stripped_attachments(struct _state *state){
int process_message(struct session_data *sdata, struct _state *state, struct __data *data, struct __config *cfg){
int rc;
int rc, fd;
/* discard if existing message_id */
@ -257,6 +257,12 @@ int process_message(struct session_data *sdata, struct _state *state, struct __d
return ERR_EXISTS;
}
fd = open(state->message_id_hash, O_CREAT|O_EXCL|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR);
if(fd == -1){
remove_stripped_attachments(state);
return ERR_EXISTS;
}
/* store base64 encoded file attachments */

View File

@ -145,6 +145,9 @@ void post_parse(struct session_data *sdata, struct _state *state, struct __confi
else snprintf(state->message_id, SMALLBUFSIZE-1, "null");
}
digest_string(state->message_id, &(state->message_id_hash[0]));
}

View File

@ -48,6 +48,7 @@ void init_state(struct _state *state){
state->content_type_is_set = 0;
memset(state->message_id, 0, SMALLBUFSIZE);
memset(state->message_id_hash, 0, 2*DIGEST_LENGTH+1);
memset(state->miscbuf, 0, MAX_TOKEN_LEN);
memset(state->qpbuf, 0, MAX_TOKEN_LEN);

View File

@ -231,6 +231,7 @@ int handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){
}
else {
inj = process_message(&sdata, &sstate, data, cfg);
unlink(sstate.message_id_hash);
counters.c_size += sdata.tot_len;
}
@ -258,7 +259,7 @@ int handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){
counters.c_rcvd++;
if(inj == ERR_EXISTS){
syslog(LOG_PRIORITY, "%s: discarding: duplicate message, id: %llu", sdata.ttmpfile, sdata.duplicate_id);
syslog(LOG_PRIORITY, "%s: discarding: duplicate message, id: %llu, message-id: %s", sdata.ttmpfile, sdata.duplicate_id, sstate.message_id);
counters.c_duplicate++;
status = S_STATUS_DUPLICATE;
}

View File

@ -78,7 +78,7 @@ int main(int argc, char **argv){
printf("post parsing...\n");
post_parse(&sdata, &state, &cfg);
printf("message-id: %s\n", state.message_id);
printf("message-id: %s / %s\n", state.message_id, state.message_id_hash);
printf("from: *%s (%s)*\n", state.b_from, state.b_from_domain);
printf("to: *%s (%s)*\n", state.b_to, state.b_to_domain);
printf("reference: *%s*\n", state.reference);