/*
 * parser.c, SJ
 */
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <piler.h>
|
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
struct _state parse_message(struct session_data *sdata, struct __config *cfg){
|
2011-11-14 15:57:52 +01:00
|
|
|
FILE *f;
|
|
|
|
char buf[MAXBUFSIZE];
|
|
|
|
struct _state state;
|
2011-11-19 21:25:44 +01:00
|
|
|
int i, len;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
init_state(&state);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
f = fopen(sdata->ttmpfile, "r");
|
|
|
|
if(!f){
|
|
|
|
syslog(LOG_PRIORITY, "%s: cannot open", sdata->ttmpfile);
|
|
|
|
return state;
|
|
|
|
}
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
|
|
|
|
state.mfd = open(sdata->tmpframe, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
|
|
|
if(state.mfd == -1){
|
|
|
|
syslog(LOG_PRIORITY, "%s: cannot open frame file: %s", sdata->ttmpfile, sdata->tmpframe);
|
|
|
|
return state;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
while(fgets(buf, sizeof(buf)-1, f)){
|
|
|
|
parse_line(buf, &state, sdata, cfg);
|
|
|
|
}
|
|
|
|
|
|
|
|
close(state.mfd); state.mfd = 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
fclose(f);
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
free_list(state.boundaries);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
for(i=1; i<=state.n_attachments; i++){
|
2011-11-19 21:25:44 +01:00
|
|
|
digest_file(state.attachments[i].internalname, &(state.attachments[i].digest[0]));
|
2011-11-22 12:31:54 +01:00
|
|
|
if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s", sdata->ttmpfile, i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest);
|
|
|
|
//printf("attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest);
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
if(state.message_id[0] == 0) snprintf(state.message_id, SMALLBUFSIZE-1, "null");
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
len = strlen(state.b_from);
|
|
|
|
if(state.b_from[len-1] == ' ') state.b_from[len-1] = '\0';
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
len = strlen(state.b_to);
|
|
|
|
if(state.b_to[len-1] == ' ') state.b_to[len-1] = '\0';
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
syslog(LOG_PRIORITY, "%s: from=%s, to=%s, subj=%s, message-id=%s", sdata->ttmpfile, state.b_from, state.b_to, state.b_subject, state.message_id);
|
|
|
|
|
|
|
|
return state;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
int parse_line(char *buf, struct _state *state, struct session_data *sdata, struct __config *cfg){
|
|
|
|
char *p, *r, puf[SMALLBUFSIZE];
|
|
|
|
int x, len, b64_len, boundary_line=0;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
state->line_num++;
|
2011-11-19 21:25:44 +01:00
|
|
|
len = strlen(buf);
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
//printf("buf: %s", buf);
|
|
|
|
|
|
|
|
if(state->message_rfc822 == 0 && (buf[0] == '\r' || buf[0] == '\n') ){
|
|
|
|
state->message_state = MSG_BODY;
|
|
|
|
|
|
|
|
if(state->is_header == 1) state->is_header = 0;
|
|
|
|
state->is_1st_header = 0;
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
|
|
|
|
if(state->message_state == MSG_BODY && state->fd != -1 && is_item_on_string(state->boundaries, buf) == 0){
|
2011-11-19 21:25:44 +01:00
|
|
|
//printf("dumping: %s", buf);
|
|
|
|
write(state->fd, buf, len);
|
|
|
|
state->attachments[state->n_attachments].size += len;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
state->saved_size += len;
|
|
|
|
//printf("%s", buf);
|
|
|
|
write(state->mfd, buf, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
if(state->message_state == MSG_BODY && state->has_to_dump == 1 && state->pushed_pointer == 0){
|
|
|
|
//printf("####name: %s, type: %s, base64: %d\n", state->filename, state->type, state->base64);
|
|
|
|
|
|
|
|
state->pushed_pointer = 1;
|
|
|
|
|
|
|
|
|
|
|
|
// this is a real attachment to dump
|
|
|
|
if(state->base64 == 1 && strlen(state->filename) > 5 && strlen(state->type) > 3 && state->n_attachments < MAX_ATTACHMENTS-1){
|
|
|
|
state->n_attachments++;
|
|
|
|
|
|
|
|
snprintf(state->attachments[state->n_attachments].filename, TINYBUFSIZE-1, "%s", state->filename);
|
|
|
|
snprintf(state->attachments[state->n_attachments].type, TINYBUFSIZE-1, "%s", state->type);
|
|
|
|
snprintf(state->attachments[state->n_attachments].internalname, TINYBUFSIZE-1, "%s.a%d", sdata->ttmpfile, state->n_attachments);
|
|
|
|
|
|
|
|
//printf("DUMP FILE: %s\n", state->attachments[state->n_attachments].internalname);
|
|
|
|
state->fd = open(state->attachments[state->n_attachments].internalname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
|
|
|
|
|
|
|
snprintf(puf, sizeof(puf)-1, "ATTACHMENT_POINTER_%s.a%d", sdata->ttmpfile, state->n_attachments);
|
|
|
|
write(state->mfd, puf, strlen(puf));
|
|
|
|
//printf("%s", puf);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
state->has_to_dump = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(*buf == '.' && *(buf+1) == '.') buf++;
|
|
|
|
|
|
|
|
/* undefined message state */
|
|
|
|
if(state->is_header == 1 && buf[0] != ' ' && buf[0] != '\t' && strchr(buf, ':')) state->message_state = MSG_UNDEF;
|
|
|
|
|
|
|
|
/* skip empty lines */
|
|
|
|
|
|
|
|
if(state->message_rfc822 == 0 && (buf[0] == '\r' || buf[0] == '\n') ){
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
trimBuffer(buf);
|
|
|
|
|
|
|
|
|
|
|
|
/* skip the first line, if it's a "From <email address> date" format */
|
|
|
|
if(state->line_num == 1 && strncmp(buf, "From ", 5) == 0) return 0;
|
|
|
|
|
|
|
|
if(state->is_header == 0 && buf[0] != ' ' && buf[0] != '\t') state->message_state = MSG_BODY;
|
|
|
|
|
|
|
|
if((state->content_type_is_set == 0 || state->is_header == 1) && strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0) state->message_state = MSG_CONTENT_TYPE;
|
|
|
|
else if(strncasecmp(buf, "Content-Transfer-Encoding:", strlen("Content-Transfer-Encoding:")) == 0) state->message_state = MSG_CONTENT_TRANSFER_ENCODING;
|
|
|
|
else if(strncasecmp(buf, "Content-Disposition:", strlen("Content-Disposition:")) == 0) state->message_state = MSG_CONTENT_DISPOSITION;
|
|
|
|
|
|
|
|
|
|
|
|
if(state->message_state == MSG_CONTENT_TYPE || state->message_state == MSG_CONTENT_TRANSFER_ENCODING) state->is_header = 1;
|
|
|
|
|
|
|
|
|
|
|
|
/* header checks */
|
|
|
|
|
|
|
|
if(state->is_header == 1){
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(strncasecmp(buf, "From:", strlen("From:")) == 0) state->message_state = MSG_FROM;
|
2011-11-14 15:57:52 +01:00
|
|
|
else if(strncasecmp(buf, "To:", 3) == 0) state->message_state = MSG_TO;
|
|
|
|
else if(strncasecmp(buf, "Cc:", 3) == 0) state->message_state = MSG_CC;
|
|
|
|
else if(strncasecmp(buf, "Message-Id:", 11) == 0) state->message_state = MSG_MESSAGE_ID;
|
|
|
|
else if(strncasecmp(buf, "Subject:", strlen("Subject:")) == 0) state->message_state = MSG_SUBJECT;
|
|
|
|
else if(strncasecmp(buf, "Date:", strlen("Date:")) == 0 && sdata->sent == 0) sdata->sent = parse_date_header(buf);
|
|
|
|
|
|
|
|
if(state->message_state == MSG_MESSAGE_ID && state->message_id[0] == 0){
|
|
|
|
p = strchr(buf+11, ' ');
|
|
|
|
if(p) p = buf + 12;
|
|
|
|
else p = buf + 11;
|
|
|
|
|
|
|
|
snprintf(state->message_id, SMALLBUFSIZE-1, "%s", p);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we are interested in only From:, To:, Subject:, Received:, Content-*: header lines */
|
|
|
|
if(state->message_state <= 0) return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if((p = strcasestr(buf, "boundary"))){
|
|
|
|
x = extract_boundary(p, state);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
/* Content-type: checking */
|
|
|
|
|
|
|
|
if(state->message_state == MSG_CONTENT_TYPE){
|
|
|
|
state->message_rfc822 = 0;
|
|
|
|
|
|
|
|
/* extract Content type */
|
|
|
|
|
|
|
|
p = strchr(buf, ':');
|
|
|
|
if(p){
|
|
|
|
p++;
|
|
|
|
if(*p == ' ' || *p == '\t') p++;
|
2011-11-22 12:31:54 +01:00
|
|
|
snprintf(state->type, TINYBUFSIZE-1, "%s", p);
|
2011-11-14 15:57:52 +01:00
|
|
|
state->content_type_is_set = 1;
|
2011-11-22 12:31:54 +01:00
|
|
|
p = strchr(state->type, ';');
|
2011-11-14 15:57:52 +01:00
|
|
|
if(p) *p = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if(strcasestr(buf, "text/plain") ||
|
|
|
|
strcasestr(buf, "multipart/mixed") ||
|
|
|
|
strcasestr(buf, "multipart/alternative") ||
|
|
|
|
strcasestr(buf, "multipart/report") ||
|
|
|
|
strcasestr(buf, "message/delivery-status") ||
|
|
|
|
strcasestr(buf, "text/rfc822-headers") ||
|
|
|
|
strcasestr(buf, "message/rfc822") ||
|
|
|
|
strcasestr(buf, "application/ms-tnef")
|
|
|
|
){
|
2011-11-16 14:47:47 +01:00
|
|
|
state->textplain = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
else if(strcasestr(buf, "text/html")){
|
2011-11-16 14:47:47 +01:00
|
|
|
state->texthtml = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
/* switch (back) to header mode if we encounterd an attachment with "message/rfc822" content-type */
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(strcasestr(buf, "message/rfc822")){
|
|
|
|
state->message_rfc822 = 1;
|
|
|
|
state->is_header = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if(strcasestr(buf, "charset") && strcasestr(buf, "UTF-8")) state->utf8 = 1;
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
if((state->message_state == MSG_CONTENT_TYPE || state->message_state == MSG_CONTENT_DISPOSITION) && strlen(state->filename) < 5){
|
|
|
|
extractNameFromHeaderLine(buf, "name", state->filename);
|
2011-11-16 14:47:47 +01:00
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
|
|
|
if(state->message_state == MSG_CONTENT_TRANSFER_ENCODING){
|
2011-11-22 12:31:54 +01:00
|
|
|
if(strcasestr(buf, "base64")) state->base64 = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
if(strcasestr(buf, "quoted-printable")) state->qp = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
/* boundary check, and reset variables */
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
boundary_line = is_item_on_string(state->boundaries, buf);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(!strstr(buf, "boundary=") && !strstr(buf, "boundary =") && boundary_line == 1){
|
|
|
|
state->content_type_is_set = 0;
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->has_to_dump == 1){
|
2011-11-14 15:57:52 +01:00
|
|
|
if(state->fd != -1) close(state->fd);
|
|
|
|
state->fd = -1;
|
2011-11-16 14:47:47 +01:00
|
|
|
}
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
state->has_to_dump = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
state->base64 = 0; state->textplain = 0; state->texthtml = state->octetstream = 0;
|
|
|
|
state->skip_html = 0;
|
|
|
|
state->utf8 = 0;
|
|
|
|
state->qp = 0;
|
|
|
|
|
|
|
|
state->realbinary = 0;
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
state->pushed_pointer = 0;
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
memset(state->filename, 0, TINYBUFSIZE);
|
|
|
|
memset(state->type, 0, TINYBUFSIZE);
|
|
|
|
|
|
|
|
state->message_state = MSG_UNDEF;
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(boundary_line == 1){ return 0; }
|
|
|
|
|
|
|
|
|
|
|
|
/* end of boundary check */
|
|
|
|
|
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->is_header == 1){
|
|
|
|
/* skip irrelevant headers */
|
|
|
|
if(state->message_state != MSG_SUBJECT && state->message_state != MSG_FROM && state->message_state != MSG_TO && state->message_state != MSG_CC) return 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->message_state == MSG_SUBJECT) fixupEncodedHeaderLine(buf);
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
/* don't process body if it's not a text or html part */
|
|
|
|
if(state->message_state == MSG_BODY && state->textplain == 0 && state->texthtml == 0) return 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->base64 == 1 && state->message_state == MSG_BODY){
|
|
|
|
b64_len = decodeBase64(buf);
|
|
|
|
fixupBase64EncodedLine(buf, state);
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
|
|
|
if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state);
|
|
|
|
|
|
|
|
if(state->message_state == MSG_BODY){
|
|
|
|
if(state->qp == 1) decodeQP(buf);
|
|
|
|
if(state->utf8 == 1) decodeUTF8(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
decodeURL(buf);
|
|
|
|
|
|
|
|
if(state->texthtml == 1) decodeHTML(buf);
|
|
|
|
|
|
|
|
|
|
|
|
translateLine((unsigned char*)buf, state);
|
|
|
|
|
|
|
|
reassembleToken(buf);
|
|
|
|
|
|
|
|
|
|
|
|
if(state->is_header == 1) p = strchr(buf, ' ');
|
|
|
|
else p = buf;
|
|
|
|
|
|
|
|
do {
|
2011-11-16 14:47:47 +01:00
|
|
|
memset(puf, 0, sizeof(puf));
|
|
|
|
p = split(p, ' ', puf, sizeof(puf)-1);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(puf[0] == '\0') continue;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
degenerateToken((unsigned char*)puf);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(puf[0] == '\0') continue;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->message_state == MSG_SUBJECT){
|
|
|
|
r = &puf[0]; for(; *r; r++){ if(*r == '_') *r = ' '; }
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->qp == 1 && puf[strlen(puf)-1] == '='){
|
|
|
|
puf[strlen(puf)-1] = '\0';
|
|
|
|
}
|
|
|
|
else if(state->message_state != MSG_SUBJECT || (p && strchr(p, ' ')) ){
|
|
|
|
strncat(puf, " ", sizeof(puf)-1);
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->is_header == 0 && strncmp(puf, "URL*", 4) && (puf[0] == ' ' || strlen(puf) > MAX_WORD_LEN || isHexNumber(puf)) ) continue;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
len = strlen(puf);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
if(state->message_state == MSG_SUBJECT && state->is_1st_header == 1 && strlen(state->b_subject) < MAXBUFSIZE-len-1)
|
2011-11-16 14:47:47 +01:00
|
|
|
memcpy(&(state->b_subject[strlen(state->b_subject)]), puf, len);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
else if(state->message_state == MSG_FROM && strchr(puf, '@') && state->is_1st_header == 1 && state->b_from[0] == '\0' && strlen(state->b_from) < SMALLBUFSIZE-len-1)
|
2011-11-16 14:47:47 +01:00
|
|
|
memcpy(&(state->b_from[strlen(state->b_from)]), puf, len);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
else if((state->message_state == MSG_TO || state->message_state == MSG_CC) && state->is_1st_header == 1 && strchr(puf, '@') && strlen(state->b_to) < SMALLBUFSIZE-len-1)
|
2011-11-16 14:47:47 +01:00
|
|
|
memcpy(&(state->b_to[strlen(state->b_to)]), puf, len);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
else if(state->message_state == MSG_BODY && strlen(state->b_body) < BIGBUFSIZE-len-1)
|
|
|
|
memcpy(&(state->b_body[strlen(state->b_body)]), puf, len);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
} while(p);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|