2011-11-14 15:57:52 +01:00
/*
 * parser.c, SJ
*/
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
# include <sys/socket.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <netinet/in.h>
# include <arpa/inet.h>
# include <fcntl.h>
# include <unistd.h>
# include <piler.h>
2013-01-06 22:16:21 +01:00
struct _state parse_message ( struct session_data * sdata , int take_into_pieces , struct __data * data , struct __config * cfg ) {
2011-11-14 15:57:52 +01:00
FILE * f ;
2012-06-06 14:29:03 +02:00
int i , len ;
char * p , buf [ MAXBUFSIZE ] , puf [ SMALLBUFSIZE ] ;
2012-08-21 21:57:39 +02:00
char writebuffer [ MAXBUFSIZE ] , abuffer [ MAXBUFSIZE ] ;
2011-11-14 15:57:52 +01:00
struct _state state ;
2011-11-16 14:47:47 +01:00
init_state ( & state ) ;
2011-11-14 15:57:52 +01:00
2012-01-03 00:19:43 +01:00
f = fopen ( sdata - > filename , " r " ) ;
2011-11-14 15:57:52 +01:00
if ( ! f ) {
syslog ( LOG_PRIORITY , " %s: cannot open " , sdata - > ttmpfile ) ;
return state ;
}
2011-11-19 21:25:44 +01:00
2012-06-06 14:29:03 +02:00
if ( sdata - > num_of_rcpt_to > 0 ) {
for ( i = 0 ; i < sdata - > num_of_rcpt_to ; i + + ) {
extractEmail ( sdata - > rcptto [ i ] , puf ) ;
if ( strlen ( puf ) > 5 ) {
p = strstr ( puf , cfg - > hostid ) ;
if ( p & & * ( p - 1 ) = = ' . ' ) {
* ( p - 1 ) = ' ' ;
* p = ' \0 ' ;
len = strlen ( puf ) ;
if ( does_it_seem_like_an_email_address ( puf ) = = 1 ) {
if ( cfg - > verbosity > = _LOG_DEBUG ) syslog ( LOG_PRIORITY , " %s: processing rcpt to address: *%s* " , sdata - > ttmpfile , puf ) ;
2012-09-04 14:49:56 +02:00
if ( state . tolen < MAXBUFSIZE - len - 1 ) {
2013-08-14 14:24:30 +02:00
if ( findnode ( state . rcpt , puf ) = = NULL ) {
addnode ( state . rcpt , puf ) ;
2012-09-04 14:49:56 +02:00
memcpy ( & ( state . b_to [ state . tolen ] ) , puf , len ) ;
state . tolen + = len ;
2012-06-06 14:29:03 +02:00
2012-09-04 14:49:56 +02:00
if ( state . tolen < MAXBUFSIZE - len - 1 ) {
2012-06-06 14:29:03 +02:00
split_email_address ( puf ) ;
2012-09-04 14:49:56 +02:00
memcpy ( & ( state . b_to [ state . tolen ] ) , puf , len ) ;
state . tolen + = len ;
2012-06-06 14:29:03 +02:00
}
}
}
}
}
}
}
}
2012-06-01 14:25:49 +02:00
if ( take_into_pieces = = 1 ) {
state . mfd = open ( sdata - > tmpframe , O_CREAT | O_RDWR , S_IRUSR | S_IWUSR ) ;
if ( state . mfd = = - 1 ) {
syslog ( LOG_PRIORITY , " %s: cannot open frame file: %s " , sdata - > ttmpfile , sdata - > tmpframe ) ;
fclose ( f ) ;
return state ;
}
2011-11-14 15:57:52 +01:00
}
2011-11-19 21:25:44 +01:00
while ( fgets ( buf , sizeof ( buf ) - 1 , f ) ) {
2013-01-06 22:16:21 +01:00
parse_line ( buf , & state , sdata , take_into_pieces , & writebuffer [ 0 ] , sizeof ( writebuffer ) , & abuffer [ 0 ] , sizeof ( abuffer ) , data , cfg ) ;
2012-08-21 21:57:39 +02:00
}
if ( take_into_pieces = = 1 & & state . writebufpos > 0 ) {
len = write ( state . mfd , writebuffer , state . writebufpos ) ;
memset ( writebuffer , 0 , sizeof ( writebuffer ) ) ;
state . writebufpos = 0 ;
2012-06-01 14:25:49 +02:00
}
if ( take_into_pieces = = 1 ) {
close ( state . mfd ) ; state . mfd = 0 ;
2011-11-19 21:25:44 +01:00
}
2011-11-14 15:57:52 +01:00
fclose ( f ) ;
2012-01-03 00:19:43 +01:00
return state ;
}
void post_parse ( struct session_data * sdata , struct _state * state , struct __config * cfg ) {
2012-09-09 23:16:09 +02:00
int i , len , rec = 0 ;
2012-01-03 00:19:43 +01:00
char * p ;
2013-08-14 14:24:30 +02:00
clearhash ( state - > boundaries ) ;
clearhash ( state - > rcpt ) ;
clearhash ( state - > rcpt_domain ) ;
clearhash ( state - > journal_recipient ) ;
2011-11-19 21:25:44 +01:00
2012-01-03 00:19:43 +01:00
trimBuffer ( state - > b_subject ) ;
2014-02-11 15:34:12 +01:00
fixupEncodedHeaderLine ( state - > b_subject , MAXBUFSIZE ) ;
2011-11-28 14:21:14 +01:00
2011-11-14 15:57:52 +01:00
2012-01-03 00:19:43 +01:00
if ( sdata - > internal_sender = = 0 ) sdata - > direction = DIRECTION_INCOMING ;
else {
if ( sdata - > internal_recipient = = 1 ) sdata - > direction = DIRECTION_INTERNAL ;
if ( sdata - > external_recipient = = 1 ) sdata - > direction = DIRECTION_OUTGOING ;
if ( sdata - > internal_recipient = = 1 & & sdata - > external_recipient = = 1 ) sdata - > direction = DIRECTION_INTERNAL_AND_OUTGOING ;
}
2011-11-19 21:25:44 +01:00
2011-11-28 14:21:14 +01:00
2012-01-03 00:19:43 +01:00
for ( i = 1 ; i < = state - > n_attachments ; i + + ) {
digest_file ( state - > attachments [ i ] . internalname , & ( state - > attachments [ i ] . digest [ 0 ] ) ) ;
2011-12-30 15:52:59 +01:00
2012-01-03 00:19:43 +01:00
if ( cfg - > verbosity > = _LOG_DEBUG ) syslog ( LOG_PRIORITY , " %s: attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s " , sdata - > ttmpfile , i , state - > attachments [ i ] . filename , state - > attachments [ i ] . type , state - > attachments [ i ] . size , state - > attachments [ i ] . internalname , state - > attachments [ i ] . digest ) ;
p = determine_attachment_type ( state - > attachments [ i ] . filename , state - > attachments [ i ] . type ) ;
2011-12-30 15:52:59 +01:00
len = strlen ( p ) ;
2012-01-03 00:19:43 +01:00
if ( strlen ( sdata - > attachments ) < SMALLBUFSIZE - len - 1 & & ! strstr ( sdata - > attachments , p ) ) memcpy ( & ( sdata - > attachments [ strlen ( sdata - > attachments ) ] ) , p , len ) ;
2012-09-07 15:08:50 +02:00
if ( state - > attachments [ i ] . dumped = = 1 ) {
2012-09-09 23:16:09 +02:00
rec = 0 ;
if ( state - > bodylen < BIGBUFSIZE - 1024 ) extract_attachment_content ( sdata , state , state - > attachments [ i ] . aname , get_attachment_extractor_by_filename ( state - > attachments [ i ] . filename ) , & rec ) ;
2012-09-07 15:08:50 +02:00
unlink ( state - > attachments [ i ] . aname ) ;
}
2011-11-19 21:25:44 +01:00
}
2012-07-06 13:02:40 +02:00
if ( state - > message_id [ 0 ] = = 0 ) {
if ( cfg - > archive_emails_not_having_message_id = = 1 )
2012-09-26 15:26:30 +02:00
snprintf ( state - > message_id , SMALLBUFSIZE - 1 , " %s " , sdata - > ttmpfile ) ;
2012-07-06 13:02:40 +02:00
else snprintf ( state - > message_id , SMALLBUFSIZE - 1 , " null " ) ;
}
2011-11-14 15:57:52 +01:00
2014-04-25 21:17:01 +02:00
digest_string ( state - > message_id , & ( state - > message_id_hash [ 0 ] ) ) ;
2011-11-14 15:57:52 +01:00
}
2012-11-02 22:17:21 +01:00
void storno_attachment ( struct _state * state ) {
state - > has_to_dump = 0 ;
if ( state - > n_attachments < = 0 ) return ;
state - > attachments [ state - > n_attachments ] . size = 0 ;
state - > attachments [ state - > n_attachments ] . dumped = 0 ;
memset ( state - > attachments [ state - > n_attachments ] . type , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . shorttype , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . aname , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . filename , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . internalname , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . digest , 0 , 2 * DIGEST_LENGTH + 1 ) ;
state - > n_attachments - - ;
}
2013-01-06 22:16:21 +01:00
int parse_line ( char * buf , struct _state * state , struct session_data * sdata , int take_into_pieces , char * writebuffer , int writebuffersize , char * abuffer , int abuffersize , struct __data * data , struct __config * cfg ) {
2012-01-14 09:53:26 +01:00
char * p , * q , puf [ SMALLBUFSIZE ] ;
2012-09-07 15:08:50 +02:00
unsigned char b64buffer [ MAXBUFSIZE ] ;
2013-02-19 22:28:44 +01:00
int n64 , len , writelen , boundary_line = 0 ;
2011-11-14 15:57:52 +01:00
2012-07-23 16:51:36 +02:00
if ( cfg - > debug = = 1 ) printf ( " line: %s " , buf ) ;
2011-11-14 15:57:52 +01:00
state - > line_num + + ;
2011-11-19 21:25:44 +01:00
len = strlen ( buf ) ;
2011-12-13 17:05:22 +01:00
if ( state - > is_1st_header = = 1 & & ( strncmp ( buf , " Received: by piler " , strlen ( " Received: by piler " ) ) = = 0 | | strncmp ( buf , " X-piler-id: " , strlen ( " X-piler-id: " ) ) = = 0 ) ) {
sdata - > restored_copy = 1 ;
}
2012-01-26 14:35:51 +01:00
if ( state - > is_1st_header = = 1 & & * ( cfg - > spam_header_line ) ! = ' \0 ' & & strncmp ( buf , cfg - > spam_header_line , strlen ( cfg - > spam_header_line ) ) = = 0 ) {
sdata - > spam_message = 1 ;
}
2014-01-13 13:06:10 +01:00
if ( state - > is_1st_header = = 1 & & sdata - > ms_journal = = 0 & & ( strncmp ( buf , " X-MS-Journal-Report: " , strlen ( " X-MS-Journal-Report: " ) ) = = 0 | | ( sdata - > import = = 1 & & strncmp ( buf , " X-MS-Exchange-Organization-Auth " , strlen ( " X-MS-Exchange-Organization-Auth " ) ) = = 0 ) ) ) {
2012-09-28 21:48:20 +02:00
sdata - > ms_journal = 1 ;
2012-09-03 10:06:34 +02:00
memset ( state - > message_id , 0 , SMALLBUFSIZE ) ;
2012-09-03 11:08:12 +02:00
memset ( state - > b_from , 0 , SMALLBUFSIZE ) ;
memset ( state - > b_from_domain , 0 , SMALLBUFSIZE ) ;
2012-09-03 10:06:34 +02:00
}
2011-11-22 12:31:54 +01:00
if ( state - > message_rfc822 = = 0 & & ( buf [ 0 ] = = ' \r ' | | buf [ 0 ] = = ' \n ' ) ) {
state - > message_state = MSG_BODY ;
if ( state - > is_header = = 1 ) state - > is_header = 0 ;
state - > is_1st_header = 0 ;
2014-03-14 12:17:50 +01:00
if ( state - > anamepos > 0 ) {
extractNameFromHeaderLine ( state - > attachment_name_buf , " name " , state - > filename ) ;
}
2011-11-19 21:25:44 +01:00
}
2012-09-28 21:48:20 +02:00
if ( sdata - > ms_journal = = 1 & & strncasecmp ( buf , " Received: " , strlen ( " Received: " ) ) = = 0 ) {
2013-11-22 21:44:04 +01:00
if ( state - > is_1st_header = = 0 ) memset ( state - > b_subject , 0 , MAXBUFSIZE ) ;
2012-09-03 11:08:12 +02:00
state - > is_1st_header = 1 ;
state - > is_header = 1 ;
memset ( state - > b_body , 0 , BIGBUFSIZE ) ;
state - > bodylen = 0 ;
2014-02-11 22:11:58 +01:00
if ( sdata - > import = = 1 ) {
sdata - > sent = 0 ;
}
2012-09-03 11:08:12 +02:00
}
2011-11-22 12:31:54 +01:00
2012-06-01 14:25:49 +02:00
if ( take_into_pieces = = 1 ) {
2013-09-11 09:19:29 +02:00
if ( state - > message_state = = MSG_BODY & & state - > fd ! = - 1 & & is_substr_in_hash ( state - > boundaries , buf ) = = 0 ) {
2012-08-21 21:57:39 +02:00
//n = write(state->fd, buf, len); // WRITE
if ( len + state - > abufpos > abuffersize - 1 ) {
2012-11-03 23:42:36 +01:00
write ( state - > fd , abuffer , state - > abufpos ) ;
2012-09-07 15:08:50 +02:00
if ( state - > b64fd ! = - 1 ) {
abuffer [ state - > abufpos ] = ' \0 ' ;
2013-03-23 17:58:58 +01:00
if ( state - > base64 = = 1 ) {
n64 = base64_decode_attachment_buffer ( abuffer , state - > abufpos , & b64buffer [ 0 ] , sizeof ( b64buffer ) ) ;
n64 = write ( state - > b64fd , b64buffer , n64 ) ;
}
else {
n64 = write ( state - > b64fd , abuffer , state - > abufpos ) ;
}
2012-09-07 15:08:50 +02:00
}
state - > abufpos = 0 ; memset ( abuffer , 0 , abuffersize ) ;
2012-08-21 21:57:39 +02:00
}
memcpy ( abuffer + state - > abufpos , buf , len ) ; state - > abufpos + = len ;
2012-06-01 14:25:49 +02:00
state - > attachments [ state - > n_attachments ] . size + = len ;
}
else {
state - > saved_size + = len ;
2012-08-21 21:57:39 +02:00
//n = write(state->mfd, buf, len); // WRITE
if ( len + state - > writebufpos > writebuffersize - 1 ) {
2012-11-03 23:42:36 +01:00
write ( state - > mfd , writebuffer , state - > writebufpos ) ; state - > writebufpos = 0 ; memset ( writebuffer , 0 , writebuffersize ) ;
2012-08-21 21:57:39 +02:00
}
memcpy ( writebuffer + state - > writebufpos , buf , len ) ; state - > writebufpos + = len ;
2012-06-01 14:25:49 +02:00
}
2011-11-19 21:25:44 +01:00
}
2011-11-22 12:31:54 +01:00
if ( state - > message_state = = MSG_BODY & & state - > has_to_dump = = 1 & & state - > pushed_pointer = = 0 ) {
//printf("####name: %s, type: %s, base64: %d\n", state->filename, state->type, state->base64);
state - > pushed_pointer = 1 ;
2013-03-23 17:58:58 +01:00
// this is a real attachment to dump, it doesn't have to be base64 encoded!
if ( strlen ( state - > filename ) > 4 & & strlen ( state - > type ) > 3 & & state - > n_attachments < MAX_ATTACHMENTS - 1 ) {
2011-11-22 12:31:54 +01:00
state - > n_attachments + + ;
snprintf ( state - > attachments [ state - > n_attachments ] . filename , TINYBUFSIZE - 1 , " %s " , state - > filename ) ;
snprintf ( state - > attachments [ state - > n_attachments ] . type , TINYBUFSIZE - 1 , " %s " , state - > type ) ;
snprintf ( state - > attachments [ state - > n_attachments ] . internalname , TINYBUFSIZE - 1 , " %s.a%d " , sdata - > ttmpfile , state - > n_attachments ) ;
2012-09-07 15:08:50 +02:00
snprintf ( state - > attachments [ state - > n_attachments ] . aname , TINYBUFSIZE - 1 , " %s.a%d.bin " , sdata - > ttmpfile , state - > n_attachments ) ;
2011-11-22 12:31:54 +01:00
//printf("DUMP FILE: %s\n", state->attachments[state->n_attachments].internalname);
2012-06-01 14:25:49 +02:00
if ( take_into_pieces = = 1 ) {
state - > fd = open ( state - > attachments [ state - > n_attachments ] . internalname , O_CREAT | O_RDWR , S_IRUSR | S_IWUSR ) ;
2012-09-07 15:08:50 +02:00
2014-02-11 15:34:12 +01:00
fixupEncodedHeaderLine ( state - > attachments [ state - > n_attachments ] . filename , TINYBUFSIZE ) ;
2012-12-09 23:14:39 +01:00
2012-09-09 23:16:09 +02:00
p = get_attachment_extractor_by_filename ( state - > attachments [ state - > n_attachments ] . filename ) ;
snprintf ( state - > attachments [ state - > n_attachments ] . shorttype , TINYBUFSIZE - 1 , " %s " , p ) ;
if ( strcmp ( " other " , p ) ) {
2013-05-15 10:59:02 +02:00
state - > b64fd = open ( state - > attachments [ state - > n_attachments ] . aname , O_CREAT | O_RDWR , S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
2012-09-07 15:08:50 +02:00
state - > attachments [ state - > n_attachments ] . dumped = 1 ;
}
2012-09-09 23:16:09 +02:00
2012-06-01 14:25:49 +02:00
if ( state - > fd = = - 1 ) {
2012-11-02 22:17:21 +01:00
storno_attachment ( state ) ;
2012-06-01 14:25:49 +02:00
syslog ( LOG_PRIORITY , " %s: error opening %s " , sdata - > ttmpfile , state - > attachments [ state - > n_attachments ] . internalname ) ;
}
else {
snprintf ( puf , sizeof ( puf ) - 1 , " ATTACHMENT_POINTER_%s.a%d_XXX_PILER " , sdata - > ttmpfile , state - > n_attachments ) ;
2012-08-21 21:57:39 +02:00
//n = write(state->mfd, puf, strlen(puf)); // WRITE
writelen = strlen ( puf ) ;
if ( writelen + state - > writebufpos > writebuffersize - 1 ) {
2012-11-03 23:42:36 +01:00
write ( state - > mfd , writebuffer , state - > writebufpos ) ; state - > writebufpos = 0 ; memset ( writebuffer , 0 , writebuffersize ) ;
2012-08-21 21:57:39 +02:00
}
memcpy ( writebuffer + state - > writebufpos , puf , writelen ) ; state - > writebufpos + = writelen ;
2012-06-01 14:25:49 +02:00
}
2011-11-23 12:24:21 +01:00
}
2012-06-01 14:25:49 +02:00
2011-11-22 12:31:54 +01:00
}
else {
state - > has_to_dump = 0 ;
}
}
2011-11-14 15:57:52 +01:00
if ( * buf = = ' . ' & & * ( buf + 1 ) = = ' . ' ) buf + + ;
/* undefined message state */
if ( state - > is_header = = 1 & & buf [ 0 ] ! = ' ' & & buf [ 0 ] ! = ' \t ' & & strchr ( buf , ' : ' ) ) state - > message_state = MSG_UNDEF ;
/* skip empty lines */
if ( state - > message_rfc822 = = 0 & & ( buf [ 0 ] = = ' \r ' | | buf [ 0 ] = = ' \n ' ) ) {
return 0 ;
}
2013-02-19 22:28:44 +01:00
trimBuffer ( buf ) ;
2011-11-14 15:57:52 +01:00
/* skip the first line, if it's a "From <email address> date" format */
if ( state - > line_num = = 1 & & strncmp ( buf , " From " , 5 ) = = 0 ) return 0 ;
if ( state - > is_header = = 0 & & buf [ 0 ] ! = ' ' & & buf [ 0 ] ! = ' \t ' ) state - > message_state = MSG_BODY ;
/* header checks */
if ( state - > is_header = = 1 ) {
2011-11-16 14:47:47 +01:00
if ( strncasecmp ( buf , " From: " , strlen ( " From: " ) ) = = 0 ) state - > message_state = MSG_FROM ;
2013-03-24 01:20:12 +01:00
2014-03-14 12:17:50 +01:00
else if ( strncasecmp ( buf , " Content-Type: " , strlen ( " Content-Type: " ) ) = = 0 ) {
state - > message_state = MSG_CONTENT_TYPE ;
if ( state - > anamepos > 0 ) {
extractNameFromHeaderLine ( state - > attachment_name_buf , " name " , state - > filename ) ;
memset ( state - > attachment_name_buf , 0 , SMALLBUFSIZE ) ;
state - > anamepos = 0 ;
}
}
2013-03-24 01:20:12 +01:00
else if ( strncasecmp ( buf , " Content-Transfer-Encoding: " , strlen ( " Content-Transfer-Encoding: " ) ) = = 0 ) state - > message_state = MSG_CONTENT_TRANSFER_ENCODING ;
2014-03-14 12:17:50 +01:00
else if ( strncasecmp ( buf , " Content-Disposition: " , strlen ( " Content-Disposition: " ) ) = = 0 ) {
state - > message_state = MSG_CONTENT_DISPOSITION ;
if ( state - > anamepos > 0 ) {
extractNameFromHeaderLine ( state - > attachment_name_buf , " name " , state - > filename ) ;
memset ( state - > attachment_name_buf , 0 , SMALLBUFSIZE ) ;
state - > anamepos = 0 ;
}
2013-03-24 01:20:12 +01:00
2014-03-14 12:17:50 +01:00
}
2011-11-14 15:57:52 +01:00
else if ( strncasecmp ( buf , " To: " , 3 ) = = 0 ) state - > message_state = MSG_TO ;
else if ( strncasecmp ( buf , " Cc: " , 3 ) = = 0 ) state - > message_state = MSG_CC ;
2012-10-08 21:06:48 +02:00
else if ( strncasecmp ( buf , " Bcc: " , 4 ) = = 0 ) state - > message_state = MSG_CC ;
2011-11-14 15:57:52 +01:00
else if ( strncasecmp ( buf , " Message-Id: " , 11 ) = = 0 ) state - > message_state = MSG_MESSAGE_ID ;
2012-02-08 23:14:28 +01:00
else if ( strncasecmp ( buf , " References: " , 11 ) = = 0 ) state - > message_state = MSG_REFERENCES ;
2011-11-14 15:57:52 +01:00
else if ( strncasecmp ( buf , " Subject: " , strlen ( " Subject: " ) ) = = 0 ) state - > message_state = MSG_SUBJECT ;
2012-09-03 10:06:34 +02:00
else if ( strncasecmp ( buf , " Recipient: " , strlen ( " Recipient: " ) ) = = 0 ) state - > message_state = MSG_RECIPIENT ;
2013-10-19 10:34:29 +02:00
//else if(strncasecmp(buf, "Date:", strlen("Date:")) == 0 && sdata->sent == 0) sdata->sent = parse_date_header(buf, cfg);
/*
* in pilerimport : sdata - > sent = 0
* in piler daemon : sdata - > sent = sdata - > now
*/
else if ( strncasecmp ( buf , " Date: " , strlen ( " Date: " ) ) = = 0 ) {
if ( sdata - > sent = = 0 ) sdata - > sent = parse_date_header ( buf , cfg ) ;
else {
sdata - > sent = parse_date_header ( buf , cfg ) ;
/* allow +/-1 week drift in the parsed Date: value */
if ( sdata - > now - sdata - > sent > 604800 | | sdata - > sent - sdata - > now > 604800 ) sdata - > sent = sdata - > now ;
}
}
2013-02-22 15:35:37 +01:00
else if ( strncasecmp ( buf , " Delivery-date: " , strlen ( " Delivery-date: " ) ) = = 0 & & sdata - > delivered = = 0 ) sdata - > delivered = parse_date_header ( buf , cfg ) ;
2012-09-28 10:34:04 +02:00
else if ( strncasecmp ( buf , " Received: " , strlen ( " Received: " ) ) = = 0 ) state - > message_state = MSG_RECEIVED ;
2012-11-27 13:16:30 +01:00
else if ( cfg - > extra_to_field [ 0 ] ! = ' \0 ' & & strncasecmp ( buf , cfg - > extra_to_field , strlen ( cfg - > extra_to_field ) ) = = 0 ) state - > message_state = MSG_TO ;
2011-11-14 15:57:52 +01:00
if ( state - > message_state = = MSG_MESSAGE_ID & & state - > message_id [ 0 ] = = 0 ) {
p = strchr ( buf + 11 , ' ' ) ;
if ( p ) p = buf + 12 ;
else p = buf + 11 ;
snprintf ( state - > message_id , SMALLBUFSIZE - 1 , " %s " , p ) ;
}
/* we are interested in only From:, To:, Subject:, Received:, Content-*: header lines */
if ( state - > message_state < = 0 ) return 0 ;
}
2013-03-24 01:20:12 +01:00
if ( state - > message_state = = MSG_CONTENT_TYPE ) {
if ( ( p = strcasestr ( buf , " boundary " ) ) ) {
extract_boundary ( p , state ) ;
}
2011-11-14 15:57:52 +01:00
}
2012-09-28 21:48:20 +02:00
if ( state - > message_state = = MSG_BODY & & sdata - > ms_journal = = 1 & & strncasecmp ( buf , " Recipient: " , strlen ( " Recipient: " ) ) = = 0 ) {
2012-09-03 11:08:12 +02:00
state - > is_header = 1 ;
state - > is_1st_header = 1 ;
state - > message_state = MSG_RECIPIENT ;
}
2012-10-08 21:06:48 +02:00
if ( state - > message_state = = MSG_BODY & & sdata - > ms_journal = = 1 & & strncasecmp ( buf , " Bcc: " , 4 ) = = 0 ) {
state - > is_header = 1 ;
state - > is_1st_header = 1 ;
state - > message_state = MSG_CC ;
}
2012-09-03 10:06:34 +02:00
if ( state - > message_state = = MSG_RECIPIENT ) {
p = strstr ( buf , " Expanded: " ) ;
if ( p ) * p = ' \0 ' ;
}
2012-09-28 10:34:04 +02:00
2012-02-08 23:14:28 +01:00
if ( state - > is_1st_header = = 1 & & state - > message_state = = MSG_REFERENCES ) {
if ( strncasecmp ( buf , " References: " , 11 ) = = 0 ) parse_reference ( state , buf + 11 ) ;
else parse_reference ( state , buf ) ;
}
2011-11-28 14:21:14 +01:00
if ( state - > is_1st_header = = 1 & & state - > message_state = = MSG_SUBJECT & & strlen ( state - > b_subject ) + strlen ( buf ) < MAXBUFSIZE - 1 ) {
if ( state - > b_subject [ 0 ] = = ' \0 ' ) {
2012-10-02 15:21:16 +02:00
p = & buf [ 0 ] ;
if ( strncmp ( buf , " Subject: " , strlen ( " Subject: " ) ) = = 0 ) p + = strlen ( " Subject: " ) ;
2012-12-17 11:43:10 +01:00
if ( * p = = ' ' ) p + + ;
2012-10-02 15:21:16 +02:00
strncat ( state - > b_subject , p , MAXBUFSIZE - 1 ) ;
2011-11-28 14:21:14 +01:00
}
else {
p = strrchr ( state - > b_subject , ' ' ) ;
if ( p & & ( strcasestr ( p + 1 , " ?Q? " ) | | strcasestr ( p + 1 , " ?B? " ) ) ) {
strncat ( state - > b_subject , buf + 1 , MAXBUFSIZE - 1 ) ;
}
else strncat ( state - > b_subject , buf , MAXBUFSIZE - 1 ) ;
}
}
2012-02-07 17:21:23 +01:00
if ( state - > is_1st_header = = 1 ) {
2014-02-11 15:34:12 +01:00
fixupEncodedHeaderLine ( buf , MAXBUFSIZE ) ;
2012-02-07 17:21:23 +01:00
}
2011-11-16 14:47:47 +01:00
2011-11-14 15:57:52 +01:00
/* Content-type: checking */
if ( state - > message_state = = MSG_CONTENT_TYPE ) {
state - > message_rfc822 = 0 ;
/* extract Content type */
p = strchr ( buf , ' : ' ) ;
if ( p ) {
p + + ;
if ( * p = = ' ' | | * p = = ' \t ' ) p + + ;
2011-11-22 12:31:54 +01:00
snprintf ( state - > type , TINYBUFSIZE - 1 , " %s " , p ) ;
2013-03-24 01:20:12 +01:00
//state->content_type_is_set = 1;
2011-11-22 12:31:54 +01:00
p = strchr ( state - > type , ' ; ' ) ;
2011-11-14 15:57:52 +01:00
if ( p ) * p = ' \0 ' ;
}
if ( strcasestr ( buf , " text/plain " ) | |
strcasestr ( buf , " multipart/mixed " ) | |
strcasestr ( buf , " multipart/alternative " ) | |
strcasestr ( buf , " multipart/report " ) | |
strcasestr ( buf , " message/delivery-status " ) | |
strcasestr ( buf , " text/rfc822-headers " ) | |
2013-09-11 09:19:29 +02:00
strcasestr ( buf , " message/rfc822 " )
2011-11-14 15:57:52 +01:00
) {
2011-11-16 14:47:47 +01:00
state - > textplain = 1 ;
2011-11-14 15:57:52 +01:00
}
else if ( strcasestr ( buf , " text/html " ) ) {
2011-11-16 14:47:47 +01:00
state - > texthtml = 1 ;
2011-11-14 15:57:52 +01:00
}
2011-11-16 14:47:47 +01:00
/* switch (back) to header mode if we encounterd an attachment with "message/rfc822" content-type */
2011-11-14 15:57:52 +01:00
if ( strcasestr ( buf , " message/rfc822 " ) ) {
state - > message_rfc822 = 1 ;
state - > is_header = 1 ;
}
if ( strcasestr ( buf , " charset " ) & & strcasestr ( buf , " UTF-8 " ) ) state - > utf8 = 1 ;
2011-11-19 21:25:44 +01:00
}
2011-11-14 15:57:52 +01:00
2011-11-22 12:31:54 +01:00
if ( ( state - > message_state = = MSG_CONTENT_TYPE | | state - > message_state = = MSG_CONTENT_DISPOSITION ) & & strlen ( state - > filename ) < 5 ) {
2014-03-14 12:17:50 +01:00
p = & buf [ 0 ] ;
for ( ; * p ; p + + ) {
if ( * p ! = ' ' & & * p ! = ' \t ' ) break ;
}
len = strlen ( p ) ;
if ( len + state - > anamepos < SMALLBUFSIZE - 2 ) {
memcpy ( & ( state - > attachment_name_buf [ state - > anamepos ] ) , p , len ) ;
state - > anamepos + = len ;
}
2011-11-16 14:47:47 +01:00
}
2011-11-14 15:57:52 +01:00
if ( state - > message_state = = MSG_CONTENT_TRANSFER_ENCODING ) {
2011-11-22 12:31:54 +01:00
if ( strcasestr ( buf , " base64 " ) ) state - > base64 = 1 ;
2011-11-14 15:57:52 +01:00
if ( strcasestr ( buf , " quoted-printable " ) ) state - > qp = 1 ;
}
2011-11-16 14:47:47 +01:00
2011-11-22 12:31:54 +01:00
/* boundary check, and reset variables */
2011-11-14 15:57:52 +01:00
2013-08-22 00:33:39 +02:00
boundary_line = is_substr_in_hash ( state - > boundaries , buf ) ;
2013-08-14 14:24:30 +02:00
2011-11-14 15:57:52 +01:00
if ( ! strstr ( buf , " boundary= " ) & & ! strstr ( buf , " boundary = " ) & & boundary_line = = 1 ) {
2013-03-24 01:20:12 +01:00
state - > is_header = 1 ;
//state->content_type_is_set = 0;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( state - > has_to_dump = = 1 ) {
2012-08-21 21:57:39 +02:00
if ( take_into_pieces = = 1 & & state - > fd ! = - 1 ) {
if ( state - > abufpos > 0 ) {
2012-11-03 23:42:36 +01:00
write ( state - > fd , abuffer , state - > abufpos ) ;
2012-09-07 15:08:50 +02:00
if ( state - > b64fd ! = - 1 ) {
abuffer [ state - > abufpos ] = ' \0 ' ;
2013-03-23 17:58:58 +01:00
if ( state - > base64 = = 1 ) {
n64 = base64_decode_attachment_buffer ( abuffer , state - > abufpos , & b64buffer [ 0 ] , sizeof ( b64buffer ) ) ;
n64 = write ( state - > b64fd , b64buffer , n64 ) ;
}
else {
n64 = write ( state - > b64fd , abuffer , state - > abufpos ) ;
}
2012-09-07 15:08:50 +02:00
}
state - > abufpos = 0 ; memset ( abuffer , 0 , abuffersize ) ;
2012-08-21 21:57:39 +02:00
}
close ( state - > fd ) ;
2012-09-07 15:08:50 +02:00
close ( state - > b64fd ) ;
2012-08-21 21:57:39 +02:00
}
2011-11-14 15:57:52 +01:00
state - > fd = - 1 ;
2012-09-07 15:08:50 +02:00
state - > b64fd = - 1 ;
2011-11-16 14:47:47 +01:00
}
2011-11-14 15:57:52 +01:00
2011-11-22 12:31:54 +01:00
state - > has_to_dump = 1 ;
2011-11-14 15:57:52 +01:00
state - > base64 = 0 ; state - > textplain = 0 ; state - > texthtml = state - > octetstream = 0 ;
state - > skip_html = 0 ;
state - > utf8 = 0 ;
state - > qp = 0 ;
state - > realbinary = 0 ;
2011-11-19 21:25:44 +01:00
state - > pushed_pointer = 0 ;
2011-11-22 12:31:54 +01:00
memset ( state - > filename , 0 , TINYBUFSIZE ) ;
memset ( state - > type , 0 , TINYBUFSIZE ) ;
2014-03-14 12:17:50 +01:00
memset ( state - > attachment_name_buf , 0 , SMALLBUFSIZE ) ;
state - > anamepos = 0 ;
2011-11-22 12:31:54 +01:00
state - > message_state = MSG_UNDEF ;
2011-11-14 15:57:52 +01:00
return 0 ;
}
if ( boundary_line = = 1 ) { return 0 ; }
/* end of boundary check */
2011-11-28 14:21:14 +01:00
/* skip irrelevant headers */
2012-09-03 10:06:34 +02:00
if ( state - > is_header = = 1 & & state - > message_state ! = MSG_FROM & & state - > message_state ! = MSG_TO & & state - > message_state ! = MSG_CC & & state - > message_state ! = MSG_RECIPIENT ) return 0 ;
2011-11-14 15:57:52 +01:00
2011-11-19 21:25:44 +01:00
/* don't process body if it's not a text or html part */
if ( state - > message_state = = MSG_BODY & & state - > textplain = = 0 & & state - > texthtml = = 0 ) return 0 ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( state - > base64 = = 1 & & state - > message_state = = MSG_BODY ) {
2012-11-03 23:42:36 +01:00
decodeBase64 ( buf ) ;
2011-11-16 14:47:47 +01:00
fixupBase64EncodedLine ( buf , state ) ;
}
2011-11-14 15:57:52 +01:00
2012-07-23 16:20:01 +02:00
/* remove all HTML tags */
2011-11-14 15:57:52 +01:00
if ( state - > texthtml = = 1 & & state - > message_state = = MSG_BODY ) markHTML ( buf , state ) ;
2011-12-07 15:24:52 +01:00
if ( state - > message_state = = MSG_BODY & & state - > qp = = 1 ) {
fixupSoftBreakInQuotedPritableLine ( buf , state ) ; // 2011.12.07
decodeQP ( buf ) ;
2011-11-14 15:57:52 +01:00
}
2012-07-23 16:20:01 +02:00
/* I believe that we can live without this function call */
//decodeURL(buf);
2011-11-14 15:57:52 +01:00
if ( state - > texthtml = = 1 ) decodeHTML ( buf ) ;
2011-12-07 15:24:52 +01:00
/* encode the body if it's not utf-8 encoded */
if ( state - > message_state = = MSG_BODY & & state - > utf8 ! = 1 ) utf8_encode ( ( unsigned char * ) buf ) ;
2011-11-14 15:57:52 +01:00
translateLine ( ( unsigned char * ) buf , state ) ;
reassembleToken ( buf ) ;
if ( state - > is_header = = 1 ) p = strchr ( buf , ' ' ) ;
else p = buf ;
2012-09-03 11:08:12 +02:00
//printf("a: %d/%d/%d/%d %s\n", state->is_1st_header, state->is_header, state->message_rfc822, state->message_state, buf);
2011-11-28 14:21:14 +01:00
2011-11-14 15:57:52 +01:00
do {
2011-11-16 14:47:47 +01:00
memset ( puf , 0 , sizeof ( puf ) ) ;
p = split ( p , ' ' , puf , sizeof ( puf ) - 1 ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( puf [ 0 ] = = ' \0 ' ) continue ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
degenerateToken ( ( unsigned char * ) puf ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( puf [ 0 ] = = ' \0 ' ) continue ;
2011-11-14 15:57:52 +01:00
2011-11-28 14:21:14 +01:00
strncat ( puf , " " , sizeof ( puf ) - 1 ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( strncasecmp ( puf , " http:// " , 7 ) = = 0 | | strncasecmp ( puf , " https:// " , 8 ) = = 0 ) fixURL ( puf ) ;
2011-11-14 15:57:52 +01:00
2013-11-26 11:43:21 +01:00
if ( state - > is_header = = 0 & & strncmp ( puf , " __URL__ " , 7 ) & & ( puf [ 0 ] = = ' ' | | ( strlen ( puf ) > MAX_WORD_LEN & & cfg - > enable_cjk = = 0 ) | | isHexNumber ( puf ) ) ) continue ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
len = strlen ( puf ) ;
2011-11-14 15:57:52 +01:00
2012-02-07 17:21:23 +01:00
if ( state - > message_state = = MSG_FROM & & state - > is_1st_header = = 1 & & strlen ( state - > b_from ) < SMALLBUFSIZE - len - 1 ) {
2011-11-16 14:47:47 +01:00
memcpy ( & ( state - > b_from [ strlen ( state - > b_from ) ] ) , puf , len ) ;
2011-11-14 15:57:52 +01:00
2012-11-28 23:15:14 +01:00
if ( does_it_seem_like_an_email_address ( puf ) = = 1 & & state - > b_from_domain [ 0 ] = = ' \0 ' & & len > 5 ) {
2012-02-07 17:21:23 +01:00
q = strchr ( puf , ' @ ' ) ;
2012-11-28 23:15:14 +01:00
if ( q & & strlen ( q ) > 5 ) {
memcpy ( & ( state - > b_from_domain ) , q + 1 , strlen ( q + 1 ) - 1 ) ;
2012-10-03 23:32:44 +02:00
if ( strstr ( sdata - > mailfrom , " <> " ) ) {
snprintf ( sdata - > fromemail , SMALLBUFSIZE - 1 , " %s " , puf ) ;
sdata - > fromemail [ len - 1 ] = ' \0 ' ;
}
2012-10-02 15:21:16 +02:00
}
2012-02-07 17:21:23 +01:00
2013-01-06 22:16:21 +01:00
if ( is_email_address_on_my_domains ( puf , data ) = = 1 ) sdata - > internal_sender = 1 ;
2012-01-14 09:53:26 +01:00
2012-02-07 17:21:23 +01:00
if ( strlen ( state - > b_from ) < SMALLBUFSIZE - len - 1 ) {
split_email_address ( puf ) ;
memcpy ( & ( state - > b_from [ strlen ( state - > b_from ) ] ) , puf , len ) ;
}
}
2012-01-03 00:19:43 +01:00
}
2012-09-03 10:06:34 +02:00
else if ( ( state - > message_state = = MSG_TO | | state - > message_state = = MSG_CC | | state - > message_state = = MSG_RECIPIENT ) & & state - > is_1st_header = = 1 & & state - > tolen < MAXBUFSIZE - len - 1 ) {
2012-10-05 14:27:03 +02:00
strtolower ( puf ) ;
2013-08-14 14:24:30 +02:00
if ( state - > message_state = = MSG_RECIPIENT & & findnode ( state - > journal_recipient , puf ) = = NULL ) {
addnode ( state - > journal_recipient , puf ) ;
2012-10-05 14:27:03 +02:00
memcpy ( & ( state - > b_journal_to [ state - > journaltolen ] ) , puf , len ) ;
if ( cfg - > verbosity > = _LOG_DEBUG ) syslog ( LOG_PRIORITY , " %s: journal rcpt: '%s' " , sdata - > ttmpfile , puf ) ;
}
2011-11-14 15:57:52 +01:00
2013-08-14 14:24:30 +02:00
if ( findnode ( state - > rcpt , puf ) = = NULL ) {
2013-10-29 16:05:35 +01:00
/* skip any address matching ...@cfg->hostid, 2013.10.29, SJ */
q = strchr ( puf , ' @ ' ) ;
if ( q & & strncmp ( q + 1 , cfg - > hostid , cfg - > hostid_len ) = = 0 ) {
continue ;
}
2013-08-14 14:24:30 +02:00
addnode ( state - > rcpt , puf ) ;
2012-08-21 21:57:39 +02:00
memcpy ( & ( state - > b_to [ state - > tolen ] ) , puf , len ) ;
state - > tolen + = len ;
2012-01-03 00:19:43 +01:00
2012-02-07 17:21:23 +01:00
if ( does_it_seem_like_an_email_address ( puf ) = = 1 ) {
2013-01-06 22:16:21 +01:00
if ( is_email_address_on_my_domains ( puf , data ) = = 1 ) sdata - > internal_recipient = 1 ;
2012-02-07 17:21:23 +01:00
else sdata - > external_recipient = 1 ;
2012-01-03 00:19:43 +01:00
2013-10-29 16:05:35 +01:00
//q = strchr(puf, '@');
2012-02-07 17:21:23 +01:00
if ( q ) {
2013-08-14 14:24:30 +02:00
if ( findnode ( state - > rcpt_domain , q + 1 ) = = NULL ) {
addnode ( state - > rcpt_domain , q + 1 ) ;
2012-02-07 17:21:23 +01:00
memcpy ( & ( state - > b_to_domain [ strlen ( state - > b_to_domain ) ] ) , q + 1 , strlen ( q + 1 ) ) ;
}
2012-01-14 09:53:26 +01:00
}
2012-08-21 21:57:39 +02:00
if ( state - > tolen < MAXBUFSIZE - len - 1 ) {
2012-02-07 17:21:23 +01:00
split_email_address ( puf ) ;
2012-08-21 21:57:39 +02:00
memcpy ( & ( state - > b_to [ state - > tolen ] ) , puf , len ) ;
state - > tolen + = len ;
2012-02-07 17:21:23 +01:00
}
}
2011-11-28 14:21:14 +01:00
}
2012-10-05 14:27:03 +02:00
2011-11-28 14:21:14 +01:00
}
2012-08-22 08:15:53 +02:00
else if ( state - > message_state = = MSG_BODY & & len > = cfg - > min_word_len & & state - > bodylen < BIGBUFSIZE - len - 1 ) {
2012-08-21 21:57:39 +02:00
memcpy ( & ( state - > b_body [ state - > bodylen ] ) , puf , len ) ;
state - > bodylen + = len ;
}
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
} while ( p ) ;
2011-11-14 15:57:52 +01:00
return 0 ;
}