2011-11-14 15:57:52 +01:00
/*
 * parser.c, SJ
 */
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <ctype.h>
# include <sys/socket.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <netinet/in.h>
# include <arpa/inet.h>
# include <fcntl.h>
# include <unistd.h>
# include <piler.h>
2011-11-16 14:47:47 +01:00
struct _state parse_message ( struct session_data * sdata , struct __config * cfg ) {
2011-11-14 15:57:52 +01:00
FILE * f ;
2011-12-30 15:52:59 +01:00
char * p , buf [ MAXBUFSIZE ] ;
2011-11-14 15:57:52 +01:00
struct _state state ;
2011-11-19 21:25:44 +01:00
int i , len ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
init_state ( & state ) ;
2011-11-14 15:57:52 +01:00
f = fopen ( sdata - > ttmpfile , " r " ) ;
if ( ! f ) {
syslog ( LOG_PRIORITY , " %s: cannot open " , sdata - > ttmpfile ) ;
return state ;
}
2011-11-19 21:25:44 +01:00
state . mfd = open ( sdata - > tmpframe , O_CREAT | O_RDWR , S_IRUSR | S_IWUSR ) ;
if ( state . mfd = = - 1 ) {
syslog ( LOG_PRIORITY , " %s: cannot open frame file: %s " , sdata - > ttmpfile , sdata - > tmpframe ) ;
return state ;
2011-11-14 15:57:52 +01:00
}
2011-11-16 14:47:47 +01:00
2011-11-19 21:25:44 +01:00
while ( fgets ( buf , sizeof ( buf ) - 1 , f ) ) {
parse_line ( buf , & state , sdata , cfg ) ;
}
close ( state . mfd ) ; state . mfd = 0 ;
2011-11-14 15:57:52 +01:00
fclose ( f ) ;
2011-11-19 21:25:44 +01:00
2011-11-16 14:47:47 +01:00
free_list ( state . boundaries ) ;
2011-11-28 14:21:14 +01:00
free_list ( state . rcpt ) ;
trimBuffer ( state . b_subject ) ;
fixupEncodedHeaderLine ( state . b_subject ) ;
2011-11-14 15:57:52 +01:00
2011-11-19 21:25:44 +01:00
2011-11-22 12:31:54 +01:00
for ( i = 1 ; i < = state . n_attachments ; i + + ) {
2011-11-19 21:25:44 +01:00
digest_file ( state . attachments [ i ] . internalname , & ( state . attachments [ i ] . digest [ 0 ] ) ) ;
2011-11-28 14:21:14 +01:00
fixupEncodedHeaderLine ( state . attachments [ i ] . filename ) ;
2011-11-22 12:31:54 +01:00
if ( cfg - > verbosity > = _LOG_DEBUG ) syslog ( LOG_PRIORITY , " %s: attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s " , sdata - > ttmpfile , i , state . attachments [ i ] . filename , state . attachments [ i ] . type , state . attachments [ i ] . size , state . attachments [ i ] . internalname , state . attachments [ i ] . digest ) ;
2011-12-30 15:52:59 +01:00
p = determine_attachment_type ( state . attachments [ i ] . filename , state . attachments [ i ] . type ) ;
len = strlen ( p ) ;
if ( strlen ( sdata - > attachments ) < SMALLBUFSIZE - len - 1 ) memcpy ( & ( sdata - > attachments [ strlen ( sdata - > attachments ) ] ) , p , len ) ;
2011-11-19 21:25:44 +01:00
}
2011-11-14 15:57:52 +01:00
if ( state . message_id [ 0 ] = = 0 ) snprintf ( state . message_id , SMALLBUFSIZE - 1 , " null " ) ;
2011-11-16 14:47:47 +01:00
len = strlen ( state . b_from ) ;
if ( state . b_from [ len - 1 ] = = ' ' ) state . b_from [ len - 1 ] = ' \0 ' ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
len = strlen ( state . b_to ) ;
if ( state . b_to [ len - 1 ] = = ' ' ) state . b_to [ len - 1 ] = ' \0 ' ;
2011-11-14 15:57:52 +01:00
syslog ( LOG_PRIORITY , " %s: from=%s, to=%s, subj=%s, message-id=%s " , sdata - > ttmpfile , state . b_from , state . b_to , state . b_subject , state . message_id ) ;
return state ;
}
2011-11-16 14:47:47 +01:00
int parse_line ( char * buf , struct _state * state , struct session_data * sdata , struct __config * cfg ) {
2011-11-28 14:21:14 +01:00
char * p , puf [ SMALLBUFSIZE ] ;
2011-11-16 14:47:47 +01:00
int x , len , b64_len , boundary_line = 0 ;
2011-11-14 15:57:52 +01:00
state - > line_num + + ;
2011-11-19 21:25:44 +01:00
len = strlen ( buf ) ;
2011-12-13 17:05:22 +01:00
if ( state - > is_1st_header = = 1 & & ( strncmp ( buf , " Received: by piler " , strlen ( " Received: by piler " ) ) = = 0 | | strncmp ( buf , " X-piler-id: " , strlen ( " X-piler-id: " ) ) = = 0 ) ) {
sdata - > restored_copy = 1 ;
}
2011-11-22 12:31:54 +01:00
//printf("buf: %s", buf);
if ( state - > message_rfc822 = = 0 & & ( buf [ 0 ] = = ' \r ' | | buf [ 0 ] = = ' \n ' ) ) {
state - > message_state = MSG_BODY ;
if ( state - > is_header = = 1 ) state - > is_header = 0 ;
state - > is_1st_header = 0 ;
2011-11-19 21:25:44 +01:00
}
2011-11-22 12:31:54 +01:00
if ( state - > message_state = = MSG_BODY & & state - > fd ! = - 1 & & is_item_on_string ( state - > boundaries , buf ) = = 0 ) {
2011-11-19 21:25:44 +01:00
//printf("dumping: %s", buf);
write ( state - > fd , buf , len ) ;
state - > attachments [ state - > n_attachments ] . size + = len ;
}
else {
state - > saved_size + = len ;
//printf("%s", buf);
write ( state - > mfd , buf , len ) ;
}
2011-11-22 12:31:54 +01:00
if ( state - > message_state = = MSG_BODY & & state - > has_to_dump = = 1 & & state - > pushed_pointer = = 0 ) {
//printf("####name: %s, type: %s, base64: %d\n", state->filename, state->type, state->base64);
state - > pushed_pointer = 1 ;
// this is a real attachment to dump
if ( state - > base64 = = 1 & & strlen ( state - > filename ) > 5 & & strlen ( state - > type ) > 3 & & state - > n_attachments < MAX_ATTACHMENTS - 1 ) {
state - > n_attachments + + ;
snprintf ( state - > attachments [ state - > n_attachments ] . filename , TINYBUFSIZE - 1 , " %s " , state - > filename ) ;
snprintf ( state - > attachments [ state - > n_attachments ] . type , TINYBUFSIZE - 1 , " %s " , state - > type ) ;
snprintf ( state - > attachments [ state - > n_attachments ] . internalname , TINYBUFSIZE - 1 , " %s.a%d " , sdata - > ttmpfile , state - > n_attachments ) ;
//printf("DUMP FILE: %s\n", state->attachments[state->n_attachments].internalname);
state - > fd = open ( state - > attachments [ state - > n_attachments ] . internalname , O_CREAT | O_RDWR , S_IRUSR | S_IWUSR ) ;
2011-11-23 12:24:21 +01:00
if ( state - > fd = = - 1 ) {
2011-11-22 12:31:54 +01:00
2011-11-23 12:24:21 +01:00
state - > attachments [ state - > n_attachments ] . size = 0 ;
memset ( state - > attachments [ state - > n_attachments ] . type , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . filename , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . internalname , 0 , TINYBUFSIZE ) ;
memset ( state - > attachments [ state - > n_attachments ] . digest , 0 , 2 * DIGEST_LENGTH + 1 ) ;
syslog ( LOG_PRIORITY , " %s: error opening %s " , sdata - > ttmpfile , state - > attachments [ state - > n_attachments ] . internalname ) ;
state - > n_attachments - - ;
state - > has_to_dump = 0 ;
}
else {
2011-12-06 11:29:36 +01:00
snprintf ( puf , sizeof ( puf ) - 1 , " ATTACHMENT_POINTER_%s.a%d_XXX_PILER " , sdata - > ttmpfile , state - > n_attachments ) ;
2011-11-23 12:24:21 +01:00
write ( state - > mfd , puf , strlen ( puf ) ) ;
//printf("%s", puf);
}
2011-11-22 12:31:54 +01:00
}
else {
state - > has_to_dump = 0 ;
}
}
2011-11-14 15:57:52 +01:00
if ( * buf = = ' . ' & & * ( buf + 1 ) = = ' . ' ) buf + + ;
/* undefined message state */
if ( state - > is_header = = 1 & & buf [ 0 ] ! = ' ' & & buf [ 0 ] ! = ' \t ' & & strchr ( buf , ' : ' ) ) state - > message_state = MSG_UNDEF ;
/* skip empty lines */
if ( state - > message_rfc822 = = 0 & & ( buf [ 0 ] = = ' \r ' | | buf [ 0 ] = = ' \n ' ) ) {
return 0 ;
}
trimBuffer ( buf ) ;
/* skip the first line, if it's a "From <email address> date" format */
if ( state - > line_num = = 1 & & strncmp ( buf , " From " , 5 ) = = 0 ) return 0 ;
if ( state - > is_header = = 0 & & buf [ 0 ] ! = ' ' & & buf [ 0 ] ! = ' \t ' ) state - > message_state = MSG_BODY ;
if ( ( state - > content_type_is_set = = 0 | | state - > is_header = = 1 ) & & strncasecmp ( buf , " Content-Type: " , strlen ( " Content-Type: " ) ) = = 0 ) state - > message_state = MSG_CONTENT_TYPE ;
else if ( strncasecmp ( buf , " Content-Transfer-Encoding: " , strlen ( " Content-Transfer-Encoding: " ) ) = = 0 ) state - > message_state = MSG_CONTENT_TRANSFER_ENCODING ;
else if ( strncasecmp ( buf , " Content-Disposition: " , strlen ( " Content-Disposition: " ) ) = = 0 ) state - > message_state = MSG_CONTENT_DISPOSITION ;
if ( state - > message_state = = MSG_CONTENT_TYPE | | state - > message_state = = MSG_CONTENT_TRANSFER_ENCODING ) state - > is_header = 1 ;
/* header checks */
if ( state - > is_header = = 1 ) {
2011-11-16 14:47:47 +01:00
if ( strncasecmp ( buf , " From: " , strlen ( " From: " ) ) = = 0 ) state - > message_state = MSG_FROM ;
2011-11-14 15:57:52 +01:00
else if ( strncasecmp ( buf , " To: " , 3 ) = = 0 ) state - > message_state = MSG_TO ;
else if ( strncasecmp ( buf , " Cc: " , 3 ) = = 0 ) state - > message_state = MSG_CC ;
else if ( strncasecmp ( buf , " Message-Id: " , 11 ) = = 0 ) state - > message_state = MSG_MESSAGE_ID ;
else if ( strncasecmp ( buf , " Subject: " , strlen ( " Subject: " ) ) = = 0 ) state - > message_state = MSG_SUBJECT ;
else if ( strncasecmp ( buf , " Date: " , strlen ( " Date: " ) ) = = 0 & & sdata - > sent = = 0 ) sdata - > sent = parse_date_header ( buf ) ;
if ( state - > message_state = = MSG_MESSAGE_ID & & state - > message_id [ 0 ] = = 0 ) {
p = strchr ( buf + 11 , ' ' ) ;
if ( p ) p = buf + 12 ;
else p = buf + 11 ;
snprintf ( state - > message_id , SMALLBUFSIZE - 1 , " %s " , p ) ;
}
/* we are interested in only From:, To:, Subject:, Received:, Content-*: header lines */
if ( state - > message_state < = 0 ) return 0 ;
}
if ( ( p = strcasestr ( buf , " boundary " ) ) ) {
x = extract_boundary ( p , state ) ;
}
2011-11-28 14:21:14 +01:00
if ( state - > is_1st_header = = 1 & & state - > message_state = = MSG_SUBJECT & & strlen ( state - > b_subject ) + strlen ( buf ) < MAXBUFSIZE - 1 ) {
if ( state - > b_subject [ 0 ] = = ' \0 ' ) {
strncat ( state - > b_subject , buf + strlen ( " Subject: " ) , MAXBUFSIZE - 1 ) ;
}
else {
p = strrchr ( state - > b_subject , ' ' ) ;
if ( p & & ( strcasestr ( p + 1 , " ?Q? " ) | | strcasestr ( p + 1 , " ?B? " ) ) ) {
strncat ( state - > b_subject , buf + 1 , MAXBUFSIZE - 1 ) ;
}
else strncat ( state - > b_subject , buf , MAXBUFSIZE - 1 ) ;
}
}
2011-11-16 14:47:47 +01:00
2011-11-14 15:57:52 +01:00
/* Content-type: checking */
if ( state - > message_state = = MSG_CONTENT_TYPE ) {
state - > message_rfc822 = 0 ;
/* extract Content type */
p = strchr ( buf , ' : ' ) ;
if ( p ) {
p + + ;
if ( * p = = ' ' | | * p = = ' \t ' ) p + + ;
2011-11-22 12:31:54 +01:00
snprintf ( state - > type , TINYBUFSIZE - 1 , " %s " , p ) ;
2011-11-14 15:57:52 +01:00
state - > content_type_is_set = 1 ;
2011-11-22 12:31:54 +01:00
p = strchr ( state - > type , ' ; ' ) ;
2011-11-14 15:57:52 +01:00
if ( p ) * p = ' \0 ' ;
}
if ( strcasestr ( buf , " text/plain " ) | |
strcasestr ( buf , " multipart/mixed " ) | |
strcasestr ( buf , " multipart/alternative " ) | |
strcasestr ( buf , " multipart/report " ) | |
strcasestr ( buf , " message/delivery-status " ) | |
strcasestr ( buf , " text/rfc822-headers " ) | |
strcasestr ( buf , " message/rfc822 " ) | |
strcasestr ( buf , " application/ms-tnef " )
) {
2011-11-16 14:47:47 +01:00
state - > textplain = 1 ;
2011-11-14 15:57:52 +01:00
}
else if ( strcasestr ( buf , " text/html " ) ) {
2011-11-16 14:47:47 +01:00
state - > texthtml = 1 ;
2011-11-14 15:57:52 +01:00
}
2011-11-16 14:47:47 +01:00
/* switch (back) to header mode if we encounterd an attachment with "message/rfc822" content-type */
2011-11-14 15:57:52 +01:00
if ( strcasestr ( buf , " message/rfc822 " ) ) {
state - > message_rfc822 = 1 ;
state - > is_header = 1 ;
}
if ( strcasestr ( buf , " charset " ) & & strcasestr ( buf , " UTF-8 " ) ) state - > utf8 = 1 ;
2011-11-19 21:25:44 +01:00
}
2011-11-14 15:57:52 +01:00
2011-11-22 12:31:54 +01:00
if ( ( state - > message_state = = MSG_CONTENT_TYPE | | state - > message_state = = MSG_CONTENT_DISPOSITION ) & & strlen ( state - > filename ) < 5 ) {
extractNameFromHeaderLine ( buf , " name " , state - > filename ) ;
2011-11-16 14:47:47 +01:00
}
2011-11-14 15:57:52 +01:00
if ( state - > message_state = = MSG_CONTENT_TRANSFER_ENCODING ) {
2011-11-22 12:31:54 +01:00
if ( strcasestr ( buf , " base64 " ) ) state - > base64 = 1 ;
2011-11-14 15:57:52 +01:00
if ( strcasestr ( buf , " quoted-printable " ) ) state - > qp = 1 ;
}
2011-11-16 14:47:47 +01:00
2011-11-22 12:31:54 +01:00
/* boundary check, and reset variables */
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
boundary_line = is_item_on_string ( state - > boundaries , buf ) ;
2011-11-14 15:57:52 +01:00
if ( ! strstr ( buf , " boundary= " ) & & ! strstr ( buf , " boundary = " ) & & boundary_line = = 1 ) {
state - > content_type_is_set = 0 ;
2011-11-16 14:47:47 +01:00
if ( state - > has_to_dump = = 1 ) {
2011-11-14 15:57:52 +01:00
if ( state - > fd ! = - 1 ) close ( state - > fd ) ;
state - > fd = - 1 ;
2011-11-16 14:47:47 +01:00
}
2011-11-14 15:57:52 +01:00
2011-11-22 12:31:54 +01:00
state - > has_to_dump = 1 ;
2011-11-14 15:57:52 +01:00
state - > base64 = 0 ; state - > textplain = 0 ; state - > texthtml = state - > octetstream = 0 ;
state - > skip_html = 0 ;
state - > utf8 = 0 ;
state - > qp = 0 ;
state - > realbinary = 0 ;
2011-11-19 21:25:44 +01:00
state - > pushed_pointer = 0 ;
2011-11-22 12:31:54 +01:00
memset ( state - > filename , 0 , TINYBUFSIZE ) ;
memset ( state - > type , 0 , TINYBUFSIZE ) ;
state - > message_state = MSG_UNDEF ;
2011-11-14 15:57:52 +01:00
return 0 ;
}
if ( boundary_line = = 1 ) { return 0 ; }
/* end of boundary check */
2011-11-28 14:21:14 +01:00
/* skip irrelevant headers */
if ( state - > is_header = = 1 & & state - > message_state ! = MSG_FROM & & state - > message_state ! = MSG_TO & & state - > message_state ! = MSG_CC ) return 0 ;
2011-11-14 15:57:52 +01:00
2011-11-19 21:25:44 +01:00
/* don't process body if it's not a text or html part */
if ( state - > message_state = = MSG_BODY & & state - > textplain = = 0 & & state - > texthtml = = 0 ) return 0 ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( state - > base64 = = 1 & & state - > message_state = = MSG_BODY ) {
b64_len = decodeBase64 ( buf ) ;
fixupBase64EncodedLine ( buf , state ) ;
}
2011-11-14 15:57:52 +01:00
if ( state - > texthtml = = 1 & & state - > message_state = = MSG_BODY ) markHTML ( buf , state ) ;
2011-12-07 15:24:52 +01:00
if ( state - > message_state = = MSG_BODY & & state - > qp = = 1 ) {
fixupSoftBreakInQuotedPritableLine ( buf , state ) ; // 2011.12.07
decodeQP ( buf ) ;
2011-11-14 15:57:52 +01:00
}
decodeURL ( buf ) ;
if ( state - > texthtml = = 1 ) decodeHTML ( buf ) ;
2011-12-07 15:24:52 +01:00
/* encode the body if it's not utf-8 encoded */
if ( state - > message_state = = MSG_BODY & & state - > utf8 ! = 1 ) utf8_encode ( ( unsigned char * ) buf ) ;
2011-11-14 15:57:52 +01:00
translateLine ( ( unsigned char * ) buf , state ) ;
reassembleToken ( buf ) ;
if ( state - > is_header = = 1 ) p = strchr ( buf , ' ' ) ;
else p = buf ;
2011-11-28 14:21:14 +01:00
//printf("a: *%s*\n", buf);
2011-11-14 15:57:52 +01:00
do {
2011-11-16 14:47:47 +01:00
memset ( puf , 0 , sizeof ( puf ) ) ;
p = split ( p , ' ' , puf , sizeof ( puf ) - 1 ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( puf [ 0 ] = = ' \0 ' ) continue ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
degenerateToken ( ( unsigned char * ) puf ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( puf [ 0 ] = = ' \0 ' ) continue ;
2011-11-14 15:57:52 +01:00
2011-11-28 14:21:14 +01:00
strncat ( puf , " " , sizeof ( puf ) - 1 ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( strncasecmp ( puf , " http:// " , 7 ) = = 0 | | strncasecmp ( puf , " https:// " , 8 ) = = 0 ) fixURL ( puf ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
if ( state - > is_header = = 0 & & strncmp ( puf , " URL* " , 4 ) & & ( puf [ 0 ] = = ' ' | | strlen ( puf ) > MAX_WORD_LEN | | isHexNumber ( puf ) ) ) continue ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
len = strlen ( puf ) ;
2011-11-14 15:57:52 +01:00
2011-12-27 20:51:56 +01:00
if ( state - > message_state = = MSG_FROM & & strchr ( puf , ' @ ' ) & & strlen ( puf ) > 5 & & state - > is_1st_header = = 1 & & state - > b_from [ 0 ] = = ' \0 ' & & strlen ( state - > b_from ) < SMALLBUFSIZE - len - 1 )
2011-11-16 14:47:47 +01:00
memcpy ( & ( state - > b_from [ strlen ( state - > b_from ) ] ) , puf , len ) ;
2011-11-14 15:57:52 +01:00
2011-12-27 20:51:56 +01:00
else if ( ( state - > message_state = = MSG_TO | | state - > message_state = = MSG_CC ) & & state - > is_1st_header = = 1 & & strchr ( puf , ' @ ' ) & & strlen ( puf ) > 5 & & strlen ( state - > b_to ) < SMALLBUFSIZE - len - 1 ) {
2011-11-14 15:57:52 +01:00
2011-11-28 14:21:14 +01:00
if ( is_string_on_list ( state - > rcpt , puf ) = = 0 ) {
append_list ( & ( state - > rcpt ) , puf ) ;
memcpy ( & ( state - > b_to [ strlen ( state - > b_to ) ] ) , puf , len ) ;
}
}
2011-11-16 14:47:47 +01:00
else if ( state - > message_state = = MSG_BODY & & strlen ( state - > b_body ) < BIGBUFSIZE - len - 1 )
memcpy ( & ( state - > b_body [ strlen ( state - > b_body ) ] ) , puf , len ) ;
2011-11-14 15:57:52 +01:00
2011-11-16 14:47:47 +01:00
} while ( p ) ;
2011-11-14 15:57:52 +01:00
return 0 ;
}