2011-11-14 15:57:52 +01:00
|
|
|
/*
|
|
|
|
* parser.c, SJ
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <piler.h>
|
|
|
|
|
|
|
|
|
2017-08-08 15:34:45 +02:00
|
|
|
struct parser_state parse_message(struct session_data *sdata, int take_into_pieces, struct data *data, struct config *cfg){
|
2011-11-14 15:57:52 +01:00
|
|
|
FILE *f;
|
2018-02-20 20:35:31 +01:00
|
|
|
char buf[MAXBUFSIZE];
|
2012-08-21 21:57:39 +02:00
|
|
|
char writebuffer[MAXBUFSIZE], abuffer[MAXBUFSIZE];
|
2015-11-21 23:06:47 +01:00
|
|
|
struct parser_state state;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
init_state(&state);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2012-01-03 00:19:43 +01:00
|
|
|
f = fopen(sdata->filename, "r");
|
2011-11-14 15:57:52 +01:00
|
|
|
if(!f){
|
|
|
|
syslog(LOG_PRIORITY, "%s: cannot open", sdata->ttmpfile);
|
|
|
|
return state;
|
2014-06-04 22:20:10 +02:00
|
|
|
}
|
|
|
|
|
2012-06-01 14:25:49 +02:00
|
|
|
if(take_into_pieces == 1){
|
|
|
|
state.mfd = open(sdata->tmpframe, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
|
|
|
if(state.mfd == -1){
|
|
|
|
syslog(LOG_PRIORITY, "%s: cannot open frame file: %s", sdata->ttmpfile, sdata->tmpframe);
|
|
|
|
fclose(f);
|
|
|
|
return state;
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
while(fgets(buf, sizeof(buf)-1, f)){
|
2013-01-06 22:16:21 +01:00
|
|
|
parse_line(buf, &state, sdata, take_into_pieces, &writebuffer[0], sizeof(writebuffer), &abuffer[0], sizeof(abuffer), data, cfg);
|
2012-08-21 21:57:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if(take_into_pieces == 1 && state.writebufpos > 0){
|
2017-08-08 15:34:45 +02:00
|
|
|
if(write(state.mfd, writebuffer, state.writebufpos) == -1) syslog(LOG_PRIORITY, "ERROR: %s: write(), %s, %d, %s", sdata->ttmpfile, __func__, __LINE__, __FILE__);
|
2021-04-18 20:19:26 +02:00
|
|
|
memset(writebuffer, 0, sizeof(writebuffer)); //-V597
|
2012-08-21 21:57:39 +02:00
|
|
|
state.writebufpos = 0;
|
2012-06-01 14:25:49 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if(take_into_pieces == 1){
|
|
|
|
close(state.mfd); state.mfd = 0;
|
2020-03-14 07:43:19 +01:00
|
|
|
|
|
|
|
if(state.has_to_dump_whole_body == 1){
|
|
|
|
if(state.abufpos > 0){
|
|
|
|
flush_attachment_buffer(&state, &abuffer[0], sizeof(abuffer));
|
|
|
|
}
|
|
|
|
if(state.fd != -1) close(state.fd);
|
|
|
|
if(state.b64fd != -1) close(state.b64fd);
|
|
|
|
}
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
fclose(f);
|
|
|
|
|
2019-01-13 16:22:27 +01:00
|
|
|
if(data->import && data->import->extra_recipient){
|
2021-07-19 18:07:49 +02:00
|
|
|
char tmpbuf[SMALLBUFSIZE];
|
|
|
|
snprintf(tmpbuf, sizeof(tmpbuf)-1, "%s", data->import->extra_recipient);
|
|
|
|
add_recipient(tmpbuf, strlen(tmpbuf), sdata, &state, data, cfg);
|
2019-01-13 14:40:01 +01:00
|
|
|
}
|
|
|
|
|
2020-12-13 08:36:12 +01:00
|
|
|
// If both Sender: and From: headers exist, and they are different, then append
|
|
|
|
// the From: address to recipients list to give him access to this email as well
|
|
|
|
|
|
|
|
if(state.b_sender_domain[0] && strcmp(state.b_from, state.b_sender)){
|
|
|
|
char tmpbuf[SMALLBUFSIZE];
|
|
|
|
get_first_email_address_from_string(state.b_from, tmpbuf, sizeof(tmpbuf));
|
|
|
|
tmpbuf[strlen(tmpbuf)] = ' ';
|
|
|
|
add_recipient(tmpbuf, strlen(tmpbuf), sdata, &state, data, cfg);
|
|
|
|
}
|
|
|
|
|
2012-01-03 00:19:43 +01:00
|
|
|
return state;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-12-13 08:36:12 +01:00
|
|
|
void post_parse(struct session_data *sdata, struct parser_state *state, struct config *cfg){
|
2020-08-10 20:58:34 +02:00
|
|
|
int i;
|
2012-01-03 00:19:43 +01:00
|
|
|
|
2013-08-14 14:24:30 +02:00
|
|
|
clearhash(state->boundaries);
|
|
|
|
clearhash(state->rcpt);
|
|
|
|
clearhash(state->rcpt_domain);
|
|
|
|
clearhash(state->journal_recipient);
|
2011-11-19 21:25:44 +01:00
|
|
|
|
2020-12-13 07:50:03 +01:00
|
|
|
// Fix From: and Sender: lines if they are too long
|
2018-03-03 11:20:55 +01:00
|
|
|
if(strlen(state->b_from) > 255) state->b_from[255] = '\0';
|
|
|
|
if(strlen(state->b_from_domain) > 255) state->b_from_domain[255] = '\0';
|
|
|
|
|
2020-12-12 21:43:16 +01:00
|
|
|
if(strlen(state->b_sender) > 255) state->b_sender[255] = '\0';
|
|
|
|
if(strlen(state->b_sender_domain) > 255) state->b_sender_domain[255] = '\0';
|
|
|
|
|
2018-03-03 11:20:55 +01:00
|
|
|
// Truncate the message_id if it's >255 characters
|
|
|
|
if(strlen(state->message_id) > 255) state->message_id[255] = '\0';
|
|
|
|
|
2018-01-11 09:59:58 +01:00
|
|
|
fixupEncodedHeaderLine(state->b_subject, MAXBUFSIZE);
|
2012-01-03 00:19:43 +01:00
|
|
|
trimBuffer(state->b_subject);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2012-01-03 00:19:43 +01:00
|
|
|
if(sdata->internal_sender == 0) sdata->direction = DIRECTION_INCOMING;
|
|
|
|
else {
|
|
|
|
if(sdata->internal_recipient == 1) sdata->direction = DIRECTION_INTERNAL;
|
|
|
|
if(sdata->external_recipient == 1) sdata->direction = DIRECTION_OUTGOING;
|
|
|
|
if(sdata->internal_recipient == 1 && sdata->external_recipient == 1) sdata->direction = DIRECTION_INTERNAL_AND_OUTGOING;
|
|
|
|
}
|
2011-11-19 21:25:44 +01:00
|
|
|
|
2021-07-20 13:00:53 +02:00
|
|
|
char *q = strrchr(state->receivedbuf, ';');
|
|
|
|
if(q){
|
|
|
|
time_t received_timestamp = parse_date_header(q+1);
|
|
|
|
if(received_timestamp > 10000000){
|
|
|
|
// If the calculated date based on Date: header line differs more than 1 week
|
|
|
|
// then we'll override it with the data parsed from the first Received: line
|
|
|
|
if(labs(received_timestamp - sdata->sent) > 604800) sdata->sent = received_timestamp;
|
|
|
|
}
|
|
|
|
}
|
2011-11-28 14:21:14 +01:00
|
|
|
|
2012-01-03 00:19:43 +01:00
|
|
|
for(i=1; i<=state->n_attachments; i++){
|
2020-10-24 18:47:10 +02:00
|
|
|
char puf[SMALLBUFSIZE];
|
|
|
|
snprintf(puf, sizeof(puf)-1, "%s ", state->attachments[i].filename);
|
|
|
|
|
|
|
|
unsigned int len = strlen(puf);
|
|
|
|
if(state->bodylen < BIGBUFSIZE-len-1){
|
|
|
|
memcpy(&(state->b_body[state->bodylen]), puf, len);
|
|
|
|
state->bodylen += len;
|
|
|
|
}
|
|
|
|
|
2012-01-03 00:19:43 +01:00
|
|
|
digest_file(state->attachments[i].internalname, &(state->attachments[i].digest[0]));
|
2011-12-30 15:52:59 +01:00
|
|
|
|
2021-07-24 12:39:53 +02:00
|
|
|
if(cfg->verbosity >= _LOG_DEBUG) syslog(LOG_PRIORITY, "%s: attachment list: i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s, dumped: %d", sdata->ttmpfile, i, state->attachments[i].filename, state->attachments[i].type, state->attachments[i].size, state->attachments[i].internalname, state->attachments[i].digest, state->attachments[i].dumped);
|
2012-01-03 00:19:43 +01:00
|
|
|
|
2020-08-10 20:58:34 +02:00
|
|
|
char *p = determine_attachment_type(state->attachments[i].filename, state->attachments[i].type);
|
2020-10-24 18:47:10 +02:00
|
|
|
len = strlen(p);
|
2012-01-03 00:19:43 +01:00
|
|
|
if(strlen(sdata->attachments) < SMALLBUFSIZE-len-1 && !strstr(sdata->attachments, p)) memcpy(&(sdata->attachments[strlen(sdata->attachments)]), p, len);
|
2012-09-07 15:08:50 +02:00
|
|
|
|
|
|
|
if(state->attachments[i].dumped == 1){
|
2020-08-10 20:58:34 +02:00
|
|
|
int rec = 0;
|
2015-02-14 19:47:40 +01:00
|
|
|
if(cfg->extract_attachments == 1 && state->bodylen < BIGBUFSIZE-1024) extract_attachment_content(sdata, state, state->attachments[i].aname, get_attachment_extractor_by_filename(state->attachments[i].filename), &rec, cfg);
|
2020-02-16 10:33:21 +01:00
|
|
|
|
2012-09-07 15:08:50 +02:00
|
|
|
unlink(state->attachments[i].aname);
|
|
|
|
}
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-07-06 13:02:40 +02:00
|
|
|
if(state->message_id[0] == 0){
|
|
|
|
if(cfg->archive_emails_not_having_message_id == 1)
|
2012-09-26 15:26:30 +02:00
|
|
|
snprintf(state->message_id, SMALLBUFSIZE-1, "%s", sdata->ttmpfile);
|
2012-07-06 13:02:40 +02:00
|
|
|
else snprintf(state->message_id, SMALLBUFSIZE-1, "null");
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2014-04-25 21:17:01 +02:00
|
|
|
|
|
|
|
digest_string(state->message_id, &(state->message_id_hash[0]));
|
|
|
|
|
2015-11-10 16:06:47 +01:00
|
|
|
if(sdata->sent == 0) sdata->sent = sdata->now;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-11-21 23:06:47 +01:00
|
|
|
void storno_attachment(struct parser_state *state){
|
2012-11-02 22:17:21 +01:00
|
|
|
state->has_to_dump = 0;
|
|
|
|
|
|
|
|
if(state->n_attachments <= 0) return;
|
|
|
|
|
|
|
|
state->attachments[state->n_attachments].size = 0;
|
|
|
|
state->attachments[state->n_attachments].dumped = 0;
|
|
|
|
|
|
|
|
memset(state->attachments[state->n_attachments].type, 0, TINYBUFSIZE);
|
|
|
|
memset(state->attachments[state->n_attachments].shorttype, 0, TINYBUFSIZE);
|
|
|
|
memset(state->attachments[state->n_attachments].aname, 0, TINYBUFSIZE);
|
2018-02-04 10:50:42 +01:00
|
|
|
memset(state->attachments[state->n_attachments].filename, 0, SMALLBUFSIZE);
|
2012-11-02 22:17:21 +01:00
|
|
|
memset(state->attachments[state->n_attachments].internalname, 0, TINYBUFSIZE);
|
|
|
|
memset(state->attachments[state->n_attachments].digest, 0, 2*DIGEST_LENGTH+1);
|
|
|
|
|
|
|
|
|
|
|
|
state->n_attachments--;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-11-03 17:12:20 +01:00
|
|
|
void flush_attachment_buffer(struct parser_state *state, char *abuffer, unsigned int abuffersize){
|
|
|
|
if(write(state->fd, abuffer, state->abufpos) == -1) syslog(LOG_PRIORITY, "ERROR: write(), %s, %d, %s", __func__, __LINE__, __FILE__);
|
|
|
|
|
|
|
|
if(state->b64fd != -1){
|
|
|
|
abuffer[state->abufpos] = '\0';
|
|
|
|
if(state->base64 == 1){
|
2020-08-10 20:58:34 +02:00
|
|
|
unsigned char b64buffer[MAXBUFSIZE];
|
|
|
|
int n64 = base64_decode_attachment_buffer(abuffer, &b64buffer[0], sizeof(b64buffer));
|
2018-11-03 17:12:20 +01:00
|
|
|
if(write(state->b64fd, b64buffer, n64) == -1) syslog(LOG_PRIORITY, "ERROR: write(), %s, %d, %s", __func__, __LINE__, __FILE__);
|
|
|
|
}
|
|
|
|
else if(write(state->b64fd, abuffer, state->abufpos) == -1){
|
|
|
|
syslog(LOG_PRIORITY, "ERROR: write(), %s, %d, %s", __func__, __LINE__, __FILE__);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
state->abufpos = 0;
|
|
|
|
memset(abuffer, 0, abuffersize);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-10-17 09:26:07 +02:00
|
|
|
int parse_line(char *buf, struct parser_state *state, struct session_data *sdata, int take_into_pieces, char *writebuffer, unsigned int writebuffersize, char *abuffer, unsigned int abuffersize, struct data *data, struct config *cfg){
|
2020-08-10 20:58:34 +02:00
|
|
|
char *p;
|
|
|
|
int boundary_line=0;
|
2019-01-13 14:40:01 +01:00
|
|
|
unsigned int len;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2012-07-23 16:51:36 +02:00
|
|
|
if(cfg->debug == 1) printf("line: %s", buf);
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
state->line_num++;
|
2011-11-19 21:25:44 +01:00
|
|
|
len = strlen(buf);
|
|
|
|
|
2015-05-17 21:41:13 +02:00
|
|
|
/*
|
|
|
|
* check a few things in the 1st header
|
|
|
|
*/
|
2011-12-13 17:05:22 +01:00
|
|
|
|
2015-05-17 21:41:13 +02:00
|
|
|
if(state->is_1st_header == 1){
|
|
|
|
|
|
|
|
if(strncmp(buf, "Received: by piler", strlen("Received: by piler")) == 0){
|
|
|
|
sdata->restored_copy = 1;
|
|
|
|
}
|
|
|
|
|
2021-04-18 20:19:26 +02:00
|
|
|
if(cfg->security_header[0] && state->found_security_header == 0 && strstr(buf, cfg->security_header)){
|
2020-11-26 19:01:45 +01:00
|
|
|
state->found_security_header = 1;
|
|
|
|
}
|
|
|
|
|
2015-05-17 21:41:13 +02:00
|
|
|
if(*(cfg->piler_header_field) != 0 && strncmp(buf, cfg->piler_header_field, strlen(cfg->piler_header_field)) == 0){
|
|
|
|
sdata->restored_copy = 1;
|
|
|
|
}
|
2012-01-26 14:35:51 +01:00
|
|
|
|
2017-09-06 19:55:30 +02:00
|
|
|
if(sdata->ms_journal == 0 && ( strncmp(buf, "X-MS-Journal-Report:", strlen("X-MS-Journal-Report:")) == 0 || strncmp(buf, "X-WM-Journal-Report: journal", strlen("X-WM-Journal-Report: journal")) == 0) ){
|
|
|
|
|
|
|
|
memset(state->b_to, 0, MAXBUFSIZE);
|
|
|
|
state->tolen = 0;
|
|
|
|
memset(state->b_to_domain, 0, SMALLBUFSIZE);
|
2018-07-12 22:16:08 +02:00
|
|
|
state->todomainlen = 0;
|
2017-09-06 19:55:30 +02:00
|
|
|
|
|
|
|
clearhash(state->rcpt);
|
2018-02-26 17:38:17 +01:00
|
|
|
clearhash(state->rcpt_domain);
|
2017-09-06 19:55:30 +02:00
|
|
|
|
2016-05-30 10:17:54 +02:00
|
|
|
//if(sdata->import == 0){
|
2015-05-17 21:41:13 +02:00
|
|
|
sdata->ms_journal = 1;
|
|
|
|
memset(state->message_id, 0, SMALLBUFSIZE);
|
2016-05-30 10:17:54 +02:00
|
|
|
//}
|
2015-05-17 21:41:13 +02:00
|
|
|
}
|
|
|
|
|
2012-09-03 10:06:34 +02:00
|
|
|
}
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
|
|
|
|
if(state->message_rfc822 == 0 && (buf[0] == '\r' || buf[0] == '\n') ){
|
|
|
|
state->message_state = MSG_BODY;
|
|
|
|
|
|
|
|
if(state->is_header == 1) state->is_header = 0;
|
|
|
|
state->is_1st_header = 0;
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
|
2012-06-01 14:25:49 +02:00
|
|
|
if(take_into_pieces == 1){
|
2020-03-14 07:43:19 +01:00
|
|
|
if(state->message_state == MSG_BODY && state->fd != -1 && (state->has_to_dump_whole_body == 1 || is_substr_in_hash(state->boundaries, buf) == 0) ){
|
2012-08-21 21:57:39 +02:00
|
|
|
if(len + state->abufpos > abuffersize-1){
|
2018-11-03 17:12:20 +01:00
|
|
|
flush_attachment_buffer(state, abuffer, abuffersize);
|
2012-08-21 21:57:39 +02:00
|
|
|
}
|
|
|
|
memcpy(abuffer+state->abufpos, buf, len); state->abufpos += len;
|
|
|
|
|
2012-06-01 14:25:49 +02:00
|
|
|
state->attachments[state->n_attachments].size += len;
|
2020-03-14 07:43:19 +01:00
|
|
|
|
|
|
|
// When processing the body and writing to an attachment file, then we finish here
|
|
|
|
return 0;
|
2012-06-01 14:25:49 +02:00
|
|
|
}
|
2018-02-20 21:47:08 +01:00
|
|
|
else {
|
2012-06-01 14:25:49 +02:00
|
|
|
state->saved_size += len;
|
2012-08-21 21:57:39 +02:00
|
|
|
if(len + state->writebufpos > writebuffersize-1){
|
2017-08-08 15:34:45 +02:00
|
|
|
if(write(state->mfd, writebuffer, state->writebufpos) == -1) syslog(LOG_PRIORITY, "ERROR: write(), %s, %d, %s", __func__, __LINE__, __FILE__);
|
|
|
|
state->writebufpos = 0;
|
|
|
|
memset(writebuffer, 0, writebuffersize);
|
2012-08-21 21:57:39 +02:00
|
|
|
}
|
|
|
|
memcpy(writebuffer+state->writebufpos, buf, len); state->writebufpos += len;
|
2012-06-01 14:25:49 +02:00
|
|
|
}
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-14 07:43:19 +01:00
|
|
|
if(state->message_state == MSG_BODY && state->has_to_dump == 1 && state->pushed_pointer == 0){
|
2011-11-22 12:31:54 +01:00
|
|
|
state->pushed_pointer = 1;
|
|
|
|
|
|
|
|
|
2013-03-23 17:58:58 +01:00
|
|
|
// this is a real attachment to dump, it doesn't have to be base64 encoded!
|
2018-02-04 10:50:42 +01:00
|
|
|
if(state->attachment_name_buf[0] != 0 && strcasestr(state->attachment_name_buf, "name") && strlen(state->type) > 3 && state->n_attachments < MAX_ATTACHMENTS-1){
|
2011-11-22 12:31:54 +01:00
|
|
|
state->n_attachments++;
|
|
|
|
|
2018-02-04 10:50:42 +01:00
|
|
|
extractNameFromHeaderLine(state->attachment_name_buf, "name", state->attachments[state->n_attachments].filename, SMALLBUFSIZE);
|
2011-11-22 12:31:54 +01:00
|
|
|
snprintf(state->attachments[state->n_attachments].type, TINYBUFSIZE-1, "%s", state->type);
|
|
|
|
snprintf(state->attachments[state->n_attachments].internalname, TINYBUFSIZE-1, "%s.a%d", sdata->ttmpfile, state->n_attachments);
|
2012-09-07 15:08:50 +02:00
|
|
|
snprintf(state->attachments[state->n_attachments].aname, TINYBUFSIZE-1, "%s.a%d.bin", sdata->ttmpfile, state->n_attachments);
|
2011-11-22 12:31:54 +01:00
|
|
|
|
2012-06-01 14:25:49 +02:00
|
|
|
if(take_into_pieces == 1){
|
|
|
|
state->fd = open(state->attachments[state->n_attachments].internalname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
2012-09-07 15:08:50 +02:00
|
|
|
|
2018-02-04 10:50:42 +01:00
|
|
|
fixupEncodedHeaderLine(state->attachments[state->n_attachments].filename, SMALLBUFSIZE);
|
2012-12-09 23:14:39 +01:00
|
|
|
|
2012-09-09 23:16:09 +02:00
|
|
|
p = get_attachment_extractor_by_filename(state->attachments[state->n_attachments].filename);
|
|
|
|
|
|
|
|
snprintf(state->attachments[state->n_attachments].shorttype, TINYBUFSIZE-1, "%s", p);
|
2020-03-03 15:47:02 +01:00
|
|
|
|
2012-09-09 23:16:09 +02:00
|
|
|
if(strcmp("other", p)){
|
2013-05-15 10:59:02 +02:00
|
|
|
state->b64fd = open(state->attachments[state->n_attachments].aname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
|
2012-09-07 15:08:50 +02:00
|
|
|
state->attachments[state->n_attachments].dumped = 1;
|
|
|
|
}
|
|
|
|
|
2012-09-09 23:16:09 +02:00
|
|
|
|
|
|
|
|
2012-06-01 14:25:49 +02:00
|
|
|
if(state->fd == -1){
|
2012-11-02 22:17:21 +01:00
|
|
|
storno_attachment(state);
|
2012-06-01 14:25:49 +02:00
|
|
|
syslog(LOG_PRIORITY, "%s: error opening %s", sdata->ttmpfile, state->attachments[state->n_attachments].internalname);
|
|
|
|
}
|
|
|
|
else {
|
2020-08-10 20:58:34 +02:00
|
|
|
char puf[SMALLBUFSIZE];
|
2012-06-01 14:25:49 +02:00
|
|
|
snprintf(puf, sizeof(puf)-1, "ATTACHMENT_POINTER_%s.a%d_XXX_PILER", sdata->ttmpfile, state->n_attachments);
|
2020-08-10 20:58:34 +02:00
|
|
|
int writelen = strlen(puf);
|
2012-08-21 21:57:39 +02:00
|
|
|
if(writelen + state->writebufpos > writebuffersize-1){
|
2017-08-08 15:34:45 +02:00
|
|
|
if(write(state->mfd, writebuffer, state->writebufpos) == -1) syslog(LOG_PRIORITY, "ERROR: write(), %s, %d, %s", __func__, __LINE__, __FILE__);
|
|
|
|
state->writebufpos = 0;
|
|
|
|
memset(writebuffer, 0, writebuffersize);
|
2012-08-21 21:57:39 +02:00
|
|
|
}
|
2017-08-08 15:34:45 +02:00
|
|
|
memcpy(writebuffer+state->writebufpos, puf, writelen);
|
|
|
|
state->writebufpos += writelen;
|
2012-06-01 14:25:49 +02:00
|
|
|
}
|
2011-11-23 12:24:21 +01:00
|
|
|
}
|
2012-06-01 14:25:49 +02:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
state->has_to_dump = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(*buf == '.' && *(buf+1) == '.') buf++;
|
|
|
|
|
|
|
|
/* undefined message state */
|
|
|
|
if(state->is_header == 1 && buf[0] != ' ' && buf[0] != '\t' && strchr(buf, ':')) state->message_state = MSG_UNDEF;
|
|
|
|
|
|
|
|
/* skip empty lines */
|
|
|
|
|
|
|
|
if(state->message_rfc822 == 0 && (buf[0] == '\r' || buf[0] == '\n') ){
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-02-19 22:28:44 +01:00
|
|
|
trimBuffer(buf);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
/* skip the first line, if it's a "From <email address> date" format */
|
|
|
|
if(state->line_num == 1 && strncmp(buf, "From ", 5) == 0) return 0;
|
|
|
|
|
|
|
|
if(state->is_header == 0 && buf[0] != ' ' && buf[0] != '\t') state->message_state = MSG_BODY;
|
|
|
|
|
|
|
|
|
2016-05-30 10:17:54 +02:00
|
|
|
// journal fix
|
|
|
|
|
|
|
|
if(state->message_state == MSG_BODY && sdata->ms_journal == 1){
|
|
|
|
state->is_header = 1;
|
|
|
|
state->is_1st_header = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
/* header checks */
|
|
|
|
|
|
|
|
if(state->is_header == 1){
|
|
|
|
|
2016-05-05 21:13:45 +02:00
|
|
|
if(*(cfg->spam_header_line) != '\0' && strncmp(buf, cfg->spam_header_line, strlen(cfg->spam_header_line)) == 0){
|
|
|
|
sdata->spam_message = 1;
|
|
|
|
}
|
|
|
|
|
2018-02-20 21:47:08 +01:00
|
|
|
if(strncasecmp(buf, "X-Piler-Envelope-To:", strlen("X-Piler-Envelope-To:")) == 0){
|
2018-02-20 20:35:31 +01:00
|
|
|
state->message_state = MSG_ENVELOPE_TO;
|
|
|
|
buf += strlen("X-Piler-Envelope-To:");
|
|
|
|
}
|
|
|
|
else if(strncasecmp(buf, "From:", strlen("From:")) == 0){
|
2018-01-11 09:59:58 +01:00
|
|
|
state->message_state = MSG_FROM;
|
|
|
|
buf += strlen("From:");
|
|
|
|
}
|
2020-12-12 21:43:16 +01:00
|
|
|
else if(strncasecmp(buf, "Sender:", strlen("Sender:")) == 0){
|
|
|
|
state->message_state = MSG_SENDER;
|
|
|
|
buf += strlen("Sender:");
|
|
|
|
}
|
2014-03-14 12:17:50 +01:00
|
|
|
else if(strncasecmp(buf, "Content-Type:", strlen("Content-Type:")) == 0){
|
|
|
|
state->message_state = MSG_CONTENT_TYPE;
|
|
|
|
}
|
2020-03-14 07:43:19 +01:00
|
|
|
else if(strncasecmp(buf, "Content-Transfer-Encoding:", strlen("Content-Transfer-Encoding:")) == 0){
|
|
|
|
state->message_state = MSG_CONTENT_TRANSFER_ENCODING;
|
|
|
|
if(state->is_1st_header == 1 && strcasestr(buf, "base64")){
|
|
|
|
state->has_to_dump = 1;
|
|
|
|
state->has_to_dump_whole_body = 1;
|
|
|
|
}
|
|
|
|
}
|
2018-02-06 19:55:17 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We only enter MSG_CONTENT_DISPOSITION state if we couldn't find
|
|
|
|
* the filename in MSG_CONTENT_TYPE state. We also assume that
|
|
|
|
* Content-Type: comes first, then Content-Disposition:
|
|
|
|
*/
|
|
|
|
else if(strncasecmp(buf, "Content-Disposition:", strlen("Content-Disposition:")) == 0 && strcasestr(state->attachment_name_buf, "name") == NULL){
|
2014-03-14 12:17:50 +01:00
|
|
|
state->message_state = MSG_CONTENT_DISPOSITION;
|
|
|
|
}
|
2018-01-11 09:59:58 +01:00
|
|
|
else if(strncasecmp(buf, "To:", 3) == 0){
|
|
|
|
state->message_state = MSG_TO;
|
|
|
|
buf += strlen("To:");
|
|
|
|
}
|
|
|
|
else if(strncasecmp(buf, "Cc:", 3) == 0){
|
|
|
|
state->message_state = MSG_CC;
|
|
|
|
buf += strlen("Cc:");
|
|
|
|
}
|
|
|
|
else if(strncasecmp(buf, "Bcc:", 4) == 0){
|
|
|
|
state->message_state = MSG_CC;
|
|
|
|
buf += strlen("Bcc:");
|
|
|
|
}
|
2021-12-09 11:02:24 +01:00
|
|
|
else if(strncasecmp(buf, "Message-Id:", 11) == 0){
|
|
|
|
state->message_state = MSG_MESSAGE_ID;
|
|
|
|
buf += strlen("Message-Id:");
|
|
|
|
}
|
2012-02-08 23:14:28 +01:00
|
|
|
else if(strncasecmp(buf, "References:", 11) == 0) state->message_state = MSG_REFERENCES;
|
2018-01-11 09:59:58 +01:00
|
|
|
else if(strncasecmp(buf, "Subject:", strlen("Subject:")) == 0){
|
|
|
|
state->message_state = MSG_SUBJECT;
|
|
|
|
buf += strlen("Subject:");
|
|
|
|
}
|
|
|
|
else if(strncasecmp(buf, "Recipient:", strlen("Recipient:")) == 0){
|
|
|
|
state->message_state = MSG_RECIPIENT;
|
|
|
|
buf += strlen("Recipient:");
|
|
|
|
}
|
2016-05-30 10:17:54 +02:00
|
|
|
if(sdata->ms_journal == 1 && (state->message_state == MSG_TO || state->message_state == MSG_RECIPIENT) ){
|
|
|
|
p = strstr(buf, "Expanded:");
|
|
|
|
if(p) *p = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-10-19 10:34:29 +02:00
|
|
|
/*
|
2015-11-10 16:06:47 +01:00
|
|
|
* by default sdata->sent = 0, and let the parser extract value from the Date: header
|
2013-10-19 10:34:29 +02:00
|
|
|
*/
|
|
|
|
|
2015-11-10 16:06:47 +01:00
|
|
|
else if(strncasecmp(buf, "Date:", strlen("Date:")) == 0 && state->is_1st_header == 1 && sdata->sent == 0){
|
2014-12-03 14:30:26 +01:00
|
|
|
|
|
|
|
if(strstr(buf, "=?") && strstr(buf, "?=")) fixupEncodedHeaderLine(buf, MAXBUFSIZE);
|
|
|
|
|
2021-10-30 18:40:46 +02:00
|
|
|
sdata->sent = parse_date_header(buf+5);
|
2013-10-19 10:34:29 +02:00
|
|
|
|
2015-11-10 16:06:47 +01:00
|
|
|
/* allow +2 days drift in the parsed Date: value */
|
|
|
|
|
|
|
|
if(sdata->sent - sdata->now > 2*86400) sdata->sent = sdata->now;
|
2013-10-19 10:34:29 +02:00
|
|
|
}
|
|
|
|
|
2021-10-30 18:40:46 +02:00
|
|
|
else if(strncasecmp(buf, "Delivery-date:", strlen("Delivery-date:")) == 0 && sdata->delivered == 0) sdata->delivered = parse_date_header(buf+14);
|
2021-07-20 13:00:53 +02:00
|
|
|
else if(strncasecmp(buf, "Received:", strlen("Received:")) == 0){
|
|
|
|
state->message_state = MSG_RECEIVED;
|
|
|
|
state->received_header++;
|
|
|
|
}
|
2018-01-11 09:59:58 +01:00
|
|
|
else if(cfg->extra_to_field[0] != '\0' && strncasecmp(buf, cfg->extra_to_field, strlen(cfg->extra_to_field)) == 0){
|
|
|
|
state->message_state = MSG_TO;
|
|
|
|
buf += strlen(cfg->extra_to_field);
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(state->message_state == MSG_MESSAGE_ID && state->message_id[0] == 0){
|
2021-12-09 11:02:24 +01:00
|
|
|
while(isspace(*buf)){
|
|
|
|
buf++;
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2021-12-09 11:02:24 +01:00
|
|
|
snprintf(state->message_id, SMALLBUFSIZE-1, "%s", buf);
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2020-03-14 07:43:19 +01:00
|
|
|
if(state->message_state == MSG_CONTENT_TYPE || state->message_state == MSG_CONTENT_DISPOSITION){
|
|
|
|
fill_attachment_name_buf(state, buf);
|
|
|
|
}
|
|
|
|
|
2021-08-14 19:15:07 +02:00
|
|
|
if(state->received_header == 1 && state->message_state == MSG_RECEIVED && strlen(state->receivedbuf) + len < sizeof(state->receivedbuf)){
|
2021-07-20 13:00:53 +02:00
|
|
|
memcpy(&(state->receivedbuf[strlen(state->receivedbuf)]), buf, len);
|
|
|
|
}
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
/* we are interested in only From:, To:, Subject:, Received:, Content-*: header lines */
|
|
|
|
if(state->message_state <= 0) return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-03-24 01:20:12 +01:00
|
|
|
if(state->message_state == MSG_CONTENT_TYPE){
|
|
|
|
if((p = strcasestr(buf, "boundary"))){
|
|
|
|
extract_boundary(p, state);
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-14 07:43:19 +01:00
|
|
|
/*
|
2015-01-19 16:00:38 +01:00
|
|
|
* A normal journal looks like this:
|
|
|
|
*
|
|
|
|
* Sender: sender@domain
|
|
|
|
* Subject: Test normal
|
|
|
|
* Message-Id: ...
|
|
|
|
* Recipient: user1@domain
|
|
|
|
* Recipient: user2@domain, Forwarded: user1@domain
|
|
|
|
*
|
|
|
|
* However if outlook forwards an email, then the journal is somewhat changed:
|
|
|
|
*
|
|
|
|
* Sender: sender@domain
|
|
|
|
* Subject: Test through outlook
|
|
|
|
* Message-Id: ...
|
|
|
|
* To: user1@domain
|
|
|
|
* To: user2@domain, Forwarded: user1@domain
|
2016-05-30 10:17:54 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* Outlook.com has the following scheme, when expanded from a distribution list:
|
|
|
|
*
|
|
|
|
* Sender: sender@domain
|
|
|
|
* Subject: Test Email
|
|
|
|
* Message-Id: ...
|
|
|
|
* To: user1@domain, Expanded: listaddress@domain
|
|
|
|
* To: user2@domain, Expanded: listaddress@domain
|
|
|
|
*
|
2015-01-19 16:00:38 +01:00
|
|
|
*/
|
|
|
|
|
2012-10-08 21:06:48 +02:00
|
|
|
|
2012-09-03 10:06:34 +02:00
|
|
|
|
2012-09-28 10:34:04 +02:00
|
|
|
|
2012-02-08 23:14:28 +01:00
|
|
|
if(state->is_1st_header == 1 && state->message_state == MSG_REFERENCES){
|
|
|
|
if(strncasecmp(buf, "References:", 11) == 0) parse_reference(state, buf+11);
|
|
|
|
else parse_reference(state, buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-03-14 21:26:18 +01:00
|
|
|
if(state->is_1st_header == 1){
|
2011-11-28 14:21:14 +01:00
|
|
|
|
2016-03-14 21:26:18 +01:00
|
|
|
if(state->message_state == MSG_SUBJECT && strlen(state->b_subject) + strlen(buf) < MAXBUFSIZE-1){
|
2018-01-11 09:59:58 +01:00
|
|
|
// buffer the subject lines, and decode it later
|
|
|
|
strncat(state->b_subject, buf, MAXBUFSIZE-strlen(state->b_subject)-1);
|
2016-03-14 21:26:18 +01:00
|
|
|
}
|
|
|
|
else { fixupEncodedHeaderLine(buf, MAXBUFSIZE); }
|
2012-02-07 17:21:23 +01:00
|
|
|
}
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
/* Content-type: checking */
|
|
|
|
|
|
|
|
if(state->message_state == MSG_CONTENT_TYPE){
|
|
|
|
state->message_rfc822 = 0;
|
|
|
|
|
|
|
|
/* extract Content type */
|
|
|
|
|
|
|
|
p = strchr(buf, ':');
|
|
|
|
if(p){
|
|
|
|
p++;
|
|
|
|
if(*p == ' ' || *p == '\t') p++;
|
2011-11-22 12:31:54 +01:00
|
|
|
snprintf(state->type, TINYBUFSIZE-1, "%s", p);
|
2013-03-24 01:20:12 +01:00
|
|
|
//state->content_type_is_set = 1;
|
2011-11-22 12:31:54 +01:00
|
|
|
p = strchr(state->type, ';');
|
2011-11-14 15:57:52 +01:00
|
|
|
if(p) *p = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if(strcasestr(buf, "text/plain") ||
|
|
|
|
strcasestr(buf, "multipart/mixed") ||
|
|
|
|
strcasestr(buf, "multipart/alternative") ||
|
|
|
|
strcasestr(buf, "multipart/report") ||
|
|
|
|
strcasestr(buf, "message/delivery-status") ||
|
|
|
|
strcasestr(buf, "text/rfc822-headers") ||
|
2013-09-11 09:19:29 +02:00
|
|
|
strcasestr(buf, "message/rfc822")
|
2011-11-14 15:57:52 +01:00
|
|
|
){
|
2011-11-16 14:47:47 +01:00
|
|
|
state->textplain = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
else if(strcasestr(buf, "text/html")){
|
2011-11-16 14:47:47 +01:00
|
|
|
state->texthtml = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
/* switch (back) to header mode if we encounterd an attachment with "message/rfc822" content-type */
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(strcasestr(buf, "message/rfc822")){
|
|
|
|
state->message_rfc822 = 1;
|
|
|
|
state->is_header = 1;
|
2016-05-30 10:17:54 +02:00
|
|
|
|
2021-07-24 12:39:53 +02:00
|
|
|
state->has_to_dump = 0;
|
|
|
|
|
2016-05-30 10:17:54 +02:00
|
|
|
if(sdata->ms_journal == 1){
|
|
|
|
state->is_1st_header = 1;
|
|
|
|
|
|
|
|
// reset all headers, except To:
|
|
|
|
|
|
|
|
memset(state->b_subject, 0, MAXBUFSIZE);
|
|
|
|
memset(state->b_body, 0, BIGBUFSIZE);
|
|
|
|
memset(state->b_from, 0, SMALLBUFSIZE);
|
|
|
|
memset(state->b_from_domain, 0, SMALLBUFSIZE);
|
2020-12-12 21:43:16 +01:00
|
|
|
memset(state->b_sender, 0, SMALLBUFSIZE);
|
|
|
|
memset(state->b_sender_domain, 0, SMALLBUFSIZE);
|
2016-05-30 10:17:54 +02:00
|
|
|
memset(state->message_id, 0, SMALLBUFSIZE);
|
|
|
|
|
|
|
|
sdata->ms_journal = 0;
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-02-04 10:50:42 +01:00
|
|
|
if(strcasestr(buf, "charset")) extractNameFromHeaderLine(buf, "charset", state->charset, TINYBUFSIZE);
|
2014-08-30 21:10:29 +02:00
|
|
|
if(strcasestr(state->charset, "UTF-8")) state->utf8 = 1;
|
2011-11-19 21:25:44 +01:00
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
|
|
|
if(state->message_state == MSG_CONTENT_TRANSFER_ENCODING){
|
2011-11-22 12:31:54 +01:00
|
|
|
if(strcasestr(buf, "base64")) state->base64 = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
if(strcasestr(buf, "quoted-printable")) state->qp = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
/* boundary check, and reset variables */
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2013-08-22 00:33:39 +02:00
|
|
|
boundary_line = is_substr_in_hash(state->boundaries, buf);
|
2013-08-14 14:24:30 +02:00
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
if(!strstr(buf, "boundary=") && !strstr(buf, "boundary =") && boundary_line == 1){
|
2013-03-24 01:20:12 +01:00
|
|
|
state->is_header = 1;
|
|
|
|
|
|
|
|
//state->content_type_is_set = 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->has_to_dump == 1){
|
2012-08-21 21:57:39 +02:00
|
|
|
if(take_into_pieces == 1 && state->fd != -1){
|
|
|
|
if(state->abufpos > 0){
|
2018-11-03 17:12:20 +01:00
|
|
|
flush_attachment_buffer(state, abuffer, abuffersize);
|
2012-08-21 21:57:39 +02:00
|
|
|
}
|
|
|
|
close(state->fd);
|
2012-09-07 15:08:50 +02:00
|
|
|
close(state->b64fd);
|
2012-08-21 21:57:39 +02:00
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
state->fd = -1;
|
2012-09-07 15:08:50 +02:00
|
|
|
state->b64fd = -1;
|
2011-11-16 14:47:47 +01:00
|
|
|
}
|
|
|
|
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
state->has_to_dump = 1;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
state->base64 = 0; state->textplain = 0; state->texthtml = state->octetstream = 0;
|
|
|
|
state->skip_html = 0;
|
|
|
|
state->utf8 = 0;
|
|
|
|
state->qp = 0;
|
|
|
|
|
|
|
|
state->realbinary = 0;
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
state->pushed_pointer = 0;
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
memset(state->type, 0, TINYBUFSIZE);
|
2021-12-09 11:27:51 +01:00
|
|
|
memset(state->charset, 0, TINYBUFSIZE);
|
2011-11-22 12:31:54 +01:00
|
|
|
|
2014-03-14 12:17:50 +01:00
|
|
|
memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
|
|
|
|
state->anamepos = 0;
|
|
|
|
|
2011-11-22 12:31:54 +01:00
|
|
|
state->message_state = MSG_UNDEF;
|
|
|
|
|
2020-03-03 15:47:02 +01:00
|
|
|
return 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if(boundary_line == 1){ return 0; }
|
|
|
|
|
|
|
|
|
|
|
|
/* end of boundary check */
|
|
|
|
|
|
|
|
|
2011-11-28 14:21:14 +01:00
|
|
|
/* skip irrelevant headers */
|
2020-12-12 21:43:16 +01:00
|
|
|
if(state->is_header == 1 && state->message_state != MSG_FROM && state->message_state != MSG_SENDER && state->message_state != MSG_TO && state->message_state != MSG_CC && state->message_state != MSG_RECIPIENT && state->message_state != MSG_ENVELOPE_TO) return 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
2011-11-19 21:25:44 +01:00
|
|
|
/* don't process body if it's not a text or html part */
|
|
|
|
if(state->message_state == MSG_BODY && state->textplain == 0 && state->texthtml == 0) return 0;
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
2011-11-16 14:47:47 +01:00
|
|
|
if(state->base64 == 1 && state->message_state == MSG_BODY){
|
2012-11-03 23:42:36 +01:00
|
|
|
decodeBase64(buf);
|
2011-11-16 14:47:47 +01:00
|
|
|
fixupBase64EncodedLine(buf, state);
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
2011-12-07 15:24:52 +01:00
|
|
|
if(state->message_state == MSG_BODY && state->qp == 1){
|
|
|
|
fixupSoftBreakInQuotedPritableLine(buf, state); // 2011.12.07
|
|
|
|
decodeQP(buf);
|
2011-11-14 15:57:52 +01:00
|
|
|
}
|
|
|
|
|
2012-07-23 16:20:01 +02:00
|
|
|
/* I believe that we can live without this function call */
|
|
|
|
//decodeURL(buf);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2017-09-06 19:55:30 +02:00
|
|
|
/* remove all HTML tags */
|
|
|
|
if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state);
|
|
|
|
|
|
|
|
|
2021-12-09 11:27:51 +01:00
|
|
|
if(state->texthtml == 1){
|
|
|
|
size_t buflen = strlen(buf);
|
|
|
|
decodeHTML(buf, state->utf8);
|
|
|
|
/* decodeHTML converted some entities to iso-8859-1 */
|
|
|
|
if(state->utf8 != 1 && strlen(buf) != buflen){
|
|
|
|
/* no charset or us-ascii: switch to iso-8859-1 */
|
|
|
|
if (state->charset[0] == 0 || strcasecmp(state->charset, "us-ascii") == 0){
|
|
|
|
syslog(LOG_PRIORITY, "%s: assuming iso-8859-1 encoding for HTML (was '%s')", sdata->ttmpfile, state->charset);
|
|
|
|
snprintf(state->charset, TINYBUFSIZE-1, "ISO8859-1");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2011-12-07 15:24:52 +01:00
|
|
|
/* encode the body if it's not utf-8 encoded */
|
2014-08-30 21:10:29 +02:00
|
|
|
if(state->message_state == MSG_BODY && state->utf8 != 1){
|
2020-08-10 20:58:34 +02:00
|
|
|
char tmpbuf[MAXBUFSIZE];
|
|
|
|
int result = utf8_encode(buf, strlen(buf), &tmpbuf[0], sizeof(tmpbuf), state->charset);
|
2014-08-30 21:10:29 +02:00
|
|
|
if(result == OK) snprintf(buf, MAXBUFSIZE-1, "%s", tmpbuf);
|
|
|
|
}
|
2011-11-14 15:57:52 +01:00
|
|
|
|
2020-02-16 10:15:11 +01:00
|
|
|
tokenize(buf, state, sdata, data, cfg);
|
2011-11-14 15:57:52 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|