diff --git a/etc/cron.jobs.in b/etc/cron.jobs.in new file mode 100644 index 00000000..40b8c63e --- /dev/null +++ b/etc/cron.jobs.in @@ -0,0 +1,8 @@ + + +sphinx cronjob: + +*/2 * * * * /usr/local/bin/indexer --quiet delta1 --rotate && sleep 2 && /usr/local/bin/indexer --quiet --merge main1 delta1 --rotate + +*/5 * * * * /usr/local/bin/indexer --quiet tag1 --rotate + diff --git a/src/config.h b/src/config.h index 55378c7f..6f913431 100644 --- a/src/config.h +++ b/src/config.h @@ -11,7 +11,7 @@ #define PROGNAME "piler" -#define VERSION "0.1.9" +#define VERSION "0.1.10" #define PROGINFO VERSION ", Janos SUTO \n\n" CONFIGURE_PARAMS "\n\nSend bugs/issues to https://jira.acts.hu:8443/\n" diff --git a/src/decoder.c b/src/decoder.c index afaa4334..8c7ab985 100644 --- a/src/decoder.c +++ b/src/decoder.c @@ -134,47 +134,6 @@ int decodeBase64(char *p){ } -void decodeUTF8(char *p){ - int i, k=0, a, b; - unsigned char c, c1, c2; - - if(p == NULL) return; - - for(i=0; i= 192 && c1 <= 223){ - c = 64 * (c1 - 192) + c2 - 128; - i += 5; - } - - } - - if(c >= 192 && c <= 223){ - c = 64 * (c - 192) + p[i+1] - 128; - i++; - } - - p[k] = c; - k++; - } - - p[k] = '\0'; -} - - void decodeQP(char *p){ int i, k=0, a, b; char c; @@ -201,44 +160,59 @@ void decodeQP(char *p){ } -void decodeHTML(char *s){ - char *p; - int i, c, k=0, unknown='q'; +void decodeHTML(char *p){ + unsigned char buf[MAXBUFSIZE], __u[8]; + char *s, *q; + int count=0, len, c; struct mi key, *res; - if(s == NULL) return; + if(p == NULL || strlen(p) == 0) return; - for(i=0; ival <= 255) c = res->val; - else c = unknown; + if(res && res->val <= 255){ + utf8_encode_char(res->val, &__u[0], sizeof(__u), &len); + memcpy(&buf[count], &__u[0], len); + count += len; + } + else { + buf[count] = 'q'; + count++; + } } - i += strlen(s+i); - *p = ';'; - + s = q; } - } - s[k] = c; - k++; + } + else { + buf[count] = *s; + count++; + } } - s[k] = '\0'; + buf[count] = '\0'; count++; + + memcpy(p, buf, count); } @@ -280,3 +254,69 @@ void decodeURL(char *p){ p[k] = '\0'; } + +inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len){ + int count=0; + + memset(buf, 0, buflen); + + /* + * Code point 1st byte 2nd byte 3rd byte 4th byte + * ---------- -------- -------- -------- -------- + * U+0000..U+007F 00..7F + * U+0080..U+07FF C2..DF 80..BF + * U+0800..U+0FFF E0 A0..BF 80..BF + */ + + if(c <= 0x7F){ + *(buf+count) = c; + count++; + } + + else if(c <= 0x7FF){ + *(buf+count) = ( 0xC0 | (c >> 6) ); + count++; + *(buf+count) = ( 0x80 | (c & 0x3F) ); + count++; + } + + + else if (c <= 0xFFFF){ + *(buf+count) = ( 0xE0 | (c >> 12) ); + count++; + *(buf+count) = ( 0x80 | ((c >> 6) & 0x3F) ); + count++; + *(buf+count) = ( 0x80 | (c & 0x3F) ); + count++; + } + + *len = count; +} + + +void utf8_encode(unsigned char *p){ + int count=0, len; + unsigned char *u, *s, utf8[MAXBUFSIZE], __u[8]; + + if(p == NULL || strlen((char *)p) == 0) return; + + //printf("encoding: *%s*\n", p); + + memset(utf8, 0, MAXBUFSIZE); + u = &utf8[0]; + s = p; + + for(; *s; s++){ + + utf8_encode_char(*s, &__u[0], sizeof(__u), &len); + + //printf("%s", __u); + memcpy(u+count, &__u[0], len); + + count += len; + } + + *(u+count) = '\0'; count++; + memcpy(p, u, count); +} + diff --git a/src/decoder.h b/src/decoder.h index 3c8e9e5b..29729bc9 100644 --- a/src/decoder.h +++ b/src/decoder.h @@ -7,9 +7,10 @@ void sanitiseBase64(char *s); int decodeBase64(char *p); -void decodeUTF8(char *p); void decodeQP(char *p); void decodeHTML(char *p); void decodeURL(char *p); +inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len); +void utf8_encode(unsigned char *p); #endif /* _DECODER_H */ diff --git a/src/defs.h b/src/defs.h index b595fe9b..72260be2 100644 --- a/src/defs.h +++ b/src/defs.h @@ -108,6 +108,7 @@ struct _state { char attachedfile[RND_STR_LEN+SMALLBUFSIZE]; char message_id[SMALLBUFSIZE]; char miscbuf[MAX_TOKEN_LEN]; + char qpbuf[MAX_TOKEN_LEN]; unsigned long n_token; unsigned long n_subject_token; unsigned long n_body_token; diff --git a/src/parser.c b/src/parser.c index ef2d71bf..e43d2b45 100644 --- a/src/parser.c +++ b/src/parser.c @@ -333,15 +333,18 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state); - if(state->message_state == MSG_BODY){ - if(state->qp == 1) decodeQP(buf); - if(state->utf8 == 1) decodeUTF8(buf); + if(state->message_state == MSG_BODY && state->qp == 1){ + fixupSoftBreakInQuotedPritableLine(buf, state); // 2011.12.07 + decodeQP(buf); } decodeURL(buf); if(state->texthtml == 1) decodeHTML(buf); + /* encode the body if it's not utf-8 encoded */ + if(state->message_state == MSG_BODY && state->utf8 != 1) utf8_encode((unsigned char*)buf); + translateLine((unsigned char*)buf, state); diff --git a/src/parser_utils.c b/src/parser_utils.c index fd6df171..9b996f66 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -47,6 +47,7 @@ void init_state(struct _state *state){ memset(state->message_id, 0, SMALLBUFSIZE); memset(state->miscbuf, 0, MAX_TOKEN_LEN); + memset(state->qpbuf, 0, MAX_TOKEN_LEN); memset(state->filename, 0, TINYBUFSIZE); memset(state->type, 0, TINYBUFSIZE); @@ -232,7 +233,8 @@ void fixupEncodedHeaderLine(char *buf){ if(sb){ decodeBase64(s+3); } if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } } - if(strncasecmp(start+1, "utf-8", 5) == 0) decodeUTF8(s+3); + /* encode everything if it's not utf-8 encoded */ + if(strncasecmp(start+1, "utf-8", 5)) utf8_encode((unsigned char*)s+3); strncat(puf, s+3, sizeof(puf)-1); @@ -260,6 +262,42 @@ void fixupEncodedHeaderLine(char *buf){ } +void fixupSoftBreakInQuotedPritableLine(char *buf, struct _state *state){ + int i=0; + char *p, puf[MAXBUFSIZE]; + + if(strlen(state->qpbuf) > 0){ + memset(puf, 0, MAXBUFSIZE); + strncpy(puf, state->qpbuf, MAXBUFSIZE-1); + strncat(puf, buf, MAXBUFSIZE-1); + + memset(buf, 0, MAXBUFSIZE); + memcpy(buf, puf, MAXBUFSIZE); + + memset(state->qpbuf, 0, MAX_TOKEN_LEN); + } + + if(buf[strlen(buf)-1] == '='){ + buf[strlen(buf)-1] = '\0'; + i = 1; + } + + if(i == 1){ + p = strrchr(buf, ' '); + if(p){ + memset(state->qpbuf, 0, MAX_TOKEN_LEN); + if(strlen(p) < MAX_TOKEN_LEN-1){ + //snprintf(state->qpbuf, MAX_TOKEN_LEN-1, "%s", p); + memcpy(&(state->qpbuf[0]), p, MAX_TOKEN_LEN-1); + + *p = '\0'; + } + + } + } +} + + void fixupBase64EncodedLine(char *buf, struct _state *state){ char *p, puf[MAXBUFSIZE]; @@ -403,16 +441,9 @@ int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state){ void translateLine(unsigned char *p, struct _state *state){ int url=0; - unsigned char *q=NULL, *P=p; for(; *p; p++){ - /* save position of '=', 2006.01.05, SJ */ - - if(state->qp == 1 && *p == '='){ - q = p; - } - if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC) && *p == '@'){ continue; } if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; } @@ -434,7 +465,7 @@ void translateLine(unsigned char *p, struct _state *state){ } } - if(delimiter_characters[(unsigned int)*p] != ' ' || isalnum(*p) == 0) + if(delimiter_characters[(unsigned int)*p] != ' ') *p = ' '; else { *p = tolower(*p); @@ -442,11 +473,6 @@ void translateLine(unsigned char *p, struct _state *state){ } - /* restore the soft break in quoted-printable parts */ - - if(state->qp == 1 && q && (q > P + strlen((char*)P) - 3)) - *q = '='; - } @@ -526,7 +552,8 @@ void fixURL(char *url){ q = strchr(p, '/'); if(q) *q = '\0'; - snprintf(fixed_url, sizeof(fixed_url)-1, "URL*%s ", p); + snprintf(fixed_url, sizeof(fixed_url)-1, "__URL__%s ", p); + fix_email_address_for_sphinx(fixed_url); strcpy(url, fixed_url); } diff --git a/src/session.c b/src/session.c index 7bc14b63..07fcdadb 100644 --- a/src/session.c +++ b/src/session.c @@ -51,8 +51,10 @@ void handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){ mysql_options(&(sdata.mysql), MYSQL_OPT_CONNECT_TIMEOUT, (const char*)&cfg->mysql_connect_timeout); mysql_options(&(sdata.mysql), MYSQL_OPT_RECONNECT, (const char*)&rc); - if(mysql_real_connect(&(sdata.mysql), cfg->mysqlhost, cfg->mysqluser, cfg->mysqlpwd, cfg->mysqldb, cfg->mysqlport, cfg->mysqlsocket, 0)) + if(mysql_real_connect(&(sdata.mysql), cfg->mysqlhost, cfg->mysqluser, cfg->mysqlpwd, cfg->mysqldb, cfg->mysqlport, cfg->mysqlsocket, 0)){ db_conn = 1; + mysql_real_query(&(sdata.mysql), "SET NAMES utf8", strlen("SET NAMES utf8")); + } else syslog(LOG_PRIORITY, "%s", ERR_MYSQL_CONNECT); #endif diff --git a/util/db-mysql.sql b/util/db-mysql.sql index e8c1a819..c3c8700b 100644 --- a/util/db-mysql.sql +++ b/util/db-mysql.sql @@ -1,3 +1,7 @@ +create database `piler` character set 'utf8'; +use `piler`; + + drop table if exists `sph_counter`; create table if not exists `sph_counter` ( `counter_id` int not null, @@ -5,6 +9,7 @@ create table if not exists `sph_counter` ( primary key (`counter_id`) ); + drop table if exists `sph_index`; create table if not exists `sph_index` ( `id` bigint not null,