This commit is contained in:
SJ 2011-12-07 15:24:52 +01:00
parent b8e321fc9d
commit 64dd86e169
9 changed files with 172 additions and 85 deletions

8
etc/cron.jobs.in Normal file
View File

@ -0,0 +1,8 @@
sphinx cronjob:
*/2 * * * * /usr/local/bin/indexer --quiet delta1 --rotate && sleep 2 && /usr/local/bin/indexer --quiet --merge main1 delta1 --rotate
*/5 * * * * /usr/local/bin/indexer --quiet tag1 --rotate

View File

@ -11,7 +11,7 @@
#define PROGNAME "piler" #define PROGNAME "piler"
#define VERSION "0.1.9" #define VERSION "0.1.10"
#define PROGINFO VERSION ", Janos SUTO <sj@acts.hu>\n\n" CONFIGURE_PARAMS "\n\nSend bugs/issues to https://jira.acts.hu:8443/\n" #define PROGINFO VERSION ", Janos SUTO <sj@acts.hu>\n\n" CONFIGURE_PARAMS "\n\nSend bugs/issues to https://jira.acts.hu:8443/\n"

View File

@ -134,47 +134,6 @@ int decodeBase64(char *p){
} }
/*
 * Collapse two-byte UTF-8 sequences in p into single bytes, in place.
 *
 * Two input forms are recognised:
 *   - a quoted-printable pair "=XX=YY" whose first decoded byte is a
 *     two-byte UTF-8 lead byte (0xC0..0xDF);
 *   - a raw lead byte (0xC0..0xDF) followed by one more byte.
 * In both cases the pair is replaced by 64 * (lead - 192) + next - 128,
 * truncated to one byte. Every other byte is copied unchanged.
 *
 * p: NUL-terminated string, rewritten in place; the result is never longer
 *    than the input. A NULL pointer is ignored.
 */
void decodeUTF8(char *p){
   size_t i, k=0, len;
   int a, b;
   unsigned char c, c1, c2;

   if(p == NULL) return;

   /* writes only ever land at or before the read position, so the length can be taken once */
   len = strlen(p);

   for(i=0; i<len; i++){
      c = p[i];

      /*
       * <ctype.h> functions take an unsigned char value (or EOF); passing a
       * negative plain char is undefined behaviour, hence the casts. The &&
       * chain stops at the terminating NUL, so nothing past it is read.
       */
      if(p[i] == '=' && isxdigit((unsigned char)p[i+1]) && isxdigit((unsigned char)p[i+2]) &&
         p[i+3] == '=' && isxdigit((unsigned char)p[i+4]) && isxdigit((unsigned char)p[i+5])){

         a = p[i+1];
         b = p[i+2];
         c1 = 16 * hex_table[a] + hex_table[b];

         a = p[i+4];
         b = p[i+5];
         c2 = 16 * hex_table[a] + hex_table[b];

         if(c1 >= 192 && c1 <= 223){
            c = 64 * (c1 - 192) + c2 - 128;
            i += 5;   /* the whole "=XX=YY" group has been consumed */
         }
      }

      if(c >= 192 && c <= 223){
         c = 64 * (c - 192) + p[i+1] - 128;
         i++;
      }

      p[k] = c;
      k++;

      /* a decoded NUL ends the string, exactly as the per-iteration strlen() used to */
      if(c == '\0') break;
   }

   p[k] = '\0';
}
void decodeQP(char *p){ void decodeQP(char *p){
int i, k=0, a, b; int i, k=0, a, b;
char c; char c;
@ -201,44 +160,59 @@ void decodeQP(char *p){
} }
void decodeHTML(char *s){ void decodeHTML(char *p){
char *p; unsigned char buf[MAXBUFSIZE], __u[8];
int i, c, k=0, unknown='q'; char *s, *q;
int count=0, len, c;
struct mi key, *res; struct mi key, *res;
if(s == NULL) return; if(p == NULL || strlen(p) == 0) return;
for(i=0; i<strlen(s); i++){ s = p;
c = s[i];
if(*(s+i) == '&'){ memset(buf, 0, sizeof(buf));
p = strchr(s+i, ';');
if(p){
*p = '\0';
if(*(s+i+1) == '#'){ for(; *s; s++){
c = atoi(s+i+2); if(*s == '&'){
if(c == 0) c = unknown; q = strchr(s, ';');
if(q){
*q = '\0';
if(*(s+1) == '#'){
c = atoi(s+2);
if(c == 0) c = 'q';
buf[count] = (unsigned char)c;
count++;
} }
else { else {
key.entity = s+i; key.entity = s;
res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi); res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
if(res && res->val <= 255) c = res->val; if(res && res->val <= 255){
else c = unknown; utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
memcpy(&buf[count], &__u[0], len);
count += len;
} }
else {
i += strlen(s+i); buf[count] = 'q';
*p = ';'; count++;
} }
} }
s[k] = c; s = q;
k++;
} }
s[k] = '\0'; }
else {
buf[count] = *s;
count++;
}
}
buf[count] = '\0'; count++;
memcpy(p, buf, count);
} }
@ -280,3 +254,69 @@ void decodeURL(char *p){
p[k] = '\0'; p[k] = '\0';
} }
/*
 * Encode one byte value, interpreted as code point U+0000..U+00FF, as UTF-8.
 *
 *   Code point       1st byte  2nd byte
 *   ---------------  --------  --------
 *   U+0000..U+007F   00..7F
 *   U+0080..U+00FF   C2..C3    80..BF
 *
 * c:      code point to encode; being an unsigned char it never exceeds 0xFF,
 *         so at most two bytes are ever produced
 * buf:    output buffer; it is zero-filled before the sequence is written
 * buflen: size of buf in bytes
 * len:    receives the number of bytes written, or 0 if buf is NULL or too
 *         small for the sequence
 *
 * The definition is deliberately not marked "inline": under C99/C11 a bare
 * inline definition does not provide an external definition, so calls that
 * the compiler chooses not to inline would fail to link.
 */
void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len){
   int count=0;

   if(len == NULL) return;

   *len = 0;

   if(buf == NULL || buflen <= 0) return;

   memset(buf, 0, buflen);

   if(c <= 0x7F){
      buf[count++] = c;
   }
   else if(buflen >= 2){
      buf[count++] = 0xC0 | (c >> 6);
      buf[count++] = 0x80 | (c & 0x3F);
   }

   *len = count;
}
/*
 * Re-encode a NUL-terminated single-byte string as UTF-8, in place.
 *
 * Each input byte is passed to utf8_encode_char(); bytes above 0x7F become
 * two-byte sequences, so the result can be up to twice as long as the input.
 * The encoded text is assembled in a MAXBUFSIZE scratch buffer and encoding
 * stops once the next sequence (plus the terminating NUL) would no longer
 * fit, so the output is truncated instead of overflowing the scratch buffer.
 *
 * p: string to convert; NULL and empty strings are left untouched.
 *
 * NOTE(review): the function has no way to know the capacity of p. The caller
 * must provide room for the expanded text -- confirm at each call site.
 */
void utf8_encode(unsigned char *p){
   int count=0, len;
   unsigned char *s, utf8[MAXBUFSIZE], enc[8];

   if(p == NULL || *p == '\0') return;

   for(s = p; *s; s++){
      utf8_encode_char(*s, enc, sizeof(enc), &len);

      /* keep one byte free for the terminating NUL */
      if(count + len >= (int)sizeof(utf8)) break;

      memcpy(utf8 + count, enc, len);
      count += len;
   }

   utf8[count] = '\0';

   memcpy(p, utf8, count + 1);
}

View File

@ -7,9 +7,10 @@
void sanitiseBase64(char *s); void sanitiseBase64(char *s);
int decodeBase64(char *p); int decodeBase64(char *p);
void decodeUTF8(char *p);
void decodeQP(char *p); void decodeQP(char *p);
void decodeHTML(char *p); void decodeHTML(char *p);
void decodeURL(char *p); void decodeURL(char *p);
inline void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len);
void utf8_encode(unsigned char *p);
#endif /* _DECODER_H */ #endif /* _DECODER_H */

View File

@ -108,6 +108,7 @@ struct _state {
char attachedfile[RND_STR_LEN+SMALLBUFSIZE]; char attachedfile[RND_STR_LEN+SMALLBUFSIZE];
char message_id[SMALLBUFSIZE]; char message_id[SMALLBUFSIZE];
char miscbuf[MAX_TOKEN_LEN]; char miscbuf[MAX_TOKEN_LEN];
char qpbuf[MAX_TOKEN_LEN];
unsigned long n_token; unsigned long n_token;
unsigned long n_subject_token; unsigned long n_subject_token;
unsigned long n_body_token; unsigned long n_body_token;

View File

@ -333,15 +333,18 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, stru
if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state); if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state);
if(state->message_state == MSG_BODY){ if(state->message_state == MSG_BODY && state->qp == 1){
if(state->qp == 1) decodeQP(buf); fixupSoftBreakInQuotedPritableLine(buf, state); // 2011.12.07
if(state->utf8 == 1) decodeUTF8(buf); decodeQP(buf);
} }
decodeURL(buf); decodeURL(buf);
if(state->texthtml == 1) decodeHTML(buf); if(state->texthtml == 1) decodeHTML(buf);
/* encode the body if it's not utf-8 encoded */
if(state->message_state == MSG_BODY && state->utf8 != 1) utf8_encode((unsigned char*)buf);
translateLine((unsigned char*)buf, state); translateLine((unsigned char*)buf, state);

View File

@ -47,6 +47,7 @@ void init_state(struct _state *state){
memset(state->message_id, 0, SMALLBUFSIZE); memset(state->message_id, 0, SMALLBUFSIZE);
memset(state->miscbuf, 0, MAX_TOKEN_LEN); memset(state->miscbuf, 0, MAX_TOKEN_LEN);
memset(state->qpbuf, 0, MAX_TOKEN_LEN);
memset(state->filename, 0, TINYBUFSIZE); memset(state->filename, 0, TINYBUFSIZE);
memset(state->type, 0, TINYBUFSIZE); memset(state->type, 0, TINYBUFSIZE);
@ -232,7 +233,8 @@ void fixupEncodedHeaderLine(char *buf){
if(sb){ decodeBase64(s+3); } if(sb){ decodeBase64(s+3); }
if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } } if(sq){ decodeQP(s+3); r = s + 3; for(; *r; r++){ if(*r == '_') *r = ' '; } }
if(strncasecmp(start+1, "utf-8", 5) == 0) decodeUTF8(s+3); /* encode everything if it's not utf-8 encoded */
if(strncasecmp(start+1, "utf-8", 5)) utf8_encode((unsigned char*)s+3);
strncat(puf, s+3, sizeof(puf)-1); strncat(puf, s+3, sizeof(puf)-1);
@ -260,6 +262,42 @@ void fixupEncodedHeaderLine(char *buf){
} }
/*
 * Re-join words that a quoted-printable soft line break ("=" as the last
 * character of the line) split across two lines.
 *
 * Step 1: if state->qpbuf holds a fragment saved from the previous line, it is
 *         prepended to buf and qpbuf is cleared.
 * Step 2: if buf now ends with '=', that character is removed, and the text
 *         from the last space onwards is moved from buf into state->qpbuf so
 *         it can be glued to the next line. The fragment is only carried over
 *         when it fits into qpbuf; if the line contains no space, only the
 *         '=' is stripped.
 *
 * buf:   current body line, modified in place.
 *        NOTE(review): the prepend step rewrites MAXBUFSIZE bytes of buf, so
 *        the caller's buffer is assumed to be at least that large -- confirm.
 * state: parser state; only its qpbuf member (MAX_TOKEN_LEN bytes) is used.
 */
void fixupSoftBreakInQuotedPritableLine(char *buf, struct _state *state){
   char *p, puf[MAXBUFSIZE];
   size_t len;

   if(buf == NULL || state == NULL) return;

   if(state->qpbuf[0] != '\0'){
      memset(puf, 0, sizeof(puf));
      strncpy(puf, state->qpbuf, sizeof(puf)-1);

      /* strncat() takes the number of bytes that may be appended, not the total size of puf */
      strncat(puf, buf, sizeof(puf) - strlen(puf) - 1);

      memset(buf, 0, MAXBUFSIZE);
      memcpy(buf, puf, MAXBUFSIZE);

      memset(state->qpbuf, 0, MAX_TOKEN_LEN);
   }

   /* an empty line has no last character to inspect */
   len = strlen(buf);
   if(len == 0 || buf[len-1] != '=') return;

   buf[len-1] = '\0';

   p = strrchr(buf, ' ');
   if(p == NULL) return;

   memset(state->qpbuf, 0, MAX_TOKEN_LEN);

   len = strlen(p);
   if(len < MAX_TOKEN_LEN-1){
      /* copy only the fragment itself; qpbuf was zeroed above, so it stays NUL-terminated */
      memcpy(state->qpbuf, p, len);
      *p = '\0';
   }
}
void fixupBase64EncodedLine(char *buf, struct _state *state){ void fixupBase64EncodedLine(char *buf, struct _state *state){
char *p, puf[MAXBUFSIZE]; char *p, puf[MAXBUFSIZE];
@ -403,16 +441,9 @@ int appendHTMLTag(char *buf, char *htmlbuf, int pos, struct _state *state){
void translateLine(unsigned char *p, struct _state *state){ void translateLine(unsigned char *p, struct _state *state){
int url=0; int url=0;
unsigned char *q=NULL, *P=p;
for(; *p; p++){ for(; *p; p++){
/* save position of '=', 2006.01.05, SJ */
if(state->qp == 1 && *p == '='){
q = p;
}
if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC) && *p == '@'){ continue; } if( (state->message_state == MSG_RECEIVED || state->message_state == MSG_FROM || state->message_state == MSG_TO || state->message_state == MSG_CC) && *p == '@'){ continue; }
if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; } if(state->message_state == MSG_SUBJECT && (*p == '%' || *p == '_' || *p == '&') ){ continue; }
@ -434,7 +465,7 @@ void translateLine(unsigned char *p, struct _state *state){
} }
} }
if(delimiter_characters[(unsigned int)*p] != ' ' || isalnum(*p) == 0) if(delimiter_characters[(unsigned int)*p] != ' ')
*p = ' '; *p = ' ';
else { else {
*p = tolower(*p); *p = tolower(*p);
@ -442,11 +473,6 @@ void translateLine(unsigned char *p, struct _state *state){
} }
/* restore the soft break in quoted-printable parts */
if(state->qp == 1 && q && (q > P + strlen((char*)P) - 3))
*q = '=';
} }
@ -526,7 +552,8 @@ void fixURL(char *url){
q = strchr(p, '/'); q = strchr(p, '/');
if(q) *q = '\0'; if(q) *q = '\0';
snprintf(fixed_url, sizeof(fixed_url)-1, "URL*%s ", p); snprintf(fixed_url, sizeof(fixed_url)-1, "__URL__%s ", p);
fix_email_address_for_sphinx(fixed_url);
strcpy(url, fixed_url); strcpy(url, fixed_url);
} }

View File

@ -51,8 +51,10 @@ void handle_smtp_session(int new_sd, struct __data *data, struct __config *cfg){
mysql_options(&(sdata.mysql), MYSQL_OPT_CONNECT_TIMEOUT, (const char*)&cfg->mysql_connect_timeout); mysql_options(&(sdata.mysql), MYSQL_OPT_CONNECT_TIMEOUT, (const char*)&cfg->mysql_connect_timeout);
mysql_options(&(sdata.mysql), MYSQL_OPT_RECONNECT, (const char*)&rc); mysql_options(&(sdata.mysql), MYSQL_OPT_RECONNECT, (const char*)&rc);
if(mysql_real_connect(&(sdata.mysql), cfg->mysqlhost, cfg->mysqluser, cfg->mysqlpwd, cfg->mysqldb, cfg->mysqlport, cfg->mysqlsocket, 0)) if(mysql_real_connect(&(sdata.mysql), cfg->mysqlhost, cfg->mysqluser, cfg->mysqlpwd, cfg->mysqldb, cfg->mysqlport, cfg->mysqlsocket, 0)){
db_conn = 1; db_conn = 1;
mysql_real_query(&(sdata.mysql), "SET NAMES utf8", strlen("SET NAMES utf8"));
}
else else
syslog(LOG_PRIORITY, "%s", ERR_MYSQL_CONNECT); syslog(LOG_PRIORITY, "%s", ERR_MYSQL_CONNECT);
#endif #endif

View File

@ -1,3 +1,7 @@
create database `piler` character set 'utf8';
use `piler`;
drop table if exists `sph_counter`; drop table if exists `sph_counter`;
create table if not exists `sph_counter` ( create table if not exists `sph_counter` (
`counter_id` int not null, `counter_id` int not null,
@ -5,6 +9,7 @@ create table if not exists `sph_counter` (
primary key (`counter_id`) primary key (`counter_id`)
); );
drop table if exists `sph_index`; drop table if exists `sph_index`;
create table if not exists `sph_index` ( create table if not exists `sph_index` (
`id` bigint not null, `id` bigint not null,