mirror of
https://bitbucket.org/jsuto/piler.git
synced 2024-12-25 07:50:11 +01:00
tweaked the parser to support cjk languages
This commit is contained in:
parent
c0301ceaca
commit
1e44042b82
@ -103,6 +103,13 @@ archive_only_mydomains=0
|
||||
; minimum word length in mail body to index
|
||||
min_word_len=1
|
||||
|
||||
; whether to enable support for CJK (Chinese, Japanese, and Korean) characters.
|
||||
; the text piler can see with CJK languages may have extremely long
|
||||
; sequences without any whitespace. To prevent the parser from dropping
|
||||
; these very long sequences, enable (1) this feature. By default it's
|
||||
; disabled (0).
|
||||
enable_cjk=0
|
||||
|
||||
; if piler detects this line in the mail header, then it will assume
|
||||
; the message is spam. You should include your own antispam solution's
|
||||
; specific line.
|
||||
|
@ -68,6 +68,7 @@ struct _parse_rule config_parse_rules[] =
|
||||
{ "clamd_socket", "string", (void*) string_parser, offsetof(struct __config, clamd_socket), CLAMD_SOCKET, MAXVAL-1},
|
||||
{ "debug", "integer", (void*) int_parser, offsetof(struct __config, debug), "0", sizeof(int)},
|
||||
{ "default_retention_days", "integer", (void*) int_parser, offsetof(struct __config, default_retention_days), "2557", sizeof(int)},
|
||||
{ "enable_cjk", "integer", (void*) int_parser, offsetof(struct __config, enable_cjk), "0", sizeof(int)},
|
||||
{ "encrypt_messages", "integer", (void*) int_parser, offsetof(struct __config, encrypt_messages), "1", sizeof(int)},
|
||||
{ "extra_to_field", "string", (void*) string_parser, offsetof(struct __config, extra_to_field), "", MAXVAL-1},
|
||||
{ "hostid", "string", (void*) string_parser, offsetof(struct __config, hostid), HOSTID, MAXVAL-1},
|
||||
|
@ -84,6 +84,8 @@ struct __config {
|
||||
|
||||
int tweak_sent_time_offset;
|
||||
|
||||
int enable_cjk;
|
||||
|
||||
int debug;
|
||||
};
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
#define VERSION "0.1.25-master-branch"
|
||||
|
||||
#define BUILD 851
|
||||
#define BUILD 852
|
||||
|
||||
#define HOSTID "mailarchiver"
|
||||
|
||||
|
@ -604,7 +604,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
||||
|
||||
if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf);
|
||||
|
||||
if(state->is_header == 0 && strncmp(puf, "__URL__", 7) && (puf[0] == ' ' || strlen(puf) > MAX_WORD_LEN || isHexNumber(puf)) ) continue;
|
||||
if(state->is_header == 0 && strncmp(puf, "__URL__", 7) && (puf[0] == ' ' || (strlen(puf) > MAX_WORD_LEN && cfg->enable_cjk == 0) || isHexNumber(puf)) ) continue;
|
||||
|
||||
|
||||
len = strlen(puf);
|
||||
|
Loading…
Reference in New Issue
Block a user