mirror of
https://bitbucket.org/jsuto/piler.git
synced 2024-12-25 08:10:12 +01:00
tweaked the parser to support cjk languages
This commit is contained in:
parent
c0301ceaca
commit
1e44042b82
@ -103,6 +103,13 @@ archive_only_mydomains=0
|
|||||||
; minimum word length in mail body to index
|
; minimum word length in mail body to index
|
||||||
min_word_len=1
|
min_word_len=1
|
||||||
|
|
||||||
|
; whether to enable support for CJK (Chinese, Japanese, and Korean) characters.
|
||||||
|
; the text piler can see with CJK languages may have extremely long
|
||||||
|
; sequences without any whitespace. To prevent the parser from dropping
|
||||||
|
; these very long sequences, enable (1) this feature. By default it's
|
||||||
|
; disabled (0).
|
||||||
|
enable_cjk=0
|
||||||
|
|
||||||
; if piler detects this line in the mail header, then it will assume
|
; if piler detects this line in the mail header, then it will assume
|
||||||
; the message is spam. You should include your own antispam solution's
|
; the message is spam. You should include your own antispam solution's
|
||||||
; specific line.
|
; specific line.
|
||||||
|
@ -68,6 +68,7 @@ struct _parse_rule config_parse_rules[] =
|
|||||||
{ "clamd_socket", "string", (void*) string_parser, offsetof(struct __config, clamd_socket), CLAMD_SOCKET, MAXVAL-1},
|
{ "clamd_socket", "string", (void*) string_parser, offsetof(struct __config, clamd_socket), CLAMD_SOCKET, MAXVAL-1},
|
||||||
{ "debug", "integer", (void*) int_parser, offsetof(struct __config, debug), "0", sizeof(int)},
|
{ "debug", "integer", (void*) int_parser, offsetof(struct __config, debug), "0", sizeof(int)},
|
||||||
{ "default_retention_days", "integer", (void*) int_parser, offsetof(struct __config, default_retention_days), "2557", sizeof(int)},
|
{ "default_retention_days", "integer", (void*) int_parser, offsetof(struct __config, default_retention_days), "2557", sizeof(int)},
|
||||||
|
{ "enable_cjk", "integer", (void*) int_parser, offsetof(struct __config, enable_cjk), "0", sizeof(int)},
|
||||||
{ "encrypt_messages", "integer", (void*) int_parser, offsetof(struct __config, encrypt_messages), "1", sizeof(int)},
|
{ "encrypt_messages", "integer", (void*) int_parser, offsetof(struct __config, encrypt_messages), "1", sizeof(int)},
|
||||||
{ "extra_to_field", "string", (void*) string_parser, offsetof(struct __config, extra_to_field), "", MAXVAL-1},
|
{ "extra_to_field", "string", (void*) string_parser, offsetof(struct __config, extra_to_field), "", MAXVAL-1},
|
||||||
{ "hostid", "string", (void*) string_parser, offsetof(struct __config, hostid), HOSTID, MAXVAL-1},
|
{ "hostid", "string", (void*) string_parser, offsetof(struct __config, hostid), HOSTID, MAXVAL-1},
|
||||||
|
@ -84,6 +84,8 @@ struct __config {
|
|||||||
|
|
||||||
int tweak_sent_time_offset;
|
int tweak_sent_time_offset;
|
||||||
|
|
||||||
|
int enable_cjk;
|
||||||
|
|
||||||
int debug;
|
int debug;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
#define VERSION "0.1.25-master-branch"
|
#define VERSION "0.1.25-master-branch"
|
||||||
|
|
||||||
#define BUILD 851
|
#define BUILD 852
|
||||||
|
|
||||||
#define HOSTID "mailarchiver"
|
#define HOSTID "mailarchiver"
|
||||||
|
|
||||||
|
@ -604,7 +604,7 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
|||||||
|
|
||||||
if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf);
|
if(strncasecmp(puf, "http://", 7) == 0 || strncasecmp(puf, "https://", 8) == 0) fixURL(puf);
|
||||||
|
|
||||||
if(state->is_header == 0 && strncmp(puf, "__URL__", 7) && (puf[0] == ' ' || strlen(puf) > MAX_WORD_LEN || isHexNumber(puf)) ) continue;
|
if(state->is_header == 0 && strncmp(puf, "__URL__", 7) && (puf[0] == ' ' || (strlen(puf) > MAX_WORD_LEN && cfg->enable_cjk == 0) || isHexNumber(puf)) ) continue;
|
||||||
|
|
||||||
|
|
||||||
len = strlen(puf);
|
len = strlen(puf);
|
||||||
|
Loading…
Reference in New Issue
Block a user