Exclude email addresses of 42+ characters from sphinx query

Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
Janos SUTO 2020-09-17 22:44:41 +02:00
parent 9833992b16
commit 6ce7efd1ed
2 changed files with 11 additions and 0 deletions

View File

@ -239,6 +239,7 @@ $config['SPHINX_ATTACHMENT_INDEX'] = 'att1';
$config['SPHINX_TAG_INDEX'] = 'tag1'; $config['SPHINX_TAG_INDEX'] = 'tag1';
$config['SPHINX_NOTE_INDEX'] = 'note1'; $config['SPHINX_NOTE_INDEX'] = 'note1';
$config['SPHINX_STRICT_SCHEMA'] = 1; $config['SPHINX_STRICT_SCHEMA'] = 1;
$config['MAX_EMAIL_LEN'] = 41;
$config['RELOAD_COMMAND'] = 'sudo -n /etc/init.d/rc.piler reload'; $config['RELOAD_COMMAND'] = 'sudo -n /etc/init.d/rc.piler reload';
$config['PILERIMPORT_IMAP_COMMAND'] = '/usr/local/bin/pilerimport -d /var/piler/imap -q -r'; $config['PILERIMPORT_IMAP_COMMAND'] = '/usr/local/bin/pilerimport -d /var/piler/imap -q -r';

View File

@ -142,6 +142,16 @@ function checkemail($email, $domains) {
function validemail($email = '') { function validemail($email = '') {
if($email == '') { return 0; } if($email == '') { return 0; }
// sphinxsearch supports tokens of up to 41 characters.
// If the query contains a longer token, sphinx reports
// a query error even if the query itself is correct.
// The workaround is to discard such email addresses.
if(strlen($email) > MAX_EMAIL_LEN) {
$msg = sprintf("discarding email %s: longer than %d", $email, MAX_EMAIL_LEN);
syslog(LOG_INFO, $msg);
return 0;
}
if(preg_match("/@local$/", $email)) { return 1; } if(preg_match("/@local$/", $email)) { return 1; }
if(preg_match('/^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*(\.[a-zA-Z]{2,10})$/', $email)) { if(preg_match('/^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*(\.[a-zA-Z]{2,10})$/', $email)) {