Merged jsuto/piler into master

This commit is contained in:
Alexander Noack 2021-04-12 12:09:12 +02:00
commit 85e65547c2
9 changed files with 275 additions and 31 deletions

View File

@ -472,6 +472,8 @@ define('HEALTH_WORKER_URL', SITE_URL . 'index.php?route=health/worker');
define('LDAP_TYPE_GENERIC', 'generic_ldap');
define('ATTACHMENT_DUMP_CHECKPOINT', 'attachment_dump_checkpoint');
define('ACTION_ALL', 0);
define('ACTION_UNKNOWN', 1);
define('ACTION_LOGIN', 2);

View File

@ -0,0 +1,102 @@
<?php

/*
 * Export archived attachments to a directory tree.
 *
 * Walks the attachment ids from the stored checkpoint up to (but not
 * including) the newest attachment id. Every attachment is handed to
 * dump_attachment() once for the sender ("out") and once for each recipient
 * ("in") returned by get_message_addresses_by_piler_id(). The checkpoint is
 * saved every 100 ids and once more after the loop.
 */

define('LOCK_FILE', '/var/piler/tmp/export-attachments.lock');

$webuidir = "";
$outdir = '';

$opts = 'hw:d:';
$lopts = [
    'webui:',
    'dir:',
    'help'
];

if($options = getopt($opts, $lopts)) {
    // The short options declared in $opts (-w, -d, -h) are honoured as well;
    // a long option overrides its short form when both are given.
    if(isset($options['w'])) { $webuidir = $options['w']; }
    if(isset($options['d'])) { $outdir = $options['d']; }
    if(isset($options['webui'])) { $webuidir = $options['webui']; }
    if(isset($options['dir'])) { $outdir = $options['dir']; }
    if(isset($options['help']) || isset($options['h'])) { usage(); }
}

// Both the webui directory and the output directory are mandatory
if($webuidir == '' || $outdir == '') { usage(); }

require_once("$webuidir/config.php");
require(DIR_SYSTEM . "/startup.php");

$request = new Request();
Registry::set("request", $request);

// Not read anywhere in this script; kept in case included code expects the
// global to exist -- TODO confirm and remove.
$start = NULL;

$loader = new Loader();
Registry::set('load', $loader);

$loader->load->model('domain/domain');
$loader->load->model('search/search');
$loader->load->model('search/message');
$loader->load->model('message/attachment');

$db = new DB(DB_DRIVER, DB_HOSTNAME, DB_USERNAME, DB_PASSWORD, DB_DATABASE, DB_PREFIX);
Registry::set('db', $db);

Registry::set('auditor_user', 1);

openlog("export-attachments", LOG_PID, LOG_MAIL);

// Hold an exclusive lock on LOCK_FILE for the whole run. flock() is called
// without LOCK_NB, so it waits until the lock becomes available.
$fp = fopen(LOCK_FILE, "w");
if($fp === false) {
    // flock() must not be called with a failed handle
    syslog(LOG_INFO, "ERROR: couldn't open " . LOCK_FILE);
    exit(1);
}

if(!flock($fp, LOCK_EX)) {
    syslog(LOG_INFO, "WARN: couldn't get a lock on " . LOCK_FILE);
    fclose($fp);
    exit;
}

$domain = new ModelDomainDomain();
$attachment = new ModelMessageAttachment();
$message = new ModelSearchMessage();

$domains = $domain->get_mapped_domains();

$last_id = $attachment->get_last_attachment_id();
$start_id = $attachment->get_checkpoint();

// Log the range that is actually processed
syslog(LOG_INFO, "start: $start_id, limit: $last_id");

for($i=$start_id; $i<$last_id; $i++) {
    $a = $attachment->get_attachment_by_id($i);

    // When no row with a piler_id came back (e.g. a gap in the id sequence)
    // there is nothing to export for this id; the checkpoint still advances.
    if(isset($a['piler_id'])) {
        $m = $message->get_message_addresses_by_piler_id($a['piler_id'], $domains);

        $attachment->dump_attachment($outdir, "out", $m['sender'], $i, $a);

        foreach($m['rcpt'] as $rcpt) {
            $attachment->dump_attachment($outdir, "in", $rcpt, $i, $a);
        }
    }

    // Persist progress periodically so an interrupted run can resume
    if($i % 100 == 0) { $attachment->update_checkpoint($i); }
}

// $i is the first id that has not been processed yet
$attachment->update_checkpoint($i);

// Release lock
flock($fp, LOCK_UN);
fclose($fp);
/**
 * Print the command line help of this script and terminate.
 */
function usage() {
    $help = "\nUsage: " . __FILE__ . "\n\n"
          . "\t--webui <path to webui directory>\n"
          . "\t--dir <basedir to write attachments>\n"
          . "\t--help\n\n";

    print $help;

    exit;
}

View File

@ -33,6 +33,10 @@ int max_matches = 1000;
char *index_list = "main1,dailydelta1,delta1";
regex_t regexp;
char *zipfile = NULL;
struct zip *z = NULL;
uint64 *zip_ids = NULL;
int zip_counter = 0;
int zip_batch = 2000;
int export_emails_matching_to_query(struct session_data *sdata, char *s, struct config *cfg);
@ -54,6 +58,7 @@ void usage(){
printf(" -i <index list> Sphinx indices to use (default: %s)\n", index_list);
#if LIBZIP_VERSION_MAJOR >= 1
printf(" -z <zip file> Write exported EML files to a zip file\n");
printf(" -Z <batch size> Zip batch size. Valid range: 10-10000, default: 2000\n");
#endif
printf(" -A Export all emails from archive\n");
printf(" -o Export emails to stdout\n");
@ -170,6 +175,8 @@ uint64 run_query(struct session_data *sdata, struct session_data *sdata2, char *
snprintf(s, sizeof(s)-1, "SELECT id FROM %s WHERE %s AND id > %llu ORDER BY id ASC LIMIT 0,%d", index_list, where_condition, last_id, max_matches);
syslog(LOG_PRIORITY, "sphinx query: %s", s);
if(mysql_real_query(&(sdata2->mysql), s, strlen(s)) == 0){
MYSQL_RES *res = mysql_store_result(&(sdata2->mysql));
if(res != NULL){
@ -320,26 +327,24 @@ int build_query_from_args(char *from, char *to, char *fromdomain, char *todomain
}
#if LIBZIP_VERSION_MAJOR >= 1
int write_to_zip_file(char *filename){
struct zip *z=NULL;
int errorp, ret=ERR;
z = zip_open(zipfile, ZIP_CREATE, &errorp);
if(!z){
printf("error: error creating zip file=%s, error code=%d\n", zipfile, errorp);
return ret;
}
zip_source_t *zs = zip_source_file(z, filename, 0, 0);
if(zs && zip_file_add(z, filename, zs, ZIP_FL_ENC_UTF_8) >= 0){
ret = OK;
} else {
printf("error adding file %s: %s\n", filename, zip_strerror(z));
}
// Finish the current zip batch: close the archive handle, reset the batch
// state and delete the <id>.eml files whose ids were recorded in zip_ids.
void zip_flush(){
// NOTE(review): the return value of zip_close() is ignored, so the .eml files
// below are unlinked even if closing (writing) the archive failed -- confirm
// that losing the exported files in that case is acceptable.
zip_close(z);
// NOTE(review): zip_flush() returns void and declares no "ret"; this line
// looks like a removed line of the old write_to_zip_file() shown by the diff.
return ret;
// Clear the handle and the counter so a new batch can be started
z = NULL;
zip_counter = 0;
// No id array was allocated, so there are no files to clean up
if(!zip_ids) return;
// Walk all zip_batch slots; slots holding 0 are skipped
for(int i=0; i<zip_batch; i++){
if(*(zip_ids+i)){
char filename[SMALLBUFSIZE];
// Rebuild the file name from the stored id and remove the file
snprintf(filename, sizeof(filename)-1, "%llu.eml", *(zip_ids+i));
unlink(filename);
}
}
// Release the id array and clear the pointer
free(zip_ids);
zip_ids = NULL;
}
#endif
@ -349,6 +354,7 @@ int export_emails_matching_to_query(struct session_data *sdata, char *s, struct
char digest[SMALLBUFSIZE], bodydigest[SMALLBUFSIZE];
char filename[SMALLBUFSIZE];
struct sql sql;
int errorp;
if(prepare_sql_statement(sdata, &sql, s) == ERR) return ERR;
@ -401,12 +407,38 @@ int export_emails_matching_to_query(struct session_data *sdata, char *s, struct
verification_status = 1;
}
#if LIBZIP_VERSION_MAJOR >= 1
if(zipfile && write_to_zip_file(filename) == OK){
unlink(filename);
}
#endif
if(zipfile){
#if LIBZIP_VERSION_MAJOR >= 1
// Open zip file if handler is NULL
if(!z){
z = zip_open(zipfile, ZIP_CREATE, &errorp);
if(!z){
printf("error: error creating zip file=%s, error code=%d\n", zipfile, errorp);
return ERR;
}
}
if(!zip_ids) zip_ids = (uint64*) calloc(sizeof(uint64), zip_batch);
if(!zip_ids){
printf("calloc error for zip_ids\n");
return ERR;
}
zip_source_t *zs = zip_source_file(z, filename, 0, 0);
if(zs && zip_file_add(z, filename, zs, ZIP_FL_ENC_UTF_8) >= 0){
*(zip_ids+zip_counter) = id;
zip_counter++;
} else {
printf("error adding file %s: %s\n", filename, zip_strerror(z));
return ERR;
}
if(zip_counter == zip_batch){
zip_flush();
}
#endif
}
}
else printf("cannot open: %s\n", filename);
}
@ -465,6 +497,7 @@ int main(int argc, char **argv){
{"start-date", required_argument, 0, 'a' },
{"stop-date", required_argument, 0, 'b' },
{"zip", required_argument, 0, 'z' },
{"zip-batch", required_argument, 0, 'Z' },
{"where-condition", required_argument, 0, 'w' },
{"max-matches", required_argument, 0, 'm' },
{"index-list", required_argument, 0, 'i' },
@ -473,9 +506,9 @@ int main(int argc, char **argv){
int option_index = 0;
int c = getopt_long(argc, argv, "c:s:S:f:r:F:R:a:b:w:m:i:z:oAdhv?", long_options, &option_index);
int c = getopt_long(argc, argv, "c:s:S:f:r:F:R:a:b:w:m:i:z:Z:oAdhv?", long_options, &option_index);
#else
int c = getopt(argc, argv, "c:s:S:f:r:F:R:a:b:w:m:i:z:oAdhv?");
int c = getopt(argc, argv, "c:s:S:f:r:F:R:a:b:w:m:i:z:Z:oAdhv?");
#endif
if(c == -1) break;
@ -567,6 +600,10 @@ int main(int argc, char **argv){
case 'z': zipfile = optarg;
break;
case 'Z': zip_batch = atoi(optarg);
if(zip_batch < 10 || zip_batch > 10000)
zip_batch = 2000;
break;
case 'o':
export_to_stdout = 1;
@ -632,5 +669,11 @@ int main(int argc, char **argv){
close_database(&sdata);
if(zipfile){
#if LIBZIP_VERSION_MAJOR >= 1
zip_flush();
#endif
}
return verification_status;
}

View File

@ -128,6 +128,9 @@ uint64 retrieve_email_by_metadata_id(struct session_data *sdata, struct data *da
rc = store_index_data(sdata, &state, data, stored_id, cfg);
unlink(sdata->tmpframe);
remove_stripped_attachments(&state);
if(rc == OK) reindexed++;
else printf("failed to add to %s table: %s\n", SQL_SPHINX_TABLE, filename);

View File

@ -18,10 +18,10 @@ case1() {
"$SMTP_SOURCE_PROG" -s $SMTP_HOST -r archive@cust1.acts.hu extra@addr.ess another@extra.addr -p 25 -t 20 --dir "$EML_DIR/virus" --socket --no-counter
wait_until_emails_are_processed "piler1" 3005
wait_until_emails_are_processed "piler1" 3007
docker exec "piler1" su piler -c /usr/libexec/piler/indexer.delta.sh 2>/dev/null
count_status_values 3005 2894 111 0
count_status_values 3007 2896 111 0
test_retrieved_messages_are_the_same "piler1" "piler"

View File

@ -35,3 +35,24 @@ RewriteRule ^view/javascript/piler.js /js.php [QSA,L]
</FilesMatch>
</IfModule>
<IfModule auth_gssapi_module>
# ktpass -princ HTTP/<webserver-fqdn>@<WINDOWS AD DOMAIN IN CAPITALS> \
# -mapuser <ldap helper user>@<WINDOWS AD DOMAIN IN CAPITALS> \
# -pass * \
# -crypto AES256-SHA1 \
# -ptype KRB5_NT_PRINCIPAL \
# -out /etc/krb5/http.keytab \
#
# setspn -s HTTP/<webserver-fqdn> <ldap helper user>
<FilesMatch "sso\.php$">
RewriteEngine on
RewriteCond %{HTTP:Authorization} !^$
RewriteRule .* - [E=HTTP_AUTHORIZATION:%{HTTP:Authorization},L]
AuthName "User with domain part (separated by @) in CAPITALS - e.g. 'user@DOMAIN'"
AuthType GSSAPI
GssapiBasicAuth On
GssapiCredStore keytab:/etc/krb5/http.keytab
Require valid-user
</FilesMatch>
</IfModule>

View File

@ -41,7 +41,7 @@ class ModelMailMail extends Model {
$l = fgets($r, 4096);
if(preg_match("/^250/", $l)){ $queue_id = $l; $ok = 1; }
if(preg_match("/^250/", $l)){ $queue_id = trim($l); $ok = 1; }
fputs($r, "QUIT\r\n");
$l = fgets($r, 4096);

View File

@ -2,11 +2,10 @@
class ModelMessageAttachment extends Model {
public function get_attachment_by_id($id = 0) {
if($id <= 0) { return []; }
$query = $this->db->query("SELECT id, piler_id, attachment_id, name, type FROM " . TABLE_ATTACHMENT . " WHERE id=?", [$id]);
$query = $this->db->query("SELECT id, piler_id, attachment_id, name, type, ptr FROM " . TABLE_ATTACHMENT . " WHERE id=?", [$id]);
if(isset($query->row)) {
if($query->row['ptr'] > 0) {
@ -77,4 +76,54 @@ class ModelMessageAttachment extends Model {
return $images;
}
/**
 * Write one attachment to "$basedir/$email/$in_or_out/<id>-<filename>".
 *
 * The address and the attachment file name originate from message data, so
 * path separators are neutralised before they become part of the path and
 * the file cannot end up outside of $basedir.
 *
 * @param string $basedir    root directory of the export; nothing is written when empty
 * @param string $in_or_out  name of the sub-directory below the address directory
 * @param string $email      address used as directory name; nothing is written when empty
 * @param int    $id         attachment id, used as prefix of the file name
 * @param array  $attachment needs the key 'attachment' (the content) and should
 *                           carry 'filename'; nothing is written without content
 */
public function dump_attachment($basedir='', $in_or_out="in", $email='', $id=0, $attachment=[]) {
    if($basedir == '' || $email == '') {
        return;
    }

    // Nothing to write without a content entry
    if(!is_array($attachment) || !array_key_exists('attachment', $attachment)) {
        return;
    }

    // The address becomes a single directory level: no separators, no NUL
    // bytes and no "." / ".." that would leave $basedir
    $email_dir = str_replace(['/', "\0"], '_', $email);
    if($email_dir == '.' || $email_dir == '..') {
        syslog(LOG_INFO, "ERROR: refusing to use '$email_dir' as a folder name");
        return;
    }

    // Keep only the last path component of the attachment name
    $filename = isset($attachment['filename']) ? (string)$attachment['filename'] : '';
    $filename = basename(str_replace("\0", '', $filename));

    $dir = sprintf("%s/%s/%s", $basedir, $email_dir, $in_or_out);

    // The second is_dir() covers a directory created by someone else between
    // the first check and mkdir()
    if(!is_dir($dir) && !mkdir($dir, 0700, true) && !is_dir($dir)) {
        die("Failed to create folder $dir");
    }

    $fname = sprintf("%s/%d-%s", $dir, $id, $filename);

    $written = false;

    $fp = fopen($fname, "w+");
    if($fp) {
        $written = fwrite($fp, (string)$attachment['attachment']) !== false;
        fclose($fp);
    }

    if(!$written) {
        syslog(LOG_INFO, "ERROR: could not write $fname");
    }
}
/**
 * Return the highest id found in the attachment table, or 0 when the query
 * yields no id.
 */
public function get_last_attachment_id() {
    $sql = "SELECT id FROM " . TABLE_ATTACHMENT . " ORDER BY id DESC LIMIT 1";

    $result = $this->db->query($sql);

    return isset($result->row['id']) ? $result->row['id'] : 0;
}
/**
 * Return the attachment id the dump should continue from.
 *
 * When no checkpoint row exists yet, one is created. The value stored is the
 * same value that is returned, so a run that stops before its first
 * update_checkpoint() resumes from the same id. A stored value below 1 is
 * reported as 1, because get_attachment_by_id() returns nothing for ids <= 0.
 *
 * @return int|string the stored checkpoint value, or 1 as the initial value
 */
public function get_checkpoint() {
    $initial = 1;

    $query = $this->db->query("SELECT value FROM `" . TABLE_OPTION . "` WHERE `key`=?", [ATTACHMENT_DUMP_CHECKPOINT]);

    if(isset($query->row['value'])) {
        return $query->row['value'] < $initial ? $initial : $query->row['value'];
    }

    $this->db->query("INSERT INTO `" . TABLE_OPTION . "` (`key`, value) VALUES(?,?)", [ATTACHMENT_DUMP_CHECKPOINT, $initial]);

    return $initial;
}
/**
 * Store $value as the new attachment dump checkpoint.
 *
 * @param int $value id to save in the option row of the checkpoint
 */
public function update_checkpoint($value=0) {
    $sql = "UPDATE `" . TABLE_OPTION . "` SET value=? WHERE `key`=?";
    $params = [$value, ATTACHMENT_DUMP_CHECKPOINT];

    $this->db->query($sql, $params);
}
}

View File

@ -296,6 +296,30 @@ class ModelSearchMessage extends Model {
}
/**
 * Look up the sender and the recipients of a message, limited to the
 * given domains.
 *
 * @param string $piler_id piler id of the message
 * @param array  $domains  domains whose addresses are reported
 *
 * @return array ['sender' => string, 'rcpt' => string[]]; 'sender' is '' when
 *               the sender domain is not listed, and both entries are empty
 *               when no message with this piler id is found
 */
public function get_message_addresses_by_piler_id($piler_id='', $domains=[]) {
    $result = ['sender' => '', 'rcpt' => []];

    $query = $this->db->query("SELECT id, `from`, `fromdomain` FROM " . TABLE_META . " WHERE piler_id=?", [$piler_id]);

    // Unknown message: do not read columns of a missing row and do not run
    // the recipient query with a meaningless id
    if(!isset($query->row['id'])) {
        return $result;
    }

    $id = $query->row['id'];

    if(in_array($query->row['fromdomain'], $domains)) {
        $result['sender'] = $query->row['from'];
    }

    $query = $this->db->query("SELECT `to`, `todomain` FROM " . TABLE_RCPT . " WHERE id=?", [$id]);

    if(isset($query->rows)) {
        foreach($query->rows as $row) {
            if(in_array($row['todomain'], $domains)) {
                $result['rcpt'][] = $row['to'];
            }
        }
    }

    return $result;
}
public function get_attachment_by_id($id = 0) {
if($id <= 0) { return array(); }
@ -459,7 +483,7 @@ class ModelSearchMessage extends Model {
foreach ($ids as $id) {
$query = $this->db->query("INSERT INTO " . TABLE_TAG . " (id, uid, tag) VALUES(?,?,?)", array($id, $uid, $tag));
}
}
}
}