mirror of
https://bitbucket.org/jsuto/piler.git
synced 2024-11-07 22:41:59 +01:00
introduced a simplified mime decoder class
Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
parent
445e6e96bd
commit
698cc86548
@ -1,7 +1,7 @@
|
||||
<?php
|
||||
|
||||
require 'Zend/Mime/Decode.php';
|
||||
require 'Zend/Exception.php';
|
||||
require DIR_SYSTEM . 'helper/mime.php';
|
||||
|
||||
|
||||
class ModelSearchMessage extends Model {
|
||||
|
||||
@ -12,24 +12,6 @@ class ModelSearchMessage extends Model {
|
||||
public $message;
|
||||
private $verification = 0;
|
||||
|
||||
/**
 * Extract the MIME boundary value from a raw Content-Type header line.
 *
 * The line is split on ';' and the first segment containing the word
 * "boundary" (case-insensitive) is used. Double and single quotes are
 * stripped and trailing whitespace is removed from the value.
 *
 * @param string $line raw Content-Type header value
 * @return string the boundary, or an empty string when none is present
 */
public function get_boundary($line='') {
    foreach(explode(";", $line) as $segment) {

        if(!stristr($segment, "boundary")) {
            continue;
        }

        // Normalise "boundary = value" to "boundary=value", then drop quotes
        $segment = preg_replace("/boundary\s{0,}=\s{0,}/i", "boundary=", $segment);
        $segment = preg_replace("/\"\;{0,1}/", "", $segment);
        $segment = preg_replace("/\'/", "", $segment);

        // Whatever follows the last "boundary=" is the value
        $pieces = explode("boundary=", $segment);

        return rtrim(end($pieces));
    }

    return "";
}
|
||||
|
||||
|
||||
public function verify_message($piler_id = '', $data = '') {
|
||||
if($piler_id == '') { return 0; }
|
||||
@ -122,11 +104,11 @@ class ModelSearchMessage extends Model {
|
||||
|
||||
$msg = $this->get_raw_message($id);
|
||||
|
||||
Zend_Mime_Decode::splitMessageRaw($msg, $headers, $body);
|
||||
Piler_Mime_Decode::splitMessageRaw($msg, $headers, $body);
|
||||
|
||||
$has_journal = $this->remove_journal($headers);
|
||||
|
||||
$headers = $this->escape_lt_gt_symbols($headers);
|
||||
$headers = Piler_Mime_Decode::escape_lt_gt_symbols($headers);
|
||||
|
||||
return array('headers' => $headers, 'has_journal' => $has_journal);
|
||||
}
|
||||
@ -170,7 +152,7 @@ class ModelSearchMessage extends Model {
|
||||
|
||||
$p = '';
|
||||
|
||||
$data = $this->escape_lt_gt_symbols($data);
|
||||
$data = Piler_Mime_Decode::escape_lt_gt_symbols($data);
|
||||
}
|
||||
|
||||
return $data;
|
||||
@ -228,53 +210,22 @@ class ModelSearchMessage extends Model {
|
||||
|
||||
$has_journal = $this->remove_journal($msg);
|
||||
|
||||
Zend_Mime_Decode::splitMessage($msg, $headers, $body);
|
||||
$boundary = $this->get_boundary($headers['content-type']);
|
||||
Piler_Mime_Decode::splitMessage($msg, $headers, $body);
|
||||
|
||||
if(is_array($headers['from'])) { $headers['from'] = $headers['from'][0]; }
|
||||
if(is_array($headers['to'])) { $headers['to'] = $headers['to'][0]; }
|
||||
if(is_array($headers['cc'])) { $headers['cc'] = $headers['cc'][0]; }
|
||||
if(is_array($headers['subject'])) { $headers['subject'] = $headers['subject'][0]; }
|
||||
if(is_array($headers['date'])) { $headers['date'] = $headers['date'][0]; }
|
||||
|
||||
if(isset($headers['from'])) $from .= $this->escape_lt_gt_symbols($headers['from']);
|
||||
if(isset($headers['to'])) $to .= $this->escape_lt_gt_symbols($headers['to']);
|
||||
if(isset($headers['cc'])) $cc .= $this->escape_lt_gt_symbols($headers['cc']);
|
||||
if(isset($headers['subject'])) $subject .= $this->escape_lt_gt_symbols($headers['subject']);
|
||||
if(isset($headers['date'])) $date .= $headers['date'];
|
||||
|
||||
$this->message = array(
|
||||
'text/plain' => '',
|
||||
'text/html' => ''
|
||||
);
|
||||
|
||||
$this->extract_textuals_from_mime_parts($headers, $body, $boundary);
|
||||
|
||||
return array('from' => $from,
|
||||
'to' => $to,
|
||||
'cc' => $cc,
|
||||
'subject' => $this->highlight_search_terms($subject, $terms),
|
||||
'date' => $date,
|
||||
'message' => $this->message['text/html'] ? $this->message['text/html'] : $this->message['text/plain'],
|
||||
'has_journal' => $has_journal,
|
||||
'verification' => $this->verification
|
||||
);
|
||||
for($i=0; $i<count(Piler_Mime_Decode::HEADER_FIELDS); $i++) {
|
||||
if(isset($headers[Piler_Mime_Decode::HEADER_FIELDS[$i]]) && is_array($headers[Piler_Mime_Decode::HEADER_FIELDS[$i]])) {
|
||||
$headers[Piler_Mime_Decode::HEADER_FIELDS[$i]] = $headers[Piler_Mime_Decode::HEADER_FIELDS[$i][0]];
|
||||
}
|
||||
|
||||
|
||||
private function extract_textuals_from_mime_parts($headers = array(), $body = '', $boundary = '') {
|
||||
$mime_parts = array();
|
||||
|
||||
if($boundary) {
|
||||
try {
|
||||
$mime_parts = Zend_Mime_Decode::splitMessageStruct($body, $boundary);
|
||||
}
|
||||
catch (Exception $e) {
|
||||
syslog(LOG_INFO, "Caught exception: " . $e->getMessage());
|
||||
}
|
||||
if(Piler_Mime_Decode::HEADER_FIELDS[$i] == 'date') {
|
||||
${Piler_Mime_Decode::HEADER_FIELDS[$i]} .= $headers[Piler_Mime_Decode::HEADER_FIELDS[$i]];
|
||||
} else {
|
||||
$mime_parts[] = array('header' => $headers, 'body' => $body);
|
||||
${Piler_Mime_Decode::HEADER_FIELDS[$i]} .= Piler_Mime_Decode::escape_lt_gt_symbols($headers[Piler_Mime_Decode::HEADER_FIELDS[$i]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Piler_Mime_Decode::parseMessage($msg, $parts);
|
||||
|
||||
require_once DIR_SYSTEM . 'helper/HTMLPurifier.standalone.php';
|
||||
|
||||
@ -285,69 +236,33 @@ class ModelSearchMessage extends Model {
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
for($i=0; $i<count($mime_parts); $i++) {
|
||||
$mime = array(
|
||||
'content-type' => '',
|
||||
'encoding' => ''
|
||||
$this->message = array(
|
||||
'text/plain' => '',
|
||||
'text/html' => ''
|
||||
);
|
||||
|
||||
if(isset($mime_parts[$i]['header']['content-type'])) {
|
||||
$mime['content-type'] = Zend_Mime_Decode::splitContentType($mime_parts[$i]['header']['content-type']);
|
||||
for($i=0; $i<count($parts); $i++) {
|
||||
|
||||
$body = Piler_Mime_Decode::fixMimeBodyPart($parts[$i]['headers'], $parts[$i]['body']);
|
||||
|
||||
if($parts[$i]['headers']['content-type']['type'] == 'text/html') {
|
||||
$this->message['text/html'] = $purifier->purify($body);
|
||||
}
|
||||
/*
|
||||
Fix the mime type for some emails having a single textual body part
|
||||
without the Content-type header.
|
||||
*/
|
||||
else if (count($mime_parts) == 1) {
|
||||
$mime['content-type']['type'] = 'text/plain';
|
||||
else {
|
||||
$this->message['text/plain'] = $body;
|
||||
}
|
||||
|
||||
$mime['content-type']['type'] = strtolower($mime['content-type']['type']);
|
||||
|
||||
if(in_array($mime['content-type']['type'], array('multipart/mixed', 'multipart/related', 'multipart/alternative')))
|
||||
$this->extract_textuals_from_mime_parts($mime_parts[$i]['header'], $mime_parts[$i]['body'], $mime['content-type']['boundary']);
|
||||
|
||||
if(isset($mime_parts[$i]['header']['content-transfer-encoding']))
|
||||
$mime['encoding'] = $mime_parts[$i]['header']['content-transfer-encoding'];
|
||||
|
||||
if(in_array($mime['content-type']['type'], array('text/plain', 'text/html')))
|
||||
$this->message[$mime['content-type']['type']] .= $this->fix_mime_body_part($purifier, $mime, $mime_parts[$i]['body']);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Decode a single MIME body part and prepare it for display.
 *
 * Steps, in order: undo the transfer encoding (quoted-printable or base64),
 * convert the text to UTF-8 unless the declared charset already is UTF-8,
 * then either format plain text as HTML (escape, add <br />, wrap) or run
 * HTML through the supplied purifier.
 *
 * @param object $purifier object exposing purify($html)
 * @param array  $mime     array with 'encoding' and 'content-type' ('type', 'charset') entries
 * @param string $body     raw body of the MIME part
 * @return string the processed body
 */
private function fix_mime_body_part($purifier, $mime = array(), $body = '') {

    // Transfer encoding
    if($mime['encoding'] == 'quoted-printable') {
        $body = Zend_Mime_Decode::decodeQuotedPrintable($body);
    }

    if($mime['encoding'] == 'base64') {
        $body = base64_decode($body);
    }

    // Character set conversion
    if(strtolower($mime['content-type']['charset']) != 'utf-8') {
        $body = iconv($mime['content-type']['charset'], 'utf-8' . '//IGNORE', $body);
    }

    $type = strtolower($mime['content-type']['type']);

    if($type == 'text/plain') {
        $body = $this->escape_lt_gt_symbols($body);
        $body = preg_replace("/\n/", "<br />\n", $body);
        $body = "\n" . $this->print_nicely($body);
    }
    elseif($type == 'text/html') {
        $body = $purifier->purify($body);
    }

    return $body;
}
|
||||
|
||||
|
||||
private function escape_lt_gt_symbols($s = '') {
|
||||
$s = preg_replace("/</", "<", $s);
|
||||
$s = preg_replace("/>/", ">", $s);
|
||||
|
||||
return $s;
|
||||
return array('from' => $from,
|
||||
'to' => $to,
|
||||
'cc' => $cc,
|
||||
'subject' => $this->highlight_search_terms($subject, $terms),
|
||||
'date' => $date,
|
||||
'message' => $this->message['text/html'] ? $this->message['text/html'] : $this->message['text/plain'],
|
||||
'has_journal' => $has_journal,
|
||||
'verification' => $this->verification
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@ -405,25 +320,6 @@ class ModelSearchMessage extends Model {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Insert soft line breaks into a chunk of text.
 *
 * The text is split on single spaces; every word is re-emitted followed by
 * a space. A running byte count of the words on the current line is kept;
 * it is reset when a word contains a newline, and once it exceeds 70 a
 * newline is appended and the count starts over.
 *
 * @param string $chunk text to wrap
 * @return string wrapped text (always ends with a space or a newline)
 */
private function print_nicely($chunk) {
    $wrapped = "";
    $line_len = 0;

    foreach(explode(" ", $chunk) as $word) {
        $wrapped .= $word . " ";
        $line_len += strlen($word);

        // The word carried its own line break: start counting afresh
        if(strstr($word, "\n")) {
            $line_len = 0;
        }

        if($line_len > 70) {
            $wrapped .= "\n";
            $line_len = 0;
        }
    }

    return $wrapped;
}
|
||||
|
||||
|
||||
public function NiceSize($size) {
|
||||
if($size < 1000) return "1k";
|
||||
if($size < 100000) return round($size/1000) . "k";
|
||||
|
303
webui/system/helper/mime.php
Normal file
303
webui/system/helper/mime.php
Normal file
@ -0,0 +1,303 @@
|
||||
<?php
|
||||
|
||||
|
||||
/**
 * Simplified MIME decoder.
 *
 * Splits a raw RFC 822 message into headers and body, walks multipart
 * structures and collects the textual (text/plain, text/html) parts.
 * All methods are static; the class holds no state.
 */
class Piler_Mime_Decode {

    // Header fields that are always present (possibly empty) and MIME-decoded
    // in the array returned by splitHeaders().
    const HEADER_FIELDS = ['from', 'to', 'cc', 'subject', 'date'];


    /**
     * Recursively parse a message and append its textual parts to $result.
     *
     * Each appended entry is array('headers' => array, 'body' => string).
     * A message without a boundary is appended as-is (whatever its type),
     * unless it is message/rfc822, in which case its body is parsed as a
     * message of its own. Inside a multipart, only text/plain and text/html
     * parts are collected.
     *
     * @param string     $message raw message (headers + body)
     * @param array|null $result  output list; initialised to an empty array
     *                            when the caller passes an unset variable so
     *                            that count($result) is always safe
     * @return void
     */
    public static function parseMessage($message, &$result) {

        if(!is_array($result)) {
            $result = array();
        }

        self::splitMessage($message, $headers, $body);

        $boundary = self::getBoundary($headers);

        // No boundary defined

        if($boundary == '') {
            if($headers['content-type']['type'] == "message/rfc822") {
                self::parseMessage($body, $result);
            }
            else {
                $result[] = array(
                    'headers' => $headers,
                    'body' => $body
                );
            }

            return;
        }

        $parts = self::splitMime($body, $boundary);

        foreach($parts as $part) {

            self::splitMessage($part, $headers, $body);

            $boundary = self::getBoundary($headers);

            if($boundary) {
                // Nested multipart: descend with the complete part
                self::parseMessage($part, $result);
                continue;
            }

            $type = $headers['content-type']['type'];

            if(in_array($type, ["text/plain", "text/html"])) {
                $result[] = array('headers' => $headers, 'body' => $body);
            }
            else if($type == "message/rfc822") {
                self::parseMessage($body, $result);
            }
        }
    }


    /**
     * Split a multipart body into its parts, excluding the boundary lines.
     *
     * @param string $body     multipart body
     * @param string $boundary boundary string without the leading dashes
     * @return array list of raw parts; empty when the opening boundary or
     *               the closing "--boundary--" delimiter is not found
     */
    public static function splitMime($body, $boundary) {
        $res = array();

        $body = self::remove_LF($body);

        $delimiter = '--' . $boundary . "\n";
        $delimiter_len = strlen($delimiter);

        // Extract the mime parts excluding the boundary itself

        $p = strpos($body, $delimiter, 0);
        if($p === false) {
            // no parts found!
            return array();
        }

        // Position after first boundary line

        $start = $p + $delimiter_len;

        while(($p = strpos($body, $delimiter, $start)) !== false) {
            $res[] = substr($body, $start, $p - $start);
            $start = $p + $delimiter_len;
        }

        // No more parts, find end boundary

        $p = strpos($body, '--' . $boundary . '--', $start);
        if($p === false) {
            return array();
        }

        // The remaining part also needs to be parsed:
        $res[] = substr($body, $start, $p - $start);

        return $res;
    }


    /**
     * Split a message into a parsed header array and a body string.
     *
     * @param string $message raw message
     * @param array  $headers output: parsed headers, see splitHeaders()
     * @param string $body    output: message body
     * @param string $EOL     accepted for interface compatibility; it is not
     *                        forwarded, the message is always split on "\n\n"
     *                        after carriage returns have been removed
     * @return void
     */
    public static function splitMessage($message, &$headers, &$body, $EOL = "\n") {
        self::splitMessageRaw($message, $headers, $body);
        $headers = self::splitHeaders($headers);
    }


    /**
     * Split a message into its raw header block and body.
     *
     * Carriage returns are stripped first. The split happens at the first
     * empty line; without one the whole message is treated as headers.
     *
     * @param string $message raw message
     * @param string $headers output: raw header block
     * @param string $body    output: body ('' for a header-only message)
     * @param string $EOL     line terminator used to locate the empty line
     * @return void
     */
    public static function splitMessageRaw($message, &$headers, &$body, $EOL = "\n") {
        $headers = [];
        $body = '';

        $message = self::remove_LF($message);

        // Find an empty line between headers and body, otherwise we got a header-only message.
        // Compare against false explicitly: a separator at offset 0 is a valid match.

        if(strpos($message, $EOL . $EOL) !== false) {
            list($headers, $body) = explode($EOL . $EOL, $message, 2);
        }
        else {
            $headers = $message;
        }
    }


    /**
     * Parse a raw header block into a normalised associative array.
     *
     * Header names are lower-cased; headers that occur more than once become
     * arrays. Every field in HEADER_FIELDS is guaranteed to exist and is
     * MIME-decoded. 'content-type' is always an array with at least a
     * lower-cased 'type' entry (default 'text/plain').
     *
     * @param string $headers raw header block
     * @return array parsed headers
     */
    public static function splitHeaders($headers) {
        $headers = self::headersToArray($headers);

        // normalize header names
        foreach ($headers as $name => $header) {
            $lower = strtolower($name);
            if($lower === $name) {
                continue;
            }

            unset($headers[$name]);

            if(!isset($headers[$lower])) {
                $headers[$lower] = $header;
                continue;
            }

            if(is_array($headers[$lower])) {
                $headers[$lower][] = $header;
                continue;
            }

            $headers[$lower] = array($headers[$lower], $header);
        }

        // Add some default values, if they are missing

        if(!isset($headers['content-type'])) { $headers['content-type'] = 'text/plain'; }

        // A repeated Content-Type header arrives as an array: the first one wins
        if(is_array($headers['content-type'])) {
            $headers['content-type'] = (string)reset($headers['content-type']);
        }

        foreach(self::HEADER_FIELDS as $field) {
            if(!isset($headers[$field])) { $headers[$field] = ''; }

            $headers[$field] = self::decodeHeaderValue($headers[$field]);
        }

        $headers['content-type'] = self::splitContentType($headers['content-type']);

        // An empty or unparsable Content-Type yields no 'type' entry
        if(!isset($headers['content-type']['type'])) {
            $headers['content-type']['type'] = 'text/plain';
        }

        $headers['content-type']['type'] = strtolower($headers['content-type']['type']);

        return $headers;
    }


    /**
     * MIME-decode a header value, or every element of a repeated header.
     *
     * @param string|array $value raw header value(s)
     * @return string|array decoded value(s); a value that cannot be decoded
     *                      is returned unchanged
     */
    private static function decodeHeaderValue($value) {
        if(is_array($value)) {
            $decoded = array();

            foreach($value as $k => $v) {
                $decoded[$k] = self::decodeHeaderValue($v);
            }

            return $decoded;
        }

        $decoded = iconv_mime_decode((string)$value, ICONV_MIME_DECODE_CONTINUE_ON_ERROR);

        return $decoded === false ? $value : $decoded;
    }


    /**
     * Turn a raw header block into an array keyed by the header name as written.
     *
     * Continuation lines (starting with whitespace) are appended to the
     * previous header. A header that appears several times yields an array
     * of values. Lines that neither start with whitespace nor contain a
     * colon are ignored.
     *
     * @param string $headers raw header block, lines separated by "\n"
     * @return array header name => string, or array of strings for repeats
     */
    public static function headersToArray($headers = '') {
        $last_token = '';
        $result = array();

        foreach(explode("\n", $headers) as $h) {

            // Handle cases when there's no whitespace between the header key and value
            // eg. Subject:som
            // The name must not contain a colon itself, otherwise a value such as
            // "X-Link:http://host" would be split at the last colon.

            $h = preg_replace('/^([^\s:]+):(\S)/', '${1}: ${2}', $h);
            $h = preg_replace('/\s{1,}/', " ", $h);

            // Nothing to parse (and no first character to inspect) on an empty line

            if($h === null || $h === '') { continue; }

            // Skip line if it doesn't have a colon (:) and the 1st character is not a whitespace

            if(!ctype_space($h[0]) && strpos($h, ':') === false) { continue; }

            $line = preg_split('/\s/', $h);

            if(!$line) { continue; }

            if(substr($line[0], -1) == ':') {
                $token = rtrim(array_shift($line), ':');
                $last_token = $token;
            }
            else {
                // Continuation of the previous header
                $token = '';
            }

            $line_str = implode(" ", $line);

            if(!isset($result[$last_token])) {
                $result[$last_token] = $line_str;
                continue;
            }

            // A newline marks the start of a repeated header; it is split below
            if($token) {
                $result[$last_token] .= "\n";
            }

            $result[$last_token] .= ' ' . $line_str;
        }

        // each() was removed in PHP 8, iterate with foreach instead
        foreach($result as $k => $v) {
            if(strpos($v, "\n") !== false) {
                $result[$k] = explode("\n", $v);
            }
        }

        return $result;
    }


    /**
     * Split a Content-Type value into its type and parameters.
     *
     * Example: 'text/plain; charset="utf-8"' gives
     * array('type' => 'text/plain', 'charset' => 'utf-8').
     * Parameter names are lower-cased, quotes are removed from quoted values
     * and surrounding whitespace is removed from unquoted ones.
     *
     * @param string $field Content-Type header value
     * @return array parameter name => value; empty when nothing matches
     */
    public static function splitContentType($field = '') {
        $split = array();

        // Prefix the media type with a name so it parses like any other parameter
        $field = 'type' . '=' . $field;

        if(!preg_match_all('%([^=\s]+)\s*=\s*("[^"]+"|[^;]+)(;\s*|$)%', $field, $matches)) {
            return $split;
        }

        foreach ($matches[1] as $key => $name) {
            $name = strtolower($name);
            $value = $matches[2][$key];

            if($value[0] == '"') {
                $split[$name] = substr($value, 1, -1);
            } else {
                $split[$name] = trim($value);
            }
        }

        return $split;
    }


    /**
     * Remove every carriage return ("\r") so that lines end with "\n" only.
     *
     * Despite its name the function strips CR characters, not LF.
     *
     * @param string $message text to normalise
     * @return string text without "\r"
     */
    public static function remove_LF($message = '') {
        return str_replace("\r", "", $message);
    }


    /**
     * Return the multipart boundary from parsed headers.
     *
     * @param array $headers headers as returned by splitHeaders()
     * @return string the boundary or '' when none is defined
     */
    public static function getBoundary($headers = array()) {
        if(isset($headers['content-type']['boundary'])) {
            return $headers['content-type']['boundary'];
        }

        return '';
    }


    /**
     * Decode the body of a MIME part and prepare plain text for HTML output.
     *
     * Undoes quoted-printable / base64 transfer encoding (matched
     * case-insensitively), converts the text to UTF-8 when a charset is
     * declared, and for text/plain escapes angle brackets, adds <br /> line
     * breaks and wraps long lines. text/html is returned unpurified; the
     * caller is responsible for sanitising it.
     *
     * @param array  $headers headers as returned by splitHeaders()
     * @param string $body    raw body of the part
     * @return string decoded body
     */
    public static function fixMimeBodyPart($headers = array(), $body = '') {

        if(isset($headers['content-transfer-encoding'])) {
            $encoding = $headers['content-transfer-encoding'];

            // Repeated header: use the first occurrence
            if(is_array($encoding)) { $encoding = reset($encoding); }

            $encoding = strtolower(trim((string)$encoding));

            if($encoding == 'quoted-printable') {
                $body = quoted_printable_decode($body);
            }
            else if($encoding == 'base64') {
                $body = base64_decode($body);
            }
        }

        if(isset($headers['content-type']['charset'])) {
            $converted = iconv($headers['content-type']['charset'], 'utf-8' . '//IGNORE', $body);

            // iconv() returns false for unknown charsets; keep the undecoded text then
            if($converted !== false) {
                $body = $converted;
            }
        }

        $type = isset($headers['content-type']['type']) ? strtolower($headers['content-type']['type']) : '';

        if($type == 'text/plain') {
            $body = self::escape_lt_gt_symbols($body);
            $body = str_replace("\n", "<br />\n", $body);
            $body = "\n" . self::printNicely($body);
        }

        return $body;
    }


    /**
     * Replace '<' and '>' with their HTML entities.
     *
     * As found in this file the replacement strings were the literal
     * characters themselves, which left the text unescaped.
     *
     * @param string|array $s text (or array of texts) to escape
     * @return string|array escaped text
     */
    public static function escape_lt_gt_symbols($s = '') {
        return str_replace(array('<', '>'), array('&lt;', '&gt;'), $s);
    }


    /**
     * Insert soft line breaks into a text.
     *
     * The text is split on single spaces and every word is re-emitted
     * followed by a space. Once the byte length of the words on the current
     * line exceeds 70 a newline is added; a word containing a newline resets
     * the counter.
     *
     * @param string $s text to wrap
     * @return string wrapped text
     */
    public static function printNicely($s = '') {
        $k = 0;
        $nice = "";

        foreach(explode(" ", $s) as $word) {
            $nice .= $word . " ";
            $k += strlen($word);

            if(strpos($word, "\n") !== false) { $k = 0; }

            if($k > 70) { $nice .= "\n"; $k = 0; }
        }

        return $nice;
    }

}
|
Loading…
Reference in New Issue
Block a user