Refactored mime helper

Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
Janos SUTO 2023-06-07 07:33:20 +02:00
parent 30a89d856f
commit 1ef327b4ab
4 changed files with 78 additions and 68 deletions

View File

@ -277,6 +277,7 @@ $config['JQUERY_DATE_FORMAT'] = 'yy-mm-dd';
$config['DECIMAL_SEPARATOR'] = "."; // See https://www.php.net/manual/en/function.number-format
$config['THOUSANDS_SEPARATOR'] = ","; // for the format options
$config['ENABLE_GB2312_FIX'] = 1;
$config['FROM_LENGTH_TO_SHOW'] = 28;

View File

@ -2,7 +2,7 @@
class Piler_Mime_Decode {
const HEADER_FIELDS = ['from', 'to', 'cc', 'subject', 'date'];
const HEADER_FIELDS = ['from', 'sender', 'to', 'cc', 'subject', 'date'];
public static function normalize_message($message) {
@ -24,10 +24,10 @@ class Piler_Mime_Decode {
self::parseMessage($body, $result);
}
else {
$result[] = array(
$result[] = [
'headers' => $headers,
'body' => $body
);
];
}
return;
@ -45,7 +45,7 @@ class Piler_Mime_Decode {
}
else {
if(in_array($headers['content-type']['type'], ["text/plain", "text/html"])) {
$result[] = array('headers' => $headers, 'body' => $body);
$result[] = ['headers' => $headers, 'body' => $body];
}
else if($headers['content-type']['type'] == "message/rfc822") {
self::parseMessage($body, $result);
@ -57,14 +57,14 @@ class Piler_Mime_Decode {
public static function splitMime($body, $boundary) {
$start = 0;
$res = array();
$res = [];
// Extract the mime parts excluding the boundary itself
$p = strpos($body, '--' . $boundary . EOL, $start);
if($p === false) {
// no parts found!
return array();
return [];
}
// Position after first boundary line
@ -80,7 +80,7 @@ class Piler_Mime_Decode {
$p = strpos($body, '--' . $boundary . '--', $start);
if($p === false) {
return array();
return [];
}
// The remaining part also needs to be parsed:
@ -100,6 +100,8 @@ class Piler_Mime_Decode {
$headers = [];
$body = '';
$message = self::normalize_message($message);
// Find an empty line between headers and body, otherwise we got a header-only message
if(strpos($message, EOL . EOL)) {
@ -180,7 +182,7 @@ class Piler_Mime_Decode {
continue;
}
$headers[$lower] = array($headers[$lower], $header);
$headers[$lower] = [$headers[$lower], $header];
}
// Add some default values, if they are missing
@ -196,9 +198,21 @@ class Piler_Mime_Decode {
for($i=0; $i<count(self::HEADER_FIELDS); $i++) {
if(!isset($headers[self::HEADER_FIELDS[$i]])) { $headers[self::HEADER_FIELDS[$i]] = ''; }
$headers[self::HEADER_FIELDS[$i]] = preg_replace("/gb2312/i", "GBK", $headers[self::HEADER_FIELDS[$i]]);
// If the mail header features the same field more than once, e.g.
// Date: Wed, 23 Mar 2016 21:26:53 +0100
// Date: Wed, 23 Mar 2016 21:26:53 +0100
// then take the first occurrence
$headers[self::HEADER_FIELDS[$i]] = iconv_mime_decode($headers[self::HEADER_FIELDS[$i]], ICONV_MIME_DECODE_CONTINUE_ON_ERROR);
$header = $headers[self::HEADER_FIELDS[$i]];
if(is_array($header)) {
$header = $header[0];
}
if(ENABLE_GB2312_FIX) {
$header = preg_replace("/gb2312/i", "GBK", $header);
}
$headers[self::HEADER_FIELDS[$i]] = iconv_mime_decode($header, ICONV_MIME_DECODE_CONTINUE_ON_ERROR);
}
$headers['content-type'] = self::splitContentType($headers['content-type']);
@ -212,7 +226,7 @@ class Piler_Mime_Decode {
public static function headersToArray($headers = '') {
$token = '';
$last_token = '';
$result = array();
$result = [];
$headers = explode(EOL, $headers);
@ -258,7 +272,6 @@ class Piler_Mime_Decode {
}
foreach($result as $k => $v) {
if(strchr($v, EOL)) {
$result[$k] = explode(EOL, $v);
}
@ -269,7 +282,7 @@ class Piler_Mime_Decode {
public static function splitContentType($field = '') {
$split = array();
$split = [];
$what = 'type';
$field = $what . '=' . $field;
@ -277,7 +290,7 @@ class Piler_Mime_Decode {
return $split;
}
$split = array();
$split = [];
foreach ($matches[1] as $key => $name) {
$name = strtolower($name);
if($matches[2][$key][0] == '"') {
@ -291,7 +304,7 @@ class Piler_Mime_Decode {
}
public static function getBoundary($headers = array()) {
public static function getBoundary($headers = []) {
if(isset($headers['content-type']['boundary'])) {
return $headers['content-type']['boundary'];
}
@ -300,7 +313,7 @@ class Piler_Mime_Decode {
}
public static function fixMimeBodyPart($headers = array(), $body = '') {
public static function fixMimeBodyPart($headers = [], $body = '') {
if(isset($headers['content-transfer-encoding'])) {
if(strtolower($headers['content-transfer-encoding']) == 'quoted-printable') {
@ -313,9 +326,10 @@ class Piler_Mime_Decode {
}
if(isset($headers['content-type']['charset'])) {
if(strtolower($headers['content-type']['charset']) == 'gb2312') {
if(ENABLE_GB2312_FIX && strtolower($headers['content-type']['charset']) == 'gb2312') {
$headers['content-type']['charset'] = 'GBK';
}
$body = iconv($headers['content-type']['charset'], 'utf-8' . '//IGNORE', $body);
}

View File

@ -11,14 +11,14 @@ final class MailParserTest extends TestCase {
public function providerTestParseMessage() {
return [
["1.eml", 1, ["Liebe Gueste,\n\ndie Einarbeitung der Rechen- und Summenfunktionen ins RK-Formular"]],
["2.eml", 1, ["Hallo!\nDie seltsamen Zeilenumbr=C3=BCche treten tats=C3=A4chlich auf."]],
["3.eml", 1, ["\n\nCan we discuss? Send Reply For more information, THANKS."]],
["1.eml", 1, ["Liebe Gueste,\r\n\r\ndie Einarbeitung der Rechen- und Summenfunktionen ins RK-Formular"]],
["2.eml", 1, ["Hallo!\r\nDie seltsamen Zeilenumbr=C3=BCche treten tats=C3=A4chlich auf."]],
["3.eml", 1, ["\r\n\r\nCan we discuss? Send Reply For more information, THANKS."]],
["4.eml", 2, ["=0D=0A=0D=0A=0D=0A=0D=0A", "<HTML><HEAD>=0D=0A<META http-equiv=3D\"Content-Type\" content=3D\"te="]],
["5.eml", 2, ["\nHi ,\n\nIf so, stop by and test out our FREE phishing simulator! Find out how", "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1=2E0 Transitional//EN\" \"http://ww=\nw=2Ew3=2Eorg/TR/xhtml1/DTD/xhtml1-transitional=2Edtd\"><html xmlns=3D\"http:/="]],
["5.eml", 2, ["\r\nHi ,\r\n\r\nIf so, stop by and test out our FREE phishing simulator! Find out how", "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1=2E0 Transitional//EN\" \"http://ww=\r\nw=2Ew3=2Eorg/TR/xhtml1/DTD/xhtml1-transitional=2Edtd\"><html xmlns=3D\"http:/="]],
["6.eml", 2, ["RGVhciBTaXJzLA0KDQpHbGFkIHRvIGhlYXIgdGhhdCB5b3UncmUgb24gdGhlIGZpbHRyYXRpb24g", "<html><head><meta http-equiv=3D\"content-type\" content=3D\"text/html; charse="]],
["7.eml", 2, ["Mai ajánlat: \n \n Exkluzív!", "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"]],
["8.eml", 2, ["Hello,\n\nYou have received a newsletter from Chemol Travel.", "<html xmlns=3D\"http://www.w3.org/1999/xhtml\" xmlns:v=3D\"urn:schemas-micro=\nsoft-com:vml\" xmlns:o=3D\"urn:schemas-microsoft-com:office:office\">"]],
["7.eml", 2, ["Mai ajánlat: \r\n \r\n Exkluzív!", "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"]],
["8.eml", 2, ["Hello,\r\n\r\nYou have received a newsletter from Chemol Travel.", "<html xmlns=3D\"http://www.w3.org/1999/xhtml\" xmlns:v=3D\"urn:schemas-micro=\r\nsoft-com:vml\" xmlns:o=3D\"urn:schemas-microsoft-com:office:office\">"]],
];
}

View File

@ -1,66 +1,61 @@
<?php
define('CONTENT_TYPE', 'content-type');
define('TEXT_PLAIN', 'text/plain');
define('SUBJECT', 'subject');
define('THIS_IS_A_TEST', 'This is a test');
use PHPUnit\Framework\TestCase;
require_once DIR_BASE . 'system/helper/mime.php';
require_once dirname(dirname(__FILE__)) . '/system/helper/mime.php';
final class SplitMessageTest extends TestCase {
public function providerTestSplitMessage() {
return [
["From: aaa\r\nTo:bbb\r\nSubject: test\r\n\r\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
THIS_IS_A_TEST],
["From: aaa\r\nTo:bbb\r\nSubject: test\r\n\r\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain')),
"This is a test"],
["From: aaa\r\nTo:bbb\r\nCC ccc\r\nSubject: test\r\n\r\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
THIS_IS_A_TEST],
["From: aaa\r\nSender: alala@aaa\r\nTo:bbb\r\nCC ccc\r\nSubject: test\r\n\r\nThis is a test",
array('sender' => 'alala@aaa', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain')),
"This is a test"],
["From: aaa\r\nTo:bbb\r\nSubject: test\r\n\r\n\r\n\r\n" . THIS_IS_A_TEST . "\nAaa\n",
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
"\n\n" . THIS_IS_A_TEST . "\nAaa\n"],
["From: aaa\r\nTo:bbb\r\nSubject: test\r\n\r\n\r\n\r\nThis is a test\nAaa\n",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain')),
"\r\n\r\nThis is a test\r\nAaa\r\n"],
["From: aaa\r\nTo:bbb\r\nSubject: test\r\nContent-type: text/html\r\n\r\n\r\n" . THIS_IS_A_TEST . "\nAaa\n",
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => 'text/html')),
"\n" . THIS_IS_A_TEST . "\nAaa\n"],
["From: aaa\r\nTo:bbb\r\nSubject: test\r\nContent-type: text/html\r\n\r\n\r\nThis is a test\nAaa\n",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/html')),
"\r\nThis is a test\r\nAaa\r\n"],
["From: aaa\nTo:bbb\nSubject: test\nContent-Type: text/plain\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nContent-Type: text/plain\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\nDate: Sun, 17 Apr 2016 22:40:03 +0800\nDKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=chemoltravel.hu; s=ml;\n\tt=1471888357; bh=A/l/HLQe3HM4Xc4jFxAmhaWVCMU=;\n\th=Date:To:From:Subject:Sender:From:To:Subject:Date;\n\tb=JlEqXiAKBOoT/YyXKTMsXnEphh2J6sXxgNmbKbGybjo3cU1rgQEL0m1h26gl5AaBP\nContent-Type: " . TEXT_PLAIN . "\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', SUBJECT => 'test', 'date' => 'Sun, 17 Apr 2016 22:40:03 +0800', 'dkim-signature' => 'v=1; a=rsa-sha1; c=relaxed/relaxed; d=chemoltravel.hu; s=ml; t=1471888357; bh=A/l/HLQe3HM4Xc4jFxAmhaWVCMU=; h=Date:To:From:Subject:Sender:From:To:Subject:Date; b=JlEqXiAKBOoT/YyXKTMsXnEphh2J6sXxgNmbKbGybjo3cU1rgQEL0m1h26gl5AaBP', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nDate: Sun, 17 Apr 2016 22:40:03 +0800\nDKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=chemoltravel.hu; s=ml;\n\tt=1471888357; bh=A/l/HLQe3HM4Xc4jFxAmhaWVCMU=;\n\th=Date:To:From:Subject:Sender:From:To:Subject:Date;\n\tb=JlEqXiAKBOoT/YyXKTMsXnEphh2J6sXxgNmbKbGybjo3cU1rgQEL0m1h26gl5AaBP\nContent-Type: text/plain\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'subject' => 'test', 'date' => 'Sun, 17 Apr 2016 22:40:03 +0800', 'dkim-signature' => 'v=1; a=rsa-sha1; c=relaxed/relaxed; d=chemoltravel.hu; s=ml; t=1471888357; bh=A/l/HLQe3HM4Xc4jFxAmhaWVCMU=; h=Date:To:From:Subject:Sender:From:To:Subject:Date; b=JlEqXiAKBOoT/YyXKTMsXnEphh2J6sXxgNmbKbGybjo3cU1rgQEL0m1h26gl5AaBP', 'content-type' => array('type' => 'text/plain')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\nContent-Type: text/PLAIN\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN)),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nContent-Type: text/PLAIN\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\nContent-Type: text/plain; charset=\"ISO-8859-1\"\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', CONTENT_TYPE => array('type' => TEXT_PLAIN, 'charset' => 'ISO-8859-1')),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nContent-Type: text/plain; charset=\"ISO-8859-1\"\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'content-type' => array('type' => 'text/plain', 'charset' => 'ISO-8859-1')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\nMIME-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_\"\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', 'mime-version' => '1.0', CONTENT_TYPE => array('type' => 'multipart/alternative', 'boundary' => '_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_')),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nMIME-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_\"\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'mime-version' => '1.0', 'content-type' => array('type' => 'multipart/alternative', 'boundary' => '_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\nMIME-Version: 1.0\nContent-Type: multipart/alternative;\n boundary=\"_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_\"\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', 'mime-version' => '1.0', CONTENT_TYPE => array('type' => 'multipart/alternative', 'boundary' => '_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_')),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nMIME-Version: 1.0\nContent-Type: multipart/alternative;\n boundary=\"_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_\"\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'mime-version' => '1.0', 'content-type' => array('type' => 'multipart/alternative', 'boundary' => '_=_SWIFT_v4_1460476188_145aa333fc0127705a7e904aab6d1957_=_')),
"This is a test"],
["From: aaa\nTo:bbb\nSubject: test\nMIME-Version: 1.0\nContent-Type: multipart/related;\n\ttype=\"multipart/alternative\";\n\tboundary=\"----=_NextPart_000_0006_01D195BC.69E26510\"\n\n" . THIS_IS_A_TEST,
array('from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', SUBJECT => 'test', 'mime-version' => '1.0', CONTENT_TYPE => array('type' => 'multipart/alternative', 'boundary' => '----=_NextPart_000_0006_01D195BC.69E26510')),
THIS_IS_A_TEST],
["From: aaa\nTo:bbb\nSubject: test\nMIME-Version: 1.0\nContent-Type: multipart/related;\n\ttype=\"multipart/alternative\";\n\tboundary=\"----=_NextPart_000_0006_01D195BC.69E26510\"\n\nThis is a test",
array('sender' => '', 'from' => 'aaa', 'to' => 'bbb', 'cc' => '', 'date' => '', 'subject' => 'test', 'mime-version' => '1.0', 'content-type' => array('type' => 'multipart/alternative', 'boundary' => '----=_NextPart_000_0006_01D195BC.69E26510')),
"This is a test"],
];