From c5959f79c40c582dc8df18f3b9a322a21f1cbd54 Mon Sep 17 00:00:00 2001 From: Janos SUTO Date: Tue, 10 Jul 2018 11:17:57 +0000 Subject: [PATCH] Fixed attachment name parsing Signed-off-by: Janos SUTO --- src/parser_utils.c | 27 ++++++++++++++------------- unit_tests/check_parser_utils.c | 17 ++++++++++++++++- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/parser_utils.c b/src/parser_utils.c index 49ab3daa..0b06ee65 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -824,21 +824,22 @@ void extractNameFromHeaderLine(char *s, char *name, char *resultbuf, int resultb if(p){ p++; - // If the line has the 'name' more than once, then truncate the subsequent parts, ie. - // utf-8''P;LAN%20Holden%204.docx;filename="P;LAN Holden 4.docx" ==> utf-8''P;LAN%20Holden%204.docx - q = strstr(p, name); - if(q) *q = '\0'; + // skip any whitespace after name=, ie. name = " + while(*p==' ' || *p=='\t') p++; - q = strrchr(p, ';'); - if(q) *q = '\0'; - q = strrchr(p, '"'); - if(q){ - *q = '\0'; - p = strchr(p, '"'); - if(p){ - p++; - } + // if there's a double quote after the equal symbol (=), ie. name*="utf-8.... + if(*p == '"'){ + p++; + q = strchr(p, '"'); + + if(q) *q = '\0'; } + else { + // no " after =, so split on ; + q = strchr(p, ';'); + if(q) *q = '\0'; + } + if(extended == 1){ encoding = p; diff --git a/unit_tests/check_parser_utils.c b/unit_tests/check_parser_utils.c index 4cc9e31a..e457e787 100644 --- a/unit_tests/check_parser_utils.c +++ b/unit_tests/check_parser_utils.c @@ -97,10 +97,25 @@ static void test_extractNameFromHeaderLine(){ {"Content-Type: image/png; name=\"Screenshot from 2015-11-10 10:07:13.png\"", "name", "Screenshot from 2015-11-10 10:07:13.png"}, {"Content-Disposition: attachment; filename=\"zzzzz Email Examples.zip\";", "name", "zzzzz Email Examples.zip"}, + {"Content-Type: application/msword; name*=\"iso-8859-1''Einverst%E4ndniserkl%E4rung_Kids-PKW_Familienname.doc\"", "name", "Einverständniserklärung_Kids-PKW_Familienname.doc"}, + {"Content-Type: application/msword; name*= \"iso-8859-1''Einverst%E4ndniserkl%E4rung_Kids-PKW_Familienname.doc\"", "name", "Einverständniserklärung_Kids-PKW_Familienname.doc"}, + + // This one sucks, and I don't think it's a proper definition + {"Content-Type: application/msword; filename*=utf-8''P;LAN%20Holden%204.docx;filename=\"P;LAN Holden 4.docx\"", "name", "P"}, + // Adding quotes makes it acceptable to the parser + {"Content-Type: application/msword; filename*=\"utf-8''P;LAN%20Holden%204.docx\";filename=\"P;LAN Holden 4.docx\"", "name", "P;LAN Holden 4.docx"}, + + {"Content-Type: null; name=\"toDev-Netengineering.png\"", "name", "toDev-Netengineering.png"}, + {"Content-Type: null; name=\"toDev-name-Netengineering.png\"", "name", "toDev-name-Netengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDev-Netengineering.png\"", "name", "toDev-Netengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDev-name-Netengineering.png\"", "name", "toDev-name-Netengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDevnameNetengineering.png\"", "name", "toDevnameNetengineering.png"}, + {"Content-Type: null; name*=\"iso-8859-1''toDev-namE-Netengineering.png\"", "name", "toDev-namE-Netengineering.png"}, + {"foo: bar; title=Economy", "title", "Economy"}, {"foo: bar; title=\"US-$ rates\"", "title", "US-$ rates"}, {"foo: bar; title*=iso-8859-1'en'%A3%20rates", "title", "£ rates"}, - {"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"} + {"foo: bar; title*=UTF-8''%c2%a3%20and%20%e2%82%ac%20rates", "title", "£ and € rates"}, }; TEST_HEADER();