mirror of
https://bitbucket.org/jsuto/piler.git
synced 2025-01-12 12:20:11 +01:00
added odf, ms office attachment support
This commit is contained in:
parent
1620f0b50f
commit
b4854e312a
151
configure
vendored
151
configure
vendored
@ -3412,9 +3412,14 @@ have_clamd="no"
|
|||||||
have_antivirus="no"
|
have_antivirus="no"
|
||||||
have_mysql="no"
|
have_mysql="no"
|
||||||
have_tre="no"
|
have_tre="no"
|
||||||
|
have_zip="no"
|
||||||
have_zlib="no"
|
have_zlib="no"
|
||||||
|
|
||||||
pdftotext="no"
|
pdftotext="no"
|
||||||
|
catdoc="no"
|
||||||
|
catppt="no"
|
||||||
|
xls2csv="no"
|
||||||
|
odt2txt="no"
|
||||||
|
|
||||||
|
|
||||||
have_static_build="no"
|
have_static_build="no"
|
||||||
@ -3782,6 +3787,98 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for ac_header in zip.h
|
||||||
|
do :
|
||||||
|
ac_fn_c_check_header_mongrel "$LINENO" "zip.h" "ac_cv_header_zip_h" "$ac_includes_default"
|
||||||
|
if test "x$ac_cv_header_zip_h" = xyes; then :
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_ZIP_H 1
|
||||||
|
_ACEOF
|
||||||
|
have_zip=yes
|
||||||
|
else
|
||||||
|
echo "zip.h is not found"
|
||||||
|
fi
|
||||||
|
|
||||||
|
done
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lzip" >&5
|
||||||
|
$as_echo_n "checking for main in -lzip... " >&6; }
|
||||||
|
if ${ac_cv_lib_zip_main+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_check_lib_save_LIBS=$LIBS
|
||||||
|
LIBS="-lzip $LIBS"
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return main ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_c_try_link "$LINENO"; then :
|
||||||
|
ac_cv_lib_zip_main=yes
|
||||||
|
else
|
||||||
|
ac_cv_lib_zip_main=no
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext \
|
||||||
|
conftest$ac_exeext conftest.$ac_ext
|
||||||
|
LIBS=$ac_check_lib_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zip_main" >&5
|
||||||
|
$as_echo "$ac_cv_lib_zip_main" >&6; }
|
||||||
|
if test "x$ac_cv_lib_zip_main" = xyes; then :
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for zip_open in -lzip" >&5
|
||||||
|
$as_echo_n "checking for zip_open in -lzip... " >&6; }
|
||||||
|
if ${ac_cv_lib_zip_zip_open+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_check_lib_save_LIBS=$LIBS
|
||||||
|
LIBS="-lzip $LIBS"
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
/* Override any GCC internal prototype to avoid an error.
|
||||||
|
Use char because int might match the return type of a GCC
|
||||||
|
builtin and then its argument prototype would still apply. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
char zip_open ();
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return zip_open ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_c_try_link "$LINENO"; then :
|
||||||
|
ac_cv_lib_zip_zip_open=yes
|
||||||
|
else
|
||||||
|
ac_cv_lib_zip_zip_open=no
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext \
|
||||||
|
conftest$ac_exeext conftest.$ac_ext
|
||||||
|
LIBS=$ac_check_lib_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zip_zip_open" >&5
|
||||||
|
$as_echo "$ac_cv_lib_zip_zip_open" >&6; }
|
||||||
|
if test "x$ac_cv_lib_zip_zip_open" = xyes; then :
|
||||||
|
have_zip=yes
|
||||||
|
else
|
||||||
|
echo "libzip.so is not found"; have_zip=no
|
||||||
|
fi
|
||||||
|
|
||||||
|
fi
|
||||||
|
ac_cv_lib_zip=ac_cv_lib_zip_main
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for ac_header in zlib.h
|
for ac_header in zlib.h
|
||||||
do :
|
do :
|
||||||
ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
|
ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
|
||||||
@ -4144,6 +4241,16 @@ if test "$have_tre" = "yes"; then
|
|||||||
antispam_libs="$antispam_libs -ltre"
|
antispam_libs="$antispam_libs -ltre"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if test "$have_zip" = "yes"; then
|
||||||
|
echo "zip library: yes"
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_ZIP "1"
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
antispam_libs="$antispam_libs -lzip"
|
||||||
|
fi
|
||||||
|
|
||||||
if test "$have_mysql" = "yes"; then
|
if test "$have_mysql" = "yes"; then
|
||||||
defs="$defs -DNEED_MYSQL"
|
defs="$defs -DNEED_MYSQL"
|
||||||
fi
|
fi
|
||||||
@ -4163,7 +4270,51 @@ _ACEOF
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which catdoc 2>/dev/null` != "z"; then
|
||||||
|
catdoc=`which catdoc`
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_CATDOC "$catdoc"
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which catppt 2>/dev/null` != "z"; then
|
||||||
|
catppt=`which catppt`
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_CATPPT "$catppt"
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which xls2csv 2>/dev/null` != "z"; then
|
||||||
|
xls2csv=`which xls2csv`
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_XLS2CSV "$xls2csv"
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which odt2txt 2>/dev/null` != "z"; then
|
||||||
|
odt2txt=`which odt2txt`
|
||||||
|
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define HAVE_ODT2TXT "$odt2txt"
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
echo "pdftotext: $pdftotext"
|
echo "pdftotext: $pdftotext"
|
||||||
|
echo "catdoc: $catdoc"
|
||||||
|
echo "catppt: $catppt"
|
||||||
|
echo "xls2csv: $xls2csv"
|
||||||
|
echo "odt2txt: $odt2txt"
|
||||||
|
|
||||||
|
|
||||||
id -u $RUNNING_USER 2>/dev/null 1>/dev/null
|
id -u $RUNNING_USER 2>/dev/null 1>/dev/null
|
||||||
|
45
configure.in
45
configure.in
@ -38,9 +38,14 @@ have_clamd="no"
|
|||||||
have_antivirus="no"
|
have_antivirus="no"
|
||||||
have_mysql="no"
|
have_mysql="no"
|
||||||
have_tre="no"
|
have_tre="no"
|
||||||
|
have_zip="no"
|
||||||
have_zlib="no"
|
have_zlib="no"
|
||||||
|
|
||||||
pdftotext="no"
|
pdftotext="no"
|
||||||
|
catdoc="no"
|
||||||
|
catppt="no"
|
||||||
|
xls2csv="no"
|
||||||
|
odt2txt="no"
|
||||||
|
|
||||||
|
|
||||||
have_static_build="no"
|
have_static_build="no"
|
||||||
@ -120,6 +125,12 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
dnl libzip
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS(zip.h, have_zip=yes, echo "zip.h is not found")
|
||||||
|
AC_CHECK_LIB([zip],[main],[AC_CHECK_LIB(zip, zip_open, have_zip=yes, echo "libzip.so is not found"; have_zip=no)],[],[])ac_cv_lib_zip=ac_cv_lib_zip_main
|
||||||
|
|
||||||
|
|
||||||
dnl zlib
|
dnl zlib
|
||||||
|
|
||||||
AC_CHECK_HEADERS(zlib.h, have_zlib=yes, echo "zlib.h is not found")
|
AC_CHECK_HEADERS(zlib.h, have_zlib=yes, echo "zlib.h is not found")
|
||||||
@ -266,6 +277,12 @@ if test "$have_tre" = "yes"; then
|
|||||||
antispam_libs="$antispam_libs -ltre"
|
antispam_libs="$antispam_libs -ltre"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if test "$have_zip" = "yes"; then
|
||||||
|
echo "zip library: yes"
|
||||||
|
AC_DEFINE_UNQUOTED(HAVE_ZIP, 1, [libzip support])
|
||||||
|
antispam_libs="$antispam_libs -lzip"
|
||||||
|
fi
|
||||||
|
|
||||||
if test "$have_mysql" = "yes"; then
|
if test "$have_mysql" = "yes"; then
|
||||||
defs="$defs -DNEED_MYSQL"
|
defs="$defs -DNEED_MYSQL"
|
||||||
fi
|
fi
|
||||||
@ -281,7 +298,35 @@ if test z`which pdftotext 2>/dev/null` != "z"; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which catdoc 2>/dev/null` != "z"; then
|
||||||
|
catdoc=`which catdoc`
|
||||||
|
AC_DEFINE_UNQUOTED(HAVE_CATDOC, "$catdoc", [path to catdoc])
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which catppt 2>/dev/null` != "z"; then
|
||||||
|
catppt=`which catppt`
|
||||||
|
AC_DEFINE_UNQUOTED(HAVE_CATPPT, "$catppt", [path to catppt])
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which xls2csv 2>/dev/null` != "z"; then
|
||||||
|
xls2csv=`which xls2csv`
|
||||||
|
AC_DEFINE_UNQUOTED(HAVE_XLS2CSV, "$xls2csv", [path to xls2csv])
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if test z`which odt2txt 2>/dev/null` != "z"; then
|
||||||
|
odt2txt=`which odt2txt`
|
||||||
|
AC_DEFINE_UNQUOTED(HAVE_ODT2TXT, "$odt2txt", [path to odt2txt])
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
echo "pdftotext: $pdftotext"
|
echo "pdftotext: $pdftotext"
|
||||||
|
echo "catdoc: $catdoc"
|
||||||
|
echo "catppt: $catppt"
|
||||||
|
echo "xls2csv: $xls2csv"
|
||||||
|
echo "odt2txt: $odt2txt"
|
||||||
|
|
||||||
|
|
||||||
id -u $RUNNING_USER 2>/dev/null 1>/dev/null
|
id -u $RUNNING_USER 2>/dev/null 1>/dev/null
|
||||||
|
@ -10,3 +10,8 @@
|
|||||||
#define HAVE_DAEMON 1
|
#define HAVE_DAEMON 1
|
||||||
|
|
||||||
#undef HAVE_PDFTOTEXT
|
#undef HAVE_PDFTOTEXT
|
||||||
|
#undef HAVE_CATDOC
|
||||||
|
#undef HAVE_CATPPT
|
||||||
|
#undef HAVE_XLS2CSV
|
||||||
|
#undef HAVE_ZIP
|
||||||
|
|
||||||
|
@ -68,6 +68,7 @@ struct child {
|
|||||||
struct attachment {
|
struct attachment {
|
||||||
int size;
|
int size;
|
||||||
char type[TINYBUFSIZE];
|
char type[TINYBUFSIZE];
|
||||||
|
char shorttype[TINYBUFSIZE];
|
||||||
char aname[TINYBUFSIZE];
|
char aname[TINYBUFSIZE];
|
||||||
char filename[TINYBUFSIZE];
|
char filename[TINYBUFSIZE];
|
||||||
char internalname[TINYBUFSIZE];
|
char internalname[TINYBUFSIZE];
|
||||||
|
143
src/extract.c
143
src/extract.c
@ -4,15 +4,91 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <piler.h>
|
#include <piler.h>
|
||||||
|
|
||||||
|
#ifdef HAVE_ZIP
|
||||||
|
#include <zip.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
void extract_pdf(struct session_data *sdata, struct _state *state, char *filename, struct __config *cfg){
|
|
||||||
|
/*
 * Strip XML/HTML markup from buf in place, keeping only the text that
 * lies outside of <...> tags.
 *
 * buf   NUL-terminated text; rewritten in place. The result is never
 *       longer than the input.
 * html  in/out tag state: non-zero while the scanner is inside a tag.
 *       The caller keeps it between calls so that a tag split across two
 *       consecutive chunks is still removed.
 *
 * Every removed tag is replaced by a single space (unless the output
 * already ends in a space) so that words separated only by markup do not
 * get glued together.
 */
void remove_xml(char *buf, int *html){
   char *in, *out;
   int was_in_tag;

   if(!buf || !html) return;

   out = buf;

   for(in = buf; *in; in++){
      if(*in == '<'){ *html = 1; }

      /* remember the state before a closing '>' resets it */
      was_in_tag = *html;

      if(was_in_tag == 0){
         *out++ = *in;
      }

      if(*in == '>'){
         *html = 0;

         /*
          * Only a real tag is replaced by a separator. A stray '>' in
          * plain text has already been copied above; emitting a second
          * byte for it would let the write pointer overtake the read
          * pointer, clobbering unread input and the terminating NUL.
          *
          * NOTE(review): the "> 2" threshold is kept from the previous
          * implementation; presumably it avoids a leading separator at
          * the very start of the buffer - confirm.
          */
         if(was_in_tag && (out - buf) > 2 && *(out-1) != ' '){
            *out++ = ' ';
         }
      }
   }

   *out = '\0';
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_ZIP
/*
 * Append the plain text of a zipped XML document (OpenDocument, OOXML)
 * to state->b_body.
 *
 * Every archive member whose name starts with 'prefix' is read in chunks,
 * stripped of markup with remove_xml() and appended to the body buffer
 * for as long as there is room.
 *
 * sdata     unused here; kept for a uniform extractor signature
 * state     parser state whose b_body / bodylen are extended
 * filename  path of the zip container on disk
 * prefix    leading part of the member name(s) to extract,
 *           e.g. "content.xml" or "ppt/slides/slide"
 *
 * Returns 1 if the archive cannot be opened (or an argument is NULL),
 * 0 otherwise.
 *
 * The whole function is compiled only with HAVE_ZIP, because <zip.h> is
 * included only in that case.
 */
int extract_opendocument(struct session_data *sdata, struct _state *state, char *filename, char *prefix){
   int errorp, i=0, len=0, html=0;
   size_t prefixlen;
   char buf[MAXBUFSIZE];
   struct zip *z;
   struct zip_stat sb;
   struct zip_file *zf;

   if(!state || !filename || !prefix) return 1;

   z = zip_open(filename, 0, &errorp);
   if(!z) return 1;

   prefixlen = strlen(prefix);

   memset(buf, 0, sizeof(buf));

   while(zip_stat_index(z, i, 0, &sb) == 0){
      if(strncmp(sb.name, prefix, prefixlen) == 0){

         zf = zip_fopen_index(z, i, 0);
         if(zf){
            /* each member is a separate XML document: start outside of any tag */
            html = 0;

            /*
             * Read one byte less than the buffer holds: buf is zeroed
             * before every read, so the chunk is always NUL-terminated
             * for remove_xml() and strlen() below.
             */
            while((len = zip_fread(zf, buf, sizeof(buf)-1)) > 0){

               remove_xml(buf, &html);
               len = strlen(buf);

               /* a chunk that does not fit is dropped rather than truncated */
               if(state->bodylen < BIGBUFSIZE-len-1){
                  memcpy(&(state->b_body[state->bodylen]), buf, len);
                  state->bodylen += len;
               }

               memset(buf, 0, sizeof(buf));
            }

            zip_fclose(zf);
         }

         /* stop scanning further members once the body buffer is nearly full */
         if(state->bodylen > BIGBUFSIZE-1024) break;
      }

      i++;
   }

   zip_close(z);

   return 0;
}
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void read_content_with_popen(struct session_data *sdata, struct _state *state, char *cmd){
|
||||||
int len;
|
int len;
|
||||||
char buf[MAXBUFSIZE];
|
char buf[MAXBUFSIZE];
|
||||||
FILE *f;
|
FILE *f;
|
||||||
|
|
||||||
snprintf(buf, sizeof(buf)-1, "%s -enc UTF-8 %s -", HAVE_PDFTOTEXT, filename);
|
f = popen(cmd, "r");
|
||||||
|
|
||||||
f = popen(buf, "r");
|
|
||||||
if(f){
|
if(f){
|
||||||
while(fgets(buf, sizeof(buf)-1, f)){
|
while(fgets(buf, sizeof(buf)-1, f)){
|
||||||
len = strlen(buf);
|
len = strlen(buf);
|
||||||
@ -31,3 +107,62 @@ void extract_pdf(struct session_data *sdata, struct _state *state, char *filenam
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void extract_attachment_content(struct session_data *sdata, struct _state *state, char *filename, char *type, int *rec){
|
||||||
|
char cmd[SMALLBUFSIZE];
|
||||||
|
|
||||||
|
if(strcmp(type, "other") == 0) return;
|
||||||
|
|
||||||
|
memset(cmd, 0, sizeof(cmd));
|
||||||
|
|
||||||
|
#ifdef HAVE_PDFTOTEXT
|
||||||
|
if(strcmp(type, "pdf") == 0) snprintf(cmd, sizeof(cmd)-1, "%s -enc UTF-8 %s -", HAVE_PDFTOTEXT, filename);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_CATDOC
|
||||||
|
if(strcmp(type, "doc") == 0) snprintf(cmd, sizeof(cmd)-1, "%s -d utf-8 %s", HAVE_CATDOC, filename);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_CATPPT
|
||||||
|
if(strcmp(type, "ppt") == 0) snprintf(cmd, sizeof(cmd)-1, "%s -d utf-8 %s", HAVE_CATPPT, filename);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_XLS2CSV
|
||||||
|
if(strcmp(type, "xls") == 0) snprintf(cmd, sizeof(cmd)-1, "%s -d utf-8 %s", HAVE_XLS2CSV, filename);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(strlen(cmd) > 12){
|
||||||
|
read_content_with_popen(sdata, state, cmd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_ZIP
|
||||||
|
if(strcmp(type, "odf") == 0){
|
||||||
|
extract_opendocument(sdata, state, filename, "content.xml");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(strcmp(type, "docx") == 0){
|
||||||
|
extract_opendocument(sdata, state, filename, "word/document.xml");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(strcmp(type, "xlsx") == 0){
|
||||||
|
extract_opendocument(sdata, state, filename, "xl/worksheets/sheet");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(strcmp(type, "pptx") == 0){
|
||||||
|
extract_opendocument(sdata, state, filename, "ppt/slides/slide");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(strcmp(type, "zip") == 0 && *rec == 0){
|
||||||
|
(*rec)++;
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
#ifndef _EXTRACT_H
|
#ifndef _EXTRACT_H
|
||||||
#define _EXTRACT_H
|
#define _EXTRACT_H
|
||||||
|
|
||||||
void extract_pdf(struct session_data *sdata, struct _state *state, char *filename, struct __config *cfg);
|
#include "defs.h"
|
||||||
|
|
||||||
|
void extract_attachment_content(struct session_data *sdata, struct _state *state, char *filename, char *type, int *rec);
|
||||||
|
|
||||||
|
|
||||||
#endif /* _EXTRACT_H */
|
#endif /* _EXTRACT_H */
|
||||||
|
23
src/parser.c
23
src/parser.c
@ -100,7 +100,7 @@ struct _state parse_message(struct session_data *sdata, int take_into_pieces, st
|
|||||||
|
|
||||||
|
|
||||||
void post_parse(struct session_data *sdata, struct _state *state, struct __config *cfg){
|
void post_parse(struct session_data *sdata, struct _state *state, struct __config *cfg){
|
||||||
int i, len;
|
int i, len, rec=0;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
free_list(state->boundaries);
|
free_list(state->boundaries);
|
||||||
@ -127,17 +127,11 @@ void post_parse(struct session_data *sdata, struct _state *state, struct __confi
|
|||||||
|
|
||||||
p = determine_attachment_type(state->attachments[i].filename, state->attachments[i].type);
|
p = determine_attachment_type(state->attachments[i].filename, state->attachments[i].type);
|
||||||
len = strlen(p);
|
len = strlen(p);
|
||||||
|
|
||||||
if(strlen(sdata->attachments) < SMALLBUFSIZE-len-1 && !strstr(sdata->attachments, p)) memcpy(&(sdata->attachments[strlen(sdata->attachments)]), p, len);
|
if(strlen(sdata->attachments) < SMALLBUFSIZE-len-1 && !strstr(sdata->attachments, p)) memcpy(&(sdata->attachments[strlen(sdata->attachments)]), p, len);
|
||||||
|
|
||||||
if(state->attachments[i].dumped == 1){
|
if(state->attachments[i].dumped == 1){
|
||||||
|
rec = 0;
|
||||||
#ifdef HAVE_PDFTOTEXT
|
if(state->bodylen < BIGBUFSIZE-1024) extract_attachment_content(sdata, state, state->attachments[i].aname, get_attachment_extractor_by_filename(state->attachments[i].filename), &rec);
|
||||||
if(
|
|
||||||
strcmp(p, "pdf,") == 0 ||
|
|
||||||
(strcmp(p, "other,") == 0 && strcasestr(state->attachments[i].filename, ".pdf"))
|
|
||||||
) extract_pdf(sdata, state, state->attachments[i].aname, cfg);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
unlink(state->attachments[i].aname);
|
unlink(state->attachments[i].aname);
|
||||||
}
|
}
|
||||||
@ -245,22 +239,29 @@ int parse_line(char *buf, struct _state *state, struct session_data *sdata, int
|
|||||||
if(take_into_pieces == 1){
|
if(take_into_pieces == 1){
|
||||||
state->fd = open(state->attachments[state->n_attachments].internalname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
state->fd = open(state->attachments[state->n_attachments].internalname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
||||||
|
|
||||||
p = determine_attachment_type(state->attachments[state->n_attachments].filename, state->attachments[state->n_attachments].type);
|
p = get_attachment_extractor_by_filename(state->attachments[state->n_attachments].filename);
|
||||||
|
|
||||||
if(strcmp("pdf,", p) == 0 || strcmp("other,", p) == 0){
|
snprintf(state->attachments[state->n_attachments].shorttype, TINYBUFSIZE-1, "%s", p);
|
||||||
|
|
||||||
|
if(strcmp("other", p)){
|
||||||
state->b64fd = open(state->attachments[state->n_attachments].aname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
state->b64fd = open(state->attachments[state->n_attachments].aname, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
|
||||||
state->attachments[state->n_attachments].dumped = 1;
|
state->attachments[state->n_attachments].dumped = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if(state->fd == -1){
|
if(state->fd == -1){
|
||||||
|
|
||||||
state->attachments[state->n_attachments].size = 0;
|
state->attachments[state->n_attachments].size = 0;
|
||||||
state->attachments[state->n_attachments].dumped = 0;
|
state->attachments[state->n_attachments].dumped = 0;
|
||||||
memset(state->attachments[state->n_attachments].type, 0, TINYBUFSIZE);
|
memset(state->attachments[state->n_attachments].type, 0, TINYBUFSIZE);
|
||||||
|
memset(state->attachments[state->n_attachments].shorttype, 0, TINYBUFSIZE);
|
||||||
|
memset(state->attachments[state->n_attachments].aname, 0, TINYBUFSIZE);
|
||||||
memset(state->attachments[state->n_attachments].filename, 0, TINYBUFSIZE);
|
memset(state->attachments[state->n_attachments].filename, 0, TINYBUFSIZE);
|
||||||
memset(state->attachments[state->n_attachments].internalname, 0, TINYBUFSIZE);
|
memset(state->attachments[state->n_attachments].internalname, 0, TINYBUFSIZE);
|
||||||
memset(state->attachments[state->n_attachments].digest, 0, 2*DIGEST_LENGTH+1);
|
memset(state->attachments[state->n_attachments].digest, 0, 2*DIGEST_LENGTH+1);
|
||||||
|
|
||||||
|
|
||||||
syslog(LOG_PRIORITY, "%s: error opening %s", sdata->ttmpfile, state->attachments[state->n_attachments].internalname);
|
syslog(LOG_PRIORITY, "%s: error opening %s", sdata->ttmpfile, state->attachments[state->n_attachments].internalname);
|
||||||
|
|
||||||
state->n_attachments--;
|
state->n_attachments--;
|
||||||
|
@ -31,6 +31,7 @@ void degenerateToken(unsigned char *p);
|
|||||||
void fixURL(char *url);
|
void fixURL(char *url);
|
||||||
int extractNameFromHeaderLine(char *s, char *name, char *resultbuf);
|
int extractNameFromHeaderLine(char *s, char *name, char *resultbuf);
|
||||||
char *determine_attachment_type(char *filename, char *type);
|
char *determine_attachment_type(char *filename, char *type);
|
||||||
|
char *get_attachment_extractor_by_filename(char *filename);
|
||||||
void parse_reference(struct _state *state, char *s);
|
void parse_reference(struct _state *state, char *s);
|
||||||
int base64_decode_attachment_buffer(char *p, int plen, unsigned char *b, int blen);
|
int base64_decode_attachment_buffer(char *p, int plen, unsigned char *b, int blen);
|
||||||
|
|
||||||
|
@ -76,6 +76,7 @@ void init_state(struct _state *state){
|
|||||||
state->attachments[i].size = 0;
|
state->attachments[i].size = 0;
|
||||||
state->attachments[i].dumped = 0;
|
state->attachments[i].dumped = 0;
|
||||||
memset(state->attachments[i].type, 0, TINYBUFSIZE);
|
memset(state->attachments[i].type, 0, TINYBUFSIZE);
|
||||||
|
memset(state->attachments[i].shorttype, 0, TINYBUFSIZE);
|
||||||
memset(state->attachments[i].aname, 0, TINYBUFSIZE);
|
memset(state->attachments[i].aname, 0, TINYBUFSIZE);
|
||||||
memset(state->attachments[i].filename, 0, TINYBUFSIZE);
|
memset(state->attachments[i].filename, 0, TINYBUFSIZE);
|
||||||
memset(state->attachments[i].internalname, 0, TINYBUFSIZE);
|
memset(state->attachments[i].internalname, 0, TINYBUFSIZE);
|
||||||
@ -697,23 +698,40 @@ char *determine_attachment_type(char *filename, char *type){
|
|||||||
|
|
||||||
if(strncasecmp(type, "application/pdf", strlen("application/pdf")) == 0) return "pdf,";
|
if(strncasecmp(type, "application/pdf", strlen("application/pdf")) == 0) return "pdf,";
|
||||||
|
|
||||||
|
if(strncasecmp(type, "application/ms-tnef", strlen("application/ms-tnef")) == 0) return "winmail,";
|
||||||
if(strncasecmp(type, "application/msword", strlen("application/msword")) == 0) return "word,";
|
if(strncasecmp(type, "application/msword", strlen("application/msword")) == 0) return "word,";
|
||||||
|
|
||||||
|
// a .csv file has the same type
|
||||||
if(strncasecmp(type, "application/vnd.ms-excel", strlen("application/vnd.ms-excel")) == 0) return "excel,";
|
if(strncasecmp(type, "application/vnd.ms-excel", strlen("application/vnd.ms-excel")) == 0) return "excel,";
|
||||||
|
|
||||||
if(strncasecmp(type, "application/vnd.ms-powerpoint", strlen("application/vnd.ms-powerpoint")) == 0) return "powerpoint,";
|
if(strncasecmp(type, "application/vnd.ms-powerpoint", strlen("application/vnd.ms-powerpoint")) == 0) return "powerpoint,";
|
||||||
|
|
||||||
|
if(strncasecmp(type, "application/vnd.visio", strlen("application/vnd.visio")) == 0) return "visio,";
|
||||||
|
|
||||||
|
if(strncasecmp(type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", strlen("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) == 0) return "word,";
|
||||||
|
if(strncasecmp(type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", strlen("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) == 0) return "excel,";
|
||||||
|
if(strncasecmp(type, "application/vnd.openxmlformats-officedocument.presentationml.presentation", strlen("application/vnd.openxmlformats-officedocument.presentationml.presentation")) == 0) return "powerpoint,";
|
||||||
|
|
||||||
if(strncasecmp(type, "application/x-shockwave-flash", strlen("application/x-shockwave-flash")) == 0) return "flash,";
|
if(strncasecmp(type, "application/x-shockwave-flash", strlen("application/x-shockwave-flash")) == 0) return "flash,";
|
||||||
|
|
||||||
if(strcasestr(type, "opendocument")) return "odf,";
|
if(strcasestr(type, "opendocument")) return "odf,";
|
||||||
|
|
||||||
if(strcasecmp(type, "application/octet-stream") == 0){
|
|
||||||
|
|
||||||
p = strrchr(type, '.');
|
|
||||||
|
if(strncasecmp(type, "application/", 12) == 0){
|
||||||
|
|
||||||
|
p = strrchr(filename, '.');
|
||||||
if(p){
|
if(p){
|
||||||
p++;
|
p++;
|
||||||
|
|
||||||
|
if(strncasecmp(p, "pdf", 3) == 0) return "pdf,";
|
||||||
|
|
||||||
if(strncasecmp(p, "zip", 3) == 0) return "compressed,";
|
if(strncasecmp(p, "zip", 3) == 0) return "compressed,";
|
||||||
if(strncasecmp(p, "rar", 3) == 0) return "compressed,";
|
if(strncasecmp(p, "rar", 3) == 0) return "compressed,";
|
||||||
|
|
||||||
|
// tar.gz has the same type
|
||||||
|
if(strncasecmp(p, "x-gzip", 3) == 0) return "compressed,";
|
||||||
|
|
||||||
if(strncasecmp(p, "doc", 3) == 0) return "word,";
|
if(strncasecmp(p, "doc", 3) == 0) return "word,";
|
||||||
if(strncasecmp(p, "docx", 4) == 0) return "word,";
|
if(strncasecmp(p, "docx", 4) == 0) return "word,";
|
||||||
if(strncasecmp(p, "xls", 3) == 0) return "excel,";
|
if(strncasecmp(p, "xls", 3) == 0) return "excel,";
|
||||||
@ -733,6 +751,32 @@ char *determine_attachment_type(char *filename, char *type){
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Map an attachment file name to the name of the extractor that handles
 * it, based solely on the text after the last '.' (case-insensitive).
 *
 * filename  attachment name; may be NULL
 *
 * Returns a pointer to a constant string ("pdf", "zip", "gzip", "rar",
 * "odf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "text"), or "other"
 * when the name is NULL, has no '.' or has an unknown extension.
 * The returned string must not be modified or freed.
 */
char *get_attachment_extractor_by_filename(char *filename){
   static const struct {
      const char *ext;
      char *extractor;
   } map[] = {
      { ".pdf",  "pdf"  },
      { ".zip",  "zip"  },
      { ".gz",   "gzip" },
      { ".rar",  "rar"  },
      { ".odt",  "odf"  },
      { ".odp",  "odf"  },
      { ".ods",  "odf"  },
      { ".doc",  "doc"  },
      { ".docx", "docx" },
      { ".xls",  "xls"  },
      { ".xlsx", "xlsx" },
      { ".ppt",  "ppt"  },
      { ".pptx", "pptx" },
      { ".txt",  "text" },
      { ".csv",  "text" }
   };
   size_t k;
   char *p;

   if(!filename) return "other";

   p = strrchr(filename, '.');
   if(!p) return "other";

   for(k = 0; k < sizeof(map)/sizeof(map[0]); k++){
      if(strcasecmp(p, map[k].ext) == 0) return map[k].extractor;
   }

   return "other";
}
|
||||||
|
|
||||||
|
|
||||||
void parse_reference(struct _state *state, char *s){
|
void parse_reference(struct _state *state, char *s){
|
||||||
int len;
|
int len;
|
||||||
char puf[SMALLBUFSIZE];
|
char puf[SMALLBUFSIZE];
|
||||||
|
@ -72,7 +72,7 @@ int main(int argc, char **argv){
|
|||||||
snprintf(sdata.filename, SMALLBUFSIZE-1, "%s", argv[1]);
|
snprintf(sdata.filename, SMALLBUFSIZE-1, "%s", argv[1]);
|
||||||
snprintf(sdata.tmpframe, SMALLBUFSIZE-1, "%s.m", argv[1]);
|
snprintf(sdata.tmpframe, SMALLBUFSIZE-1, "%s.m", argv[1]);
|
||||||
|
|
||||||
state = parse_message(&sdata, 0, &cfg);
|
state = parse_message(&sdata, 1, &cfg);
|
||||||
post_parse(&sdata, &state, &cfg);
|
post_parse(&sdata, &state, &cfg);
|
||||||
|
|
||||||
printf("message-id: %s\n", state.message_id);
|
printf("message-id: %s\n", state.message_id);
|
||||||
@ -80,7 +80,7 @@ int main(int argc, char **argv){
|
|||||||
printf("to: *%s (%s)*\n", state.b_to, state.b_to_domain);
|
printf("to: *%s (%s)*\n", state.b_to, state.b_to_domain);
|
||||||
printf("reference: *%s*\n", state.reference);
|
printf("reference: *%s*\n", state.reference);
|
||||||
printf("subject: *%s*\n", state.b_subject);
|
printf("subject: *%s*\n", state.b_subject);
|
||||||
//printf("body: *%s*\n", state.b_body);
|
printf("body: *%s*\n", state.b_body);
|
||||||
|
|
||||||
printf("sent: %ld\n", sdata.sent);
|
printf("sent: %ld\n", sdata.sent);
|
||||||
|
|
||||||
@ -103,8 +103,11 @@ int main(int argc, char **argv){
|
|||||||
|
|
||||||
for(i=1; i<=state.n_attachments; i++){
|
for(i=1; i<=state.n_attachments; i++){
|
||||||
printf("i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest);
|
printf("i:%d, name=*%s*, type: *%s*, size: %d, int.name: %s, digest: %s\n", i, state.attachments[i].filename, state.attachments[i].type, state.attachments[i].size, state.attachments[i].internalname, state.attachments[i].digest);
|
||||||
|
unlink(state.attachments[i].internalname);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unlink(sdata.tmpframe);
|
||||||
|
|
||||||
printf("attachments:%s\n", sdata.attachments);
|
printf("attachments:%s\n", sdata.attachments);
|
||||||
|
|
||||||
printf("direction: %d\n", sdata.direction);
|
printf("direction: %d\n", sdata.direction);
|
||||||
|
Loading…
Reference in New Issue
Block a user