Fix OpenDocument text extraction

Signed-off-by: Janos SUTO <sj@acts.hu>
This commit is contained in:
Janos SUTO 2018-03-04 13:21:19 +01:00
parent 553d7e92c0
commit a48e3c74df
2 changed files with 11 additions and 22 deletions

View File

@ -11,7 +11,7 @@
#define VERSION "1.3.4" #define VERSION "1.3.4"
#define BUILD 991 #define BUILD 992
#define HOSTID "mailarchiver" #define HOSTID "mailarchiver"

View File

@ -19,32 +19,22 @@
#define die(e) do { syslog(LOG_INFO, "error: helper: %s", e); exit(EXIT_FAILURE); } while (0); #define die(e) do { syslog(LOG_INFO, "error: helper: %s", e); exit(EXIT_FAILURE); } while (0);
void remove_xml(char *buf, int *html){ int remove_xml(char *src, char *dest, int destlen, int *html){
int i=0; int i=0;
char *p;
p = buf; memset(dest, 0, destlen);
for(; *p; p++){ for(; *src; src++){
if(*p == '<'){ *html = 1; } if(*src == '<'){ *html = 1; continue; }
if(*src == '>'){ *html = 0; continue; }
if(*html == 0){ if(*html == 0){
*(buf+i) = *p; if(i < destlen) *(dest+i) = *src;
i++; i++;
} }
if(*p == '>'){
*html = 0;
if(i > 2 && *(buf+i-1) != ' '){
*(buf+i) = ' '; i++;
} }
} return i;
}
*(buf+i) = '\0';
} }
@ -52,7 +42,7 @@ void remove_xml(char *buf, int *html){
int extract_opendocument(struct session_data *sdata, struct parser_state *state, char *filename, char *prefix){ int extract_opendocument(struct session_data *sdata, struct parser_state *state, char *filename, char *prefix){
int errorp, i=0, len=0, html=0; int errorp, i=0, len=0, html=0;
int len2; int len2;
char buf[MAXBUFSIZE]; char buf[4*MAXBUFSIZE], puf[4*MAXBUFSIZE];
struct zip *z; struct zip *z;
struct zip_stat sb; struct zip_stat sb;
struct zip_file *zf; struct zip_file *zf;
@ -72,11 +62,10 @@ int extract_opendocument(struct session_data *sdata, struct parser_state *state,
if(zf){ if(zf){
while((len = zip_fread(zf, buf, sizeof(buf)-2)) > 0){ while((len = zip_fread(zf, buf, sizeof(buf)-2)) > 0){
remove_xml(buf, &html); len2 = remove_xml(buf, puf, sizeof(puf), &html);
len2 = strlen(buf);
if(len2 > 0 && state->bodylen < BIGBUFSIZE-len2-1){ if(len2 > 0 && state->bodylen < BIGBUFSIZE-len2-1){
memcpy(&(state->b_body[state->bodylen]), buf, len2); memcpy(&(state->b_body[state->bodylen]), puf, len2);
state->bodylen += len2; state->bodylen += len2;
} }