mirror of
				https://bitbucket.org/jsuto/piler.git
				synced 2025-11-04 08:52:26 +01:00 
			
		
		
		
	@@ -171,6 +171,7 @@ struct parser_state {
 | 
			
		||||
   int qp;
 | 
			
		||||
   int htmltag;
 | 
			
		||||
   int style;
 | 
			
		||||
   int meta_content_type;
 | 
			
		||||
   int skip_html;
 | 
			
		||||
   int has_to_dump;
 | 
			
		||||
   int has_to_dump_whole_body;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										15
									
								
								src/parser.c
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								src/parser.c
									
									
									
									
									
								
							@@ -641,7 +641,7 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata
 | 
			
		||||
      state->pushed_pointer = 0;
 | 
			
		||||
 | 
			
		||||
      memset(state->type, 0, TINYBUFSIZE);
 | 
			
		||||
      snprintf(state->charset, TINYBUFSIZE-1, "unknown");
 | 
			
		||||
      memset(state->charset, 0, TINYBUFSIZE);
 | 
			
		||||
 | 
			
		||||
      memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
 | 
			
		||||
      state->anamepos = 0;
 | 
			
		||||
@@ -684,7 +684,18 @@ int parse_line(char *buf, struct parser_state *state, struct session_data *sdata
 | 
			
		||||
   if(state->texthtml == 1 && state->message_state == MSG_BODY) markHTML(buf, state);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   if(state->texthtml == 1) decodeHTML(buf, state->utf8);
 | 
			
		||||
   if(state->texthtml == 1){
 | 
			
		||||
      size_t buflen = strlen(buf);
 | 
			
		||||
      decodeHTML(buf, state->utf8);
 | 
			
		||||
      /* decodeHTML converted some entities to iso-8859-1 */
 | 
			
		||||
      if(state->utf8 != 1 && strlen(buf) != buflen){
 | 
			
		||||
        /* no charset or us-ascii: switch to iso-8859-1 */
 | 
			
		||||
        if (state->charset[0] == 0 || strcasecmp(state->charset, "us-ascii") == 0){
 | 
			
		||||
          syslog(LOG_PRIORITY, "%s: assuming iso-8859-1 encoding for HTML (was '%s')", sdata->ttmpfile, state->charset);
 | 
			
		||||
          snprintf(state->charset, TINYBUFSIZE-1, "ISO8859-1");
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   /* encode the body if it's not utf-8 encoded */
 | 
			
		||||
   if(state->message_state == MSG_BODY && state->utf8 != 1){
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,7 @@ void fixupEncodedHeaderLine(char *buf, int buflen);
 | 
			
		||||
void fixupSoftBreakInQuotedPritableLine(char *buf, struct parser_state *state);
 | 
			
		||||
void fixupBase64EncodedLine(char *buf, struct parser_state *state);
 | 
			
		||||
void markHTML(char *buf, struct parser_state *state);
 | 
			
		||||
void setStateHTMLStyle(char *htmlbuf, int pos, struct parser_state *state);
 | 
			
		||||
void setStateHTML(char *htmlbuf, int pos, struct parser_state *state);
 | 
			
		||||
void translateLine(unsigned char *p, struct parser_state *state);
 | 
			
		||||
void fix_email_address_for_sphinx(char *s);
 | 
			
		||||
void split_email_address(char *s);
 | 
			
		||||
 
 | 
			
		||||
@@ -40,6 +40,7 @@ void init_state(struct parser_state *state){
 | 
			
		||||
 | 
			
		||||
   state->htmltag = 0;
 | 
			
		||||
   state->style = 0;
 | 
			
		||||
   state->meta_content_type = 0;
 | 
			
		||||
 | 
			
		||||
   state->skip_html = 0;
 | 
			
		||||
 | 
			
		||||
@@ -52,6 +53,7 @@ void init_state(struct parser_state *state){
 | 
			
		||||
   memset(state->receivedbuf, 0, sizeof(state->receivedbuf));
 | 
			
		||||
 | 
			
		||||
   memset(state->type, 0, TINYBUFSIZE);
 | 
			
		||||
   memset(state->charset, 0, TINYBUFSIZE);
 | 
			
		||||
 | 
			
		||||
   memset(state->attachment_name_buf, 0, SMALLBUFSIZE);
 | 
			
		||||
   state->anamepos = 0;
 | 
			
		||||
@@ -551,7 +553,7 @@ void markHTML(char *buf, struct parser_state *state){
 | 
			
		||||
 | 
			
		||||
            if(isspace(*s)){
 | 
			
		||||
               if(j > 0){
 | 
			
		||||
                  setStateHTMLStyle(html, pos, state);
 | 
			
		||||
                  setStateHTML(html, pos, state);
 | 
			
		||||
                  memset(html, 0, SMALLBUFSIZE); j=0;
 | 
			
		||||
               }
 | 
			
		||||
               pos++;
 | 
			
		||||
@@ -576,23 +578,51 @@ void markHTML(char *buf, struct parser_state *state){
 | 
			
		||||
 | 
			
		||||
         if(j > 0){
 | 
			
		||||
            strncat(html, " ", SMALLBUFSIZE-1);
 | 
			
		||||
            setStateHTMLStyle(html, pos, state);
 | 
			
		||||
            setStateHTML(html, pos, state);
 | 
			
		||||
            memset(html, 0, SMALLBUFSIZE); j=0;
 | 
			
		||||
         }
 | 
			
		||||
         state->meta_content_type = 0;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   //printf("append last in line:*%s*, html=+%s+, j=%d\n", puf, html, j);
 | 
			
		||||
   if(j > 0){ setStateHTMLStyle(html, pos, state); }
 | 
			
		||||
   if(j > 0){ setStateHTML(html, pos, state); }
 | 
			
		||||
 | 
			
		||||
   strcpy(buf, puf);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void setStateHTMLStyle(char *htmlbuf, int pos, struct parser_state *state){
 | 
			
		||||
void setStateHTML(char *htmlbuf, int pos, struct parser_state *state){
 | 
			
		||||
   if(pos == 0 && strncmp(htmlbuf, "style ", 6) == 0) state->style = 1;
 | 
			
		||||
   if(pos == 0 && strncmp(htmlbuf, "/style ", 7) == 0) state->style = 0;
 | 
			
		||||
 | 
			
		||||
   if(pos == 0 && state->charset[0] == 0 && strncmp(htmlbuf, "meta ", 5) == 0) state->meta_content_type = 0x1;
 | 
			
		||||
   if(state->meta_content_type){
 | 
			
		||||
     if((state->meta_content_type & 0x2) == 0 && strstr(htmlbuf, "http-equiv=content-type "))
 | 
			
		||||
       state->meta_content_type |= 0x2;
 | 
			
		||||
 | 
			
		||||
     if((state->meta_content_type & 0x4) == 0 && strstr(htmlbuf, "content=text/html;"))
 | 
			
		||||
       state->meta_content_type |= 0x4;
 | 
			
		||||
 | 
			
		||||
     if(state->meta_content_type == 0x7){
 | 
			
		||||
       char *p, *q;
 | 
			
		||||
 | 
			
		||||
       p = strstr(htmlbuf, "charset=");
 | 
			
		||||
       if(p){
 | 
			
		||||
         p += 8;
 | 
			
		||||
         for(q = p; isalnum(*q) || index("-_", *q); q++)
 | 
			
		||||
           ;
 | 
			
		||||
 | 
			
		||||
         if(q > p && q-p+1 < (int) sizeof(state->charset)){
 | 
			
		||||
           syslog(LOG_PRIORITY, "Changing HTML charset from '%s' to '%*s' due to meta tag", state->charset, (int)(q-p), p);
 | 
			
		||||
           strncpy(state->charset, p, q-p);
 | 
			
		||||
           state->charset[q-p+1] = '\0';
 | 
			
		||||
           state->meta_content_type = 0;
 | 
			
		||||
         }
 | 
			
		||||
       }
 | 
			
		||||
     }
 | 
			
		||||
   }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user