mirror of
https://bitbucket.org/jsuto/piler.git
synced 2025-01-12 11:30:13 +01:00
345fd715f5
Signed-off-by: Janos SUTO <sj@acts.hu>
344 lines
8.2 KiB
C
344 lines
8.2 KiB
C
/*
|
|
* decoder.c, SJ
|
|
*/
|
|
|
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <iconv.h>
#include "decoder.h"
#include "htmlentities.h"
#include "config.h"
|
|
|
|
|
|
/*
 * Base64 decoding table, indexed by the unsigned value of an input byte.
 *
 * 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and
 * '/' to 63. The padding character '=' maps to 0, so a padded quad can be
 * packed like any other one. Every other byte maps to 255.
 *
 * The table is read-only, hence const.
 */
static const int b64[256] = {

   /* 0x00 - 0x3F: control characters, punctuation ('+', '/'), digits, '=' */
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255,  62,  255, 255, 255,  63,
    52,  53,  54,  55,   56,  57,  58,  59,   60,  61, 255, 255,  255,   0, 255, 255,

   /* 0x40 - 0x7F: upper case and lower case letters */
   255,   0,   1,   2,    3,   4,   5,   6,    7,   8,   9,  10,   11,  12,  13,  14,
    15,  16,  17,  18,   19,  20,  21,  22,   23,  24,  25, 255,  255, 255, 255, 255,
   255,  26,  27,  28,   29,  30,  31,  32,   33,  34,  35,  36,   37,  38,  39,  40,
    41,  42,  43,  44,   45,  46,  47,  48,   49,  50,  51, 255,  255, 255, 255, 255,

   /* 0x80 - 0xBF: never part of the base64 alphabet */
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,

   /* 0xC0 - 0xFF: never part of the base64 alphabet */
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,
   255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255,  255, 255, 255, 255

};
|
|
|
|
|
|
/*
 * Hexadecimal digit values, indexed by the unsigned value of a character.
 *
 * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15. Every other byte
 * maps to 0, so callers must check the character with isxdigit() first.
 *
 * The table is read-only, hence const.
 */
static const char hex_table[256] = {

   /* 0x00 - 0x3F: only the digits '0'-'9' carry a value */
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  1,  2,  3,   4,  5,  6,  7,   8,  9,  0,  0,   0,  0,  0,  0,

   /* 0x40 - 0x7F: 'A'-'F' and 'a'-'f' */
   0, 10, 11, 12,  13, 14, 15,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0, 10, 11, 12,  13, 14, 15,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,

   /* 0x80 - 0xBF */
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,

   /* 0xC0 - 0xFF */
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,
   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0,   0,  0,  0,  0

};
|
|
|
|
|
|
static int compmi(const void *m1, const void *m2){
|
|
struct mi *mi1 = (struct mi *) m1;
|
|
struct mi *mi2 = (struct mi *) m2;
|
|
return strcmp(mi1->entity, mi2->entity);
|
|
}
|
|
|
|
|
|
/*
 * Encode one code point in the range U+0000..U+00FF as UTF-8.
 *
 *   Code point        1st byte   2nd byte
 *   ----------        --------   --------
 *   U+0000..U+007F    00..7F
 *   U+0080..U+00FF    C2..C3     80..BF
 *
 * c       the code point to encode
 * buf     output buffer; it is zero filled over buflen bytes first
 * buflen  size of buf in bytes
 * len     receives the number of bytes written: 1 if c <= 0x7F, 2 otherwise,
 *         or 0 if buf is NULL or too small to hold the sequence
 *
 * The function is deliberately not declared 'inline': a non-static inline
 * definition does not provide an external definition in C99/C11, which may
 * leave callers with an unresolved symbol at link time.
 */
void utf8_encode_char(unsigned char c, unsigned char *buf, int buflen, int *len){
   int needed = (c <= 0x7F) ? 1 : 2;

   if(len == NULL) return;

   *len = 0;

   if(buf == NULL || buflen <= 0) return;

   memset(buf, 0, (size_t)buflen);

   /* never write past the end of the caller's buffer */
   if(buflen < needed) return;

   if(needed == 1){
      buf[0] = c;
   }
   else {
      buf[0] = (unsigned char)(0xC0 | (c >> 6));
      buf[1] = (unsigned char)(0x80 | (c & 0x3F));
   }

   *len = needed;
}
|
|
|
|
|
|
inline static void pack_4_into_3(char *s, char *s2){
|
|
int j, n[4], k1, k2;
|
|
|
|
memset(s2, 0, 3);
|
|
|
|
if(strlen(s) != 4) return;
|
|
|
|
for(j=0; j<4; j++){
|
|
k1 = s[j];
|
|
n[j] = b64[k1];
|
|
}
|
|
|
|
k1 = n[0]; k1 = k1 << 2;
|
|
k2 = n[1]; k2 = k2 >> 4;
|
|
|
|
s2[0] = k1 | k2;
|
|
|
|
k1 = (n[1] & 0x0F) << 4;
|
|
k2 = n[2]; k2 = k2 >> 2;
|
|
|
|
s2[1] = k1 | k2;
|
|
|
|
k1 = n[2] << 6;
|
|
k2 = n[3] >> 0;
|
|
|
|
|
|
s2[2] = k1 | k2;
|
|
}
|
|
|
|
|
|
int decodeBase64(char *p){
|
|
int len=0;
|
|
unsigned char puf[MAXBUFSIZE];
|
|
|
|
memset(puf, 0, sizeof(puf));
|
|
|
|
len = decode_base64_to_buffer(p, strlen(p), &puf[0], sizeof(puf)-1);
|
|
|
|
snprintf(p, MAXBUFSIZE-1, "%s", puf);
|
|
|
|
return len;
|
|
}
|
|
|
|
|
|
/*
 * Decode base64 data into a caller supplied buffer.
 *
 * p     base64 input
 * plen  number of input bytes to consider
 * b     output buffer
 * blen  size of the output buffer
 *
 * The input is processed in groups of 4 characters. Only complete groups
 * are read: a trailing group of 1-3 characters is ignored, so nothing
 * beyond p+plen is ever accessed. A group containing a NUL byte is skipped.
 * Decoding stops when the next group could not fit into blen-1 bytes.
 *
 * Returns the number of bytes written to b, or 0 if p or b is NULL,
 * plen < 4 or plen > blen. The output is not NUL terminated.
 */
int decode_base64_to_buffer(char *p, int plen, unsigned char *b, int blen){
   int i, len=0;
   char s[5], s2[3];

   if(p == NULL || b == NULL) return 0;

   if(plen < 4 || plen > blen)
      return 0;

   /* plen >= 4 here, so plen-4 cannot underflow */
   for(i=0; i<=plen-4; i+=4){
      int decodedlen = 3;

      /* safety check against abnormally long lines */
      if(len + decodedlen > blen-1) break;

      memcpy(s, p+i, 4);
      s[4] = '\0';

      if(strlen(s) != 4) continue;

      pack_4_into_3(s, s2);

      /* '=' padding shortens the last group to 2 or 1 decoded bytes */
      if(s[3] == '=') decodedlen = 2;
      if(s[2] == '=') decodedlen = 1;

      memcpy(b+len, s2, decodedlen);

      len += decodedlen;
   }

   return len;
}
|
|
|
|
|
|
void decodeQP(char *p){
|
|
unsigned int i;
|
|
int k=0, a, b;
|
|
|
|
if(p == NULL) return;
|
|
|
|
for(i=0; i<strlen((char*)p); i++){
|
|
char c = p[i];
|
|
|
|
if(p[i] == '=' && isxdigit(p[i+1]) && isxdigit(p[i+2])){
|
|
a = p[i+1];
|
|
b = p[i+2];
|
|
|
|
c = 16 * hex_table[a] + hex_table[b];
|
|
|
|
i += 2;
|
|
}
|
|
else if(p[i] == '_'){
|
|
c = ' ';
|
|
}
|
|
|
|
p[k] = c;
|
|
k++;
|
|
|
|
}
|
|
|
|
p[k] = '\0';
|
|
}
|
|
|
|
|
|
void decodeHTML(char *p, int utf8){
|
|
unsigned char buf[MAXBUFSIZE], __u[8];
|
|
char *s, *q;
|
|
int count=0, len, c;
|
|
struct mi key, *res;
|
|
|
|
if(p == NULL || strlen(p) == 0) return;
|
|
|
|
s = p;
|
|
|
|
memset(buf, 0, sizeof(buf));
|
|
|
|
for(; *s; s++){
|
|
if(*s == '&'){
|
|
q = strchr(s, ';');
|
|
if(q){
|
|
*q = '\0';
|
|
|
|
if(*(s+1) == '#'){
|
|
c = atoi(s+2);
|
|
if(c == 0) c = 'q';
|
|
|
|
buf[count] = (unsigned char)c;
|
|
count++;
|
|
}
|
|
else {
|
|
key.entity = s;
|
|
res = bsearch(&key, htmlentities, NUM_OF_HTML_ENTITIES, sizeof(struct mi), compmi);
|
|
|
|
if(res && res->val <= 255){
|
|
|
|
if(utf8 == 1){
|
|
utf8_encode_char(res->val, &__u[0], sizeof(__u), &len);
|
|
memcpy(&buf[count], &__u[0], len);
|
|
count += len;
|
|
}
|
|
else {
|
|
buf[count] = res->val;
|
|
count++;
|
|
}
|
|
}
|
|
else {
|
|
buf[count] = 'q';
|
|
count++;
|
|
}
|
|
}
|
|
|
|
s = q;
|
|
}
|
|
|
|
}
|
|
else {
|
|
buf[count] = *s;
|
|
count++;
|
|
}
|
|
}
|
|
|
|
buf[count] = '\0'; count++;
|
|
|
|
memcpy(p, buf, count);
|
|
}
|
|
|
|
|
|
void decodeURL(char *p){
|
|
unsigned int i;
|
|
int c, k=0, a, b;
|
|
|
|
if(p == NULL) return;
|
|
|
|
for(i=0; i<strlen(p); i++){
|
|
switch(p[i]){
|
|
case '+':
|
|
c = ' ';
|
|
break;
|
|
|
|
case '%':
|
|
if(isxdigit(p[i+1]) && isxdigit(p[i+2])){
|
|
a = p[i+1];
|
|
b = p[i+2];
|
|
|
|
c = 16 * hex_table[a] + hex_table[b];
|
|
|
|
i += 2;
|
|
}
|
|
else
|
|
c = p[i];
|
|
|
|
break;
|
|
|
|
default:
|
|
c = p[i];
|
|
break;
|
|
}
|
|
|
|
p[k] = c;
|
|
k++;
|
|
|
|
}
|
|
|
|
p[k] = '\0';
|
|
}
|
|
|
|
|
|
int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *encoding){
|
|
iconv_t cd;
|
|
size_t inbytesleft, outbytesleft;
|
|
int ret = ERR;
|
|
|
|
memset(outbuf, 0, outbuflen);
|
|
|
|
// Iconv sometimes produces an invalid utf8 sequence for gb2312.
|
|
// The fix is to use cp936, instead of gb2312 encoding.
|
|
//
|
|
// If there will be more similar exceptions, then we have to use
|
|
// a more efficient lookup method
|
|
|
|
if(strcasecmp(encoding, "gb2312") == 0)
|
|
cd = iconv_open("utf-8", "cp936");
|
|
else if(strcasecmp(encoding, "ks_c_5601-1987") == 0)
|
|
cd = iconv_open("utf-8", "EUC-KR");
|
|
else
|
|
cd = iconv_open("utf-8", encoding);
|
|
|
|
if(cd != (iconv_t)-1){
|
|
inbytesleft = inbuflen;
|
|
outbytesleft = outbuflen-1;
|
|
|
|
if(iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1)
|
|
ret = ERR;
|
|
else
|
|
ret = OK;
|
|
|
|
iconv_close(cd);
|
|
}
|
|
|
|
return ret;
|
|
}
|