From 25aeff66ba55c0da10555bd1cafc15b7552a654f Mon Sep 17 00:00:00 2001 From: Janos SUTO Date: Tue, 7 Nov 2017 19:24:06 +0100 Subject: [PATCH] src: fix gb2312 -> utf8 iconv conversion issue Signed-off-by: Janos SUTO --- src/decoder.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/decoder.c b/src/decoder.c index a4a23c6a..581655c0 100644 --- a/src/decoder.c +++ b/src/decoder.c @@ -323,7 +323,16 @@ int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *en memset(outbuf, 0, outbuflen); - cd = iconv_open("utf-8", encoding); + // iconv sometimes produces an invalid UTF-8 sequence for gb2312 input. + // The fix is to use the cp936 encoding instead of gb2312. + // + // If more exceptions like this turn up, we will have to use + // a more efficient lookup method. + + if(strcasecmp(encoding, "gb2312") == 0) + cd = iconv_open("utf-8", "cp936"); + else + cd = iconv_open("utf-8", encoding); if(cd != (iconv_t)-1){ inbytesleft = inbuflen; @@ -339,4 +348,3 @@ int utf8_encode(char *inbuf, int inbuflen, char *outbuf, int outbuflen, char *en return ret; } -