00001 
00002 #include "define.h"
00003 
/* File-scope iconv state shared by the conversion helpers below. */

/* Set by pst_unicode_init(), cleared by pst_unicode_close(). */
static int unicode_up = 0;

/* utf-16le -> utf-8 descriptor opened by pst_unicode_init().
 * Starts at the iconv "invalid descriptor" sentinel, like the two
 * target descriptors below. */
static iconv_t i16to8 = (iconv_t)-1;

/* strdup()'d name of the charset the target descriptors were last opened
 * for; freed in open_targets() and pst_unicode_close(). */
static const char *target_charset = NULL;

/* Non-zero while i8totarget (utf-8 -> target charset) is open. */
static int         target_open_from = 0;

/* Non-zero while target2i8 (target charset -> utf-8) is open. */
static int         target_open_to   = 0;

/* utf-8 -> target charset descriptor. */
static iconv_t     i8totarget = (iconv_t)-1;

/* target charset -> utf-8 descriptor. */
static iconv_t     target2i8  = (iconv_t)-1;
00011 
00012 
/* Call DIE() with the remaining arguments when condition x is false.
 * The do/while(0) wrapper makes the expansion a single statement, so
 * "ASSERT(...);" is safe as the body of an unbraced if/else. */
#define ASSERT(x,...) do { if( !(x) ) DIE(( __VA_ARGS__)); } while (0)
00014 
00015 
00018 static void   pst_vbresize(pst_vbuf *vb, size_t len);
00019 static void pst_vbresize(pst_vbuf *vb, size_t len)
00020 {
00021     vb->dlen = 0;
00022 
00023     if (vb->blen >= len) {
00024         vb->b = vb->buf;
00025         return;
00026     }
00027 
00028     vb->buf  = realloc(vb->buf, len);
00029     vb->b    = vb->buf;
00030     vb->blen = len;
00031 }
00032 
00033 
00034 static size_t pst_vbavail(pst_vbuf * vb);
00035 static size_t pst_vbavail(pst_vbuf * vb)
00036 {
00037     return vb->blen  - vb->dlen - (size_t)(vb->b - vb->buf);
00038 }
00039 
00040 
00041 static void open_targets(const char* charset);
00042 static void open_targets(const char* charset)
00043 {
00044     if (!target_charset || strcasecmp(target_charset, charset)) {
00045         if (target_open_from) iconv_close(i8totarget);
00046         if (target_open_to)   iconv_close(target2i8);
00047         if (target_charset)   free((char *)target_charset);
00048         target_charset   = strdup(charset);
00049         target_open_from = 1;
00050         target_open_to   = 1;
00051         i8totarget = iconv_open(target_charset, "utf-8");
00052         if (i8totarget == (iconv_t)-1) {
00053             target_open_from = 0;
00054             DEBUG_WARN(("Couldn't open iconv descriptor for utf-8 to %s.\n", target_charset));
00055         }
00056         target2i8 = iconv_open("utf-8", target_charset);
00057         if (target2i8 == (iconv_t)-1) {
00058             target_open_to = 0;
00059             DEBUG_WARN(("Couldn't open iconv descriptor for %s to utf-8.\n", target_charset));
00060         }
00061     }
00062 }
00063 
00064 
00065 static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion);
00066 static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion)
00067 {
00068     size_t inbytesleft  = iblen;
00069     size_t icresult     = (size_t)-1;
00070     size_t outbytesleft = 0;
00071     char *outbuf        = NULL;
00072     int   myerrno;
00073 
00074     DEBUG_ENT("sbcs_conversion");
00075     pst_vbresize(dest, 2*iblen);
00076 
00077     do {
00078         outbytesleft = dest->blen - dest->dlen;
00079         outbuf = dest->b + dest->dlen;
00080         icresult = iconv(conversion, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
00081         myerrno  = errno;
00082         dest->dlen = outbuf - dest->b;
00083         if (inbytesleft) pst_vbgrow(dest, 2*inbytesleft);
00084     } while ((size_t)-1 == icresult && E2BIG == myerrno);
00085 
00086     if (icresult == (size_t)-1) {
00087         DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
00088         pst_unicode_init();
00089         DEBUG_RET();
00090         return (size_t)-1;
00091     }
00092     DEBUG_RET();
00093     return 0;
00094 }
00095 
00096 
00097 static void pst_unicode_close();
00098 static void pst_unicode_close()
00099 {
00100     iconv_close(i16to8);
00101     if (target_open_from) iconv_close(i8totarget);
00102     if (target_open_to)   iconv_close(target2i8);
00103     if (target_charset)   free((char *)target_charset);
00104     target_charset   = NULL;
00105     target_open_from = 0;
00106     target_open_to   = 0;
00107     unicode_up = 0;
00108 }
00109 
00110 
/**
 * Check whether a UTF-16 byte string contains a two-byte zero terminator
 * at an even offset within its first length bytes.
 *
 * Only complete two-byte units are examined, so a trailing odd byte is
 * ignored rather than paired with the byte past the end of the input.
 *
 * @param str     bytes to scan
 * @param length  number of valid bytes in str
 * @return 1 if a terminator was found, 0 otherwise (a warning is logged)
 */
static int utf16_is_terminated(const char *str, int length)
{
    int i;

    for (i = 0; i + 1 < length; i += 2) {
        if (str[i] == 0 && str[i + 1] == 0) {
            return 1;
        }
    }

    DEBUG_WARN(("utf16 string is not zero terminated\n"));
    return 0;
}
00128 
00129 
00130 pst_vbuf *pst_vballoc(size_t len)
00131 {
00132     pst_vbuf *result = pst_malloc(sizeof(pst_vbuf));
00133     if (result) {
00134         result->dlen = 0;
00135         result->blen = 0;
00136         result->buf = NULL;
00137         pst_vbresize(result, len);
00138     }
00139     else DIE(("malloc() failure"));
00140     return result;
00141 }
00142 
00143 
00146 void pst_vbgrow(pst_vbuf *vb, size_t len)
00147 {
00148     if (0 == len)
00149         return;
00150 
00151     if (0 == vb->blen) {
00152         pst_vbresize(vb, len);
00153         return;
00154     }
00155 
00156     if (vb->dlen + len > vb->blen) {
00157         if (vb->dlen + len < vb->blen * 1.5)
00158             len = vb->blen * 1.5;
00159         char *nb = pst_malloc(vb->blen + len);
00160         if (!nb) DIE(("malloc() failure"));
00161         vb->blen = vb->blen + len;
00162         memcpy(nb, vb->b, vb->dlen);
00163 
00164         free(vb->buf);
00165         vb->buf = nb;
00166         vb->b = vb->buf;
00167     } else {
00168         if (vb->b != vb->buf)
00169             memcpy(vb->buf, vb->b, vb->dlen);
00170     }
00171 
00172     vb->b = vb->buf;
00173 
00174     ASSERT(pst_vbavail(vb) >= len, "vbgrow(): I have failed in my mission.");
00175 }
00176 
00177 
00180 void pst_vbset(pst_vbuf * vb, void *b, size_t len)
00181 {
00182     pst_vbresize(vb, len);
00183     memcpy(vb->b, b, len);
00184     vb->dlen = len;
00185 }
00186 
00187 
00190 void pst_vbappend(pst_vbuf *vb, void *b, size_t len)
00191 {
00192     if (0 == vb->dlen) {
00193         pst_vbset(vb, b, len);
00194         return;
00195     }
00196     pst_vbgrow(vb, len);
00197     memcpy(vb->b + vb->dlen, b, len);
00198     vb->dlen += len;
00199 }
00200 
00201 
00202 void pst_unicode_init()
00203 {
00204     if (unicode_up) pst_unicode_close();
00205     i16to8 = iconv_open("utf-8", "utf-16le");
00206     if (i16to8 == (iconv_t)-1) {
00207         DEBUG_WARN(("Couldn't open iconv descriptor for utf-16le to utf-8.\n"));
00208     }
00209     unicode_up = 1;
00210 }
00211 
00212 
00213 size_t pst_vb_utf16to8(pst_vbuf *dest, const char *inbuf, int iblen)
00214 {
00215     size_t inbytesleft  = iblen;
00216     size_t icresult     = (size_t)-1;
00217     size_t outbytesleft = 0;
00218     char *outbuf        = NULL;
00219     int   myerrno;
00220 
00221     if (!unicode_up) return (size_t)-1;   
00222     pst_vbresize(dest, iblen);
00223 
00224     
00225     if (!utf16_is_terminated(inbuf, iblen))
00226         return (size_t)-1;
00227 
00228     do {
00229         outbytesleft = dest->blen - dest->dlen;
00230         outbuf = dest->b + dest->dlen;
00231         icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
00232         myerrno  = errno;
00233         dest->dlen = outbuf - dest->b;
00234         if (inbytesleft) pst_vbgrow(dest, inbytesleft);
00235     } while ((size_t)-1 == icresult && E2BIG == myerrno);
00236 
00237     if (icresult == (size_t)-1) {
00238         DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
00239         pst_unicode_init();
00240         return (size_t)-1;
00241     }
00242     return (icresult) ? (size_t)-1 : 0;
00243 }
00244 
00245 
00246 size_t pst_vb_utf8to8bit(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset)
00247 {
00248     open_targets(charset);
00249     if (!target_open_from) return (size_t)-1;   
00250     return sbcs_conversion(dest, inbuf, iblen, i8totarget);
00251 }
00252 
00253 
00254 size_t pst_vb_8bit2utf8(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset)
00255 {
00256     open_targets(charset);
00257     if (!target_open_to) return (size_t)-1;     
00258     return sbcs_conversion(dest, inbuf, iblen, target2i8);
00259 }
00260