00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034 #include "blocxx/IConv.hpp"
00035
00036 #if defined(BLOCXX_HAVE_ICONV_SUPPORT)
00037 #include "blocxx/Assertion.hpp"
00038 #include "blocxx/Format.hpp"
00039 #include "blocxx/Exec.hpp"
00040
00041 #include <cwchar>
00042 #include <cwctype>
00043
00044 #include <errno.h>
00045
00046 namespace BLOCXX_NAMESPACE
00047 {
00048
00049
00050 IConv_t::IConv_t()
00051 : m_iconv(iconv_t(-1))
00052 {
00053 }
00054
00055
00056
00057 IConv_t::IConv_t(const String &fromEncoding, const String &toEncoding)
00058 {
00059 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00060 if( m_iconv == iconv_t(-1))
00061 {
00062 BLOCXX_THROW(StringConversionException,
00063 Format("Unable to convert from \"%1\" to \"%2\"",
00064 fromEncoding, toEncoding).c_str());
00065 }
00066 }
00067
00068
00069
00070 IConv_t::~IConv_t()
00071 {
00072 close();
00073 }
00074
00075
00076
00077 bool
00078 IConv_t::open(const String &fromEncoding, const String &toEncoding)
00079 {
00080 close();
00081 m_iconv = ::iconv_open(toEncoding.c_str(), fromEncoding.c_str());
00082 return ( m_iconv != iconv_t(-1));
00083 }
00084
00085
00086
00087 size_t
00088 IConv_t::convert(char **istr, size_t *ibytesleft,
00089 char **ostr, size_t *obytesleft)
00090 {
00091 #if defined(BLOCXX_ICONV_INBUF_CONST)
00092 BLOCXX_ASSERT(istr != NULL);
00093 const char *ptr = *istr;
00094 int ret = ::iconv(m_iconv, &ptr, ibytesleft, ostr, obytesleft);
00095 *istr = const_cast<char*>(ptr);
00096 return ret;
00097 #else
00098 return ::iconv(m_iconv, istr, ibytesleft, ostr, obytesleft);
00099 #endif
00100 }
00101
00102
00103
00104 bool
00105 IConv_t::close()
00106 {
00107 bool ret = true;
00108 int err = errno;
00109
00110 if( m_iconv != iconv_t(-1))
00111 {
00112 if( ::iconv_close(m_iconv) == -1)
00113 ret = false;
00114 m_iconv = iconv_t(-1);
00115 }
00116
00117 errno = err;
00118 return ret;
00119 }
00120
00121
00122
00123 namespace IConv
00124 {
00125
00126
00127 static inline void
00128 mayThrowStringConversionException()
00129 {
00130 switch( errno)
00131 {
00132 case E2BIG:
00133 break;
00134
00135 case EILSEQ:
00136 BLOCXX_THROW(StringConversionException,
00137 "Invalid character or multibyte sequence in the input");
00138 break;
00139
00140 case EINVAL:
00141 default:
00142 BLOCXX_THROW(StringConversionException,
00143 "Incomplete multibyte sequence in the input");
00144 break;
00145 }
00146 }
00147
00148
00149 String
00150 fromByteString(const String &enc, const char *str, size_t len)
00151 {
00152 if( !str || len == 0)
00153 return String();
00154
00155 IConv_t iconv(enc, "UTF-8");
00156 String out;
00157 char obuf[4097];
00158 char *optr;
00159 size_t olen;
00160
00161 char *sptr = (char *)str;
00162 size_t slen = len;
00163
00164 while( slen > 0)
00165 {
00166 obuf[0] = '\0';
00167 optr = (char *)obuf;
00168 olen = sizeof(obuf) - sizeof(obuf[0]);
00169
00170 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00171 if( ret == size_t(-1))
00172 {
00173 mayThrowStringConversionException();
00174 }
00175 *optr = '\0';
00176 out += obuf;
00177 }
00178
00179 return out;
00180 }
00181
00182
00183
00184 String
00185 fromByteString(const String &enc, const std::string &str)
00186 {
00187 return fromByteString(enc, str.c_str(), str.length());
00188 }
00189
00190
00191 #ifdef BLOCXX_HAVE_STD_WSTRING
00192
00193 String
00194 fromWideString(const String &enc, const std::wstring &str)
00195 {
00196 if( str.empty())
00197 return String();
00198
00199 IConv_t iconv(enc, "UTF-8");
00200 String out;
00201 char obuf[4097];
00202 char *optr;
00203 size_t olen;
00204
00205 char *sptr = (char *)str.c_str();
00206 size_t slen = str.length() * sizeof(wchar_t);
00207
00208 while( slen > 0)
00209 {
00210 obuf[0] = '\0';
00211 optr = (char *)obuf;
00212 olen = sizeof(obuf) - sizeof(obuf[0]);
00213
00214 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00215 if( ret == size_t(-1))
00216 {
00217 mayThrowStringConversionException();
00218 }
00219 *optr = '\0';
00220 out += obuf;
00221 }
00222
00223 return out;
00224 }
00225 #endif
00226
00227
00228 std::string
00229 toByteString(const String &enc, const String &utf8)
00230 {
00231 if( utf8.empty())
00232 return std::string();
00233
00234 IConv_t iconv("UTF-8", enc);
00235 std::string out;
00236 char obuf[4097];
00237 char *optr;
00238 size_t olen;
00239
00240 char *sptr = (char *)utf8.c_str();
00241 size_t slen = utf8.length();
00242
00243 while( slen > 0)
00244 {
00245 obuf[0] = '\0';
00246 optr = (char *)obuf;
00247 olen = sizeof(obuf) - sizeof(obuf[0]);
00248
00249 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00250 if( ret == size_t(-1))
00251 {
00252 mayThrowStringConversionException();
00253 }
00254 *optr = '\0';
00255 out += obuf;
00256 }
00257
00258 return out;
00259 }
00260
00261 #ifdef BLOCXX_HAVE_STD_WSTRING
00262
00263 std::wstring
00264 toWideString(const String &enc, const String &utf8)
00265 {
00266 if( utf8.empty())
00267 return std::wstring();
00268
00269 IConv_t iconv("UTF-8", enc);
00270 std::wstring out;
00271 wchar_t obuf[1025];
00272 char *optr;
00273 size_t olen;
00274
00275 char *sptr = (char *)utf8.c_str();
00276 size_t slen = utf8.length();
00277
00278 while( slen > 0)
00279 {
00280 obuf[0] = L'\0';
00281 optr = (char *)obuf;
00282 olen = sizeof(obuf) - sizeof(obuf[0]);
00283
00284 size_t ret = iconv.convert(&sptr, &slen, &optr, &olen);
00285 if( ret == size_t(-1))
00286 {
00287 mayThrowStringConversionException();
00288 }
00289 *((wchar_t *)optr) = L'\0';
00290 out += obuf;
00291 }
00292
00293 return out;
00294 }
00295 #endif
00296
00297
00298 #if 0
00299
00300 StringArray
00301 encodings()
00302 {
00303 StringArray command;
00304 String output;
00305 int status = -1;
00306
00307 command.push_back("/usr/bin/iconv");
00308 command.push_back("--list");
00309
00310 try
00311 {
00312 Exec::executeProcessAndGatherOutput(command, output, status);
00313 }
00314 catch(...)
00315 {
00316 }
00317
00318 if(status == 0)
00319 {
00320 return output.tokenize("\r\n");
00321 }
00322 return StringArray();
00323 }
00324 #endif
00325
00326
00327 }
00328 }
00329
00330 #endif // BLOCXX_HAVE_ICONV_SUPPORT
00331
00332
00333