- const char *psz_encoding;
- char *psz_outstring;
- char psz_encbuf[sizeof( "ISO_8859-123" )];
- size_t i_in, i_out, offset = 1;
- vlc_iconv_t iconv_handle;
-
- if( i_length < 1 ) return NULL;
- if( psz_instring[0] >= 0x20 )
- {
- psz_encoding = "ISO_8859-1";
- /* According to the specification, this should be ISO6937,
- * but it seems Latin-1 is used instead. */
- offset = 0;
- }
- else switch( psz_instring[0] )
- {
- case 0x01:
- psz_encoding = "ISO_8859-5";
- break;
- case 0x02:
- psz_encoding = "ISO_8859-6";
- break;
- case 0x03:
- psz_encoding = "ISO_8859-7";
- break;
- case 0x04:
- psz_encoding = "ISO_8859-8";
- break;
- case 0x05:
- psz_encoding = "ISO_8859-9";
- break;
- case 0x06:
- psz_encoding = "ISO_8859-10";
- break;
- case 0x07:
- psz_encoding = "ISO_8859-11";
- break;
- case 0x08:
- psz_encoding = "ISO_8859-12";
- break;
- case 0x09:
- psz_encoding = "ISO_8859-13";
- break;
- case 0x0a:
- psz_encoding = "ISO_8859-14";
- break;
- case 0x0b:
- psz_encoding = "ISO_8859-15";
- break;
- case 0x10:
-#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
- if( i_length < 3 || psz_instring[1] != 0x00 || psz_instring[2] > 15
- || psz_instring[2] == 0 )
- {
- psz_encoding = "UTF-8";
- offset = 0;
- }
- else
- {
- sprintf( psz_encbuf, "ISO_8859-%u", psz_instring[2] );
- psz_encoding = psz_encbuf;
- offset = 3;
- }
- break;
- case 0x11:
-#warning Is there a BOM or do we use a fixed endianess?
- psz_encoding = "UTF-16";
- break;
- case 0x12:
- psz_encoding = "KSC5601-1987";
- break;
- case 0x13:
- psz_encoding = "GB2312"; /* GB-2312-1980 */
- break;
- case 0x14:
- psz_encoding = "BIG-5";
- break;
- case 0x15:
- psz_encoding = "UTF-8";
- break;
- default:
- /* invalid */
- psz_encoding = "UTF-8";
- offset = 0;
- }
-
- i_in = i_length - offset;
- i_out = i_in * 6 + 1;
-
- psz_outstring = malloc( i_out );
- if( !psz_outstring )
- {
- return NULL;
- }
-
- iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
- if( iconv_handle == (vlc_iconv_t)(-1) )
- {
- /* Invalid character set (e.g. ISO_8859-12) */
- memcpy( psz_outstring, &psz_instring[offset], i_in );
- psz_outstring[i_in] = '\0';
- EnsureUTF8( psz_outstring );
- }
- else
- {
- const char *psz_in = (const char *)&psz_instring[offset];
- char *psz_out = psz_outstring;
-
- while( vlc_iconv( iconv_handle, &psz_in, &i_in,
- &psz_out, &i_out ) == (size_t)(-1) )
- {
- /* skip naughty byte. This may fail terribly for multibyte stuff,
- * but what can we do anyway? */
- psz_in++;
- i_in--;
- vlc_iconv( iconv_handle, NULL, NULL, NULL, NULL ); /* reset */
- }
- vlc_iconv_close( iconv_handle );