+/* FIXME same as dvbsi_to_utf8 from dvb access */
+/**
+ * Converts an EIT text field to a newly allocated UTF-8 string.
+ *
+ * The first byte of the field selects the source character set (see
+ * ETSI EN 300 468 Annex A); depending on the selector, 0, 1 or 3 leading
+ * bytes are skipped before the text itself is converted.
+ *
+ * \param psz_instring raw field bytes (need not be NUL-terminated; only
+ *                     i_length bytes are read)
+ * \param i_length     number of bytes available in psz_instring
+ * \param b_broken     when the field carries no selector byte, decode it as
+ *                     ISO_8859-1 instead of the standard ISO_6937
+ * \return a NUL-terminated string allocated with malloc(), or NULL when
+ *         i_length is 0 or the allocation fails. The buffer is handed to
+ *         the caller (presumably to be released with free()).
+ */
+static char *EITConvertToUTF8( const unsigned char *psz_instring,
+                               size_t i_length,
+                               bool b_broken )
+{
+    const char *psz_encoding;
+    char *psz_outstring;
+    char psz_encbuf[sizeof( "ISO_8859-123" )];
+    size_t i_in, i_out, offset = 1;
+    vlc_iconv_t iconv_handle;
+
+    if( i_length < 1 ) return NULL;
+    if( psz_instring[0] >= 0x20 )
+    {
+        /* According to ETSI EN 300 468 Annex A, this should be ISO6937,
+         * but some broadcasters use different charset... */
+        if( b_broken )
+            psz_encoding = "ISO_8859-1";
+        else
+            psz_encoding = "ISO_6937";
+
+        /* No selector byte: the first byte is already text. */
+        offset = 0;
+    }
+    else switch( psz_instring[0] )
+    {
+    case 0x01:
+        psz_encoding = "ISO_8859-5";
+        break;
+    case 0x02:
+        psz_encoding = "ISO_8859-6";
+        break;
+    case 0x03:
+        psz_encoding = "ISO_8859-7";
+        break;
+    case 0x04:
+        psz_encoding = "ISO_8859-8";
+        break;
+    case 0x05:
+        psz_encoding = "ISO_8859-9";
+        break;
+    case 0x06:
+        psz_encoding = "ISO_8859-10";
+        break;
+    case 0x07:
+        psz_encoding = "ISO_8859-11";
+        break;
+    case 0x08:
+        /* Not a real character set; handled by the fallback path below
+         * when the converter cannot be opened. */
+        psz_encoding = "ISO_8859-12";
+        break;
+    case 0x09:
+        psz_encoding = "ISO_8859-13";
+        break;
+    case 0x0a:
+        psz_encoding = "ISO_8859-14";
+        break;
+    case 0x0b:
+        psz_encoding = "ISO_8859-15";
+        break;
+    case 0x10:
+#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
+        /* Three-byte selector: 0x10 0x00 <ISO 8859 part number 1..15>. */
+        if( i_length < 3 || psz_instring[1] != 0x00 || psz_instring[2] > 15
+         || psz_instring[2] == 0 )
+        {
+            /* Malformed selector: treat the whole field as UTF-8. */
+            psz_encoding = "UTF-8";
+            offset = 0;
+        }
+        else
+        {
+            /* Bounded formatting; the part number is at most two digits,
+             * so the name always fits in psz_encbuf. */
+            snprintf( psz_encbuf, sizeof( psz_encbuf ), "ISO_8859-%u",
+                      (unsigned)psz_instring[2] );
+            psz_encoding = psz_encbuf;
+            offset = 3;
+        }
+        break;
+    case 0x11:
+#warning Is there a BOM or do we use a fixed endianess?
+        psz_encoding = "UTF-16";
+        break;
+    case 0x12:
+        psz_encoding = "KSC5601-1987";
+        break;
+    case 0x13:
+        psz_encoding = "GB2312"; /* GB-2312-1980 */
+        break;
+    case 0x14:
+        psz_encoding = "BIG-5";
+        break;
+    case 0x15:
+        psz_encoding = "UTF-8";
+        break;
+    default:
+        /* invalid */
+        psz_encoding = "UTF-8";
+        offset = 0;
+        break;
+    }
+
+    /* offset <= i_length in every branch above, so this cannot wrap. */
+    i_in = i_length - offset;
+    /* Up to 6 output bytes per input byte; the terminating NUL is
+     * allocated separately so the converter can never consume it. */
+    i_out = i_in * 6;
+
+    psz_outstring = malloc( i_out + 1 );
+    if( !psz_outstring )
+    {
+        return NULL;
+    }
+
+    iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
+    if( iconv_handle == (vlc_iconv_t)(-1) )
+    {
+        /* Invalid character set (e.g. ISO_8859-12): copy the raw bytes and
+         * let EnsureUTF8() sanitize them (presumably by replacing invalid
+         * sequences -- helper is defined elsewhere). */
+        memcpy( psz_outstring, &psz_instring[offset], i_in );
+        psz_outstring[i_in] = '\0';
+        EnsureUTF8( psz_outstring );
+    }
+    else
+    {
+        const char *psz_in = (const char *)&psz_instring[offset];
+        char *psz_out = psz_outstring;
+
+        while( vlc_iconv( iconv_handle, &psz_in, &i_in,
+                          &psz_out, &i_out ) == (size_t)(-1) )
+        {
+            /* Nothing left to skip: stop instead of letting i_in wrap
+             * around and reading past the end of the input. */
+            if( i_in == 0 )
+                break;
+
+            /* skip naughty byte. This may fail terribly for multibyte stuff,
+             * but what can we do anyway? */
+            psz_in++;
+            i_in--;
+            vlc_iconv( iconv_handle, NULL, NULL, NULL, NULL ); /* reset */
+        }
+        vlc_iconv_close( iconv_handle );
+
+        /* Always in bounds: one byte was reserved beyond i_out. */
+        *psz_out = '\0';
+
+        /* Convert EIT-coded CR/LFs: the UTF-8 sequence C2 8A (U+008A) is
+         * rewritten in place as a space followed by a newline. Reading
+         * pbuf[1] on the last iteration hits the terminator at worst. */
+        unsigned char *pbuf = (unsigned char *)psz_outstring;
+        for( ; pbuf < (unsigned char *)psz_out ; pbuf++ )
+        {
+            if( pbuf[0] == 0xc2 && pbuf[1] == 0x8a )
+            {
+                pbuf[0] = ' ';
+                pbuf[1] = '\n';
+            }
+        }
+    }
+    return psz_outstring;
+}
+