]> git.sesse.net Git - vlc/blob - src/text/strings.c
Move encoding and language stuff to their own directory
[vlc] / src / text / strings.c
1 /*****************************************************************************
2  * strings.c: String related functions
3  *****************************************************************************
4  * Copyright (C) 2006 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Antoine Cellerier <dionoea at videolan dot org>
8  *          Daniel Stranger <vlc at schmaller dot de>
 *          Rémi Denis-Courmont <rem # videolan org>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24  *****************************************************************************/
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include <vlc/vlc.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <assert.h>
34
35 /* Needed by str_format_time */
36 #include <time.h>
37
38 /* Needed by str_format_meta */
39 #include "vlc_input.h"
40 #include "vlc_meta.h"
41 #include "vlc_playlist.h"
42 #include <vlc/aout.h>
43
44 #include "vlc_strings.h"
45 #include "vlc_url.h"
46 #include "charset.h"
47
/**
 * Unescape URI encoded string
 *
 * Duplicates the input and runs unescape_URI() on the copy; the input
 * string itself is left untouched.
 *
 * \param psz nul-terminated string to decode (may be NULL)
 * \return decoded duplicated string (to be released with free()), or NULL
 *         if psz is NULL or the copy could not be allocated
 */
char *unescape_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* strdup() returns NULL when out of memory: nothing to decode then */
    if( psz_dup != NULL )
        unescape_URI( psz_dup );
    return psz_dup;
}
58
/**
 * Unescape URI encoded string in place
 *
 * Recognised escapes:
 *  - "%XX"              two hexadecimal digits,
 *  - "%uXXXX"/"%UXXXX"  four hexadecimal digits.
 * The decoded value is treated as a code point and written out as UTF-8
 * (one to three bytes). The output never grows past the input, so decoding
 * in place is safe.
 *
 * Digits are read up to the first non-hexadecimal character; an escape
 * with no valid leading digit decodes to 0 (i.e. a nul byte is written).
 * '+' is copied verbatim. Unescaped bytes below 32 or above 127 are
 * replaced with '?'.
 *
 * If the string ends in the middle of an escape sequence, decoding stops
 * and the result is terminated right before that incomplete escape.
 *
 * \param psz nul-terminated string to decode in place (NULL is ignored)
 * \return nothing
 */
void unescape_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    if( psz == NULL )
        return;

    while( ( c = *in++ ) != '\0' )
    {
        unsigned long cp = 0;
        unsigned i_digits = 2, i;

        if( c != '%' )
        {
            /* + is not a special case - it means plus, not space. */

            /* Inserting non-ASCII or non-printable characters is unsafe,
             * and no sane browser will send these unencoded */
            if( ( c < 32 ) || ( c > 127 ) )
                *out++ = '?';
            else
                *out++ = c;
            continue;
        }

        /* "%u" / "%U" introduces a four digit escape, plain "%" two */
        if( ( *in == 'u' ) || ( *in == 'U' ) )
        {
            in++;
            i_digits = 4;
        }

        /* Make sure every expected digit exists before consuming any, so
         * that we never read past the terminating nul */
        for( i = 0; i < i_digits; i++ )
        {
            if( in[i] == '\0' )
            {
                /* Truncated escape: drop it and close the output */
                *out = '\0';
                return;
            }
        }

        /* Parse the leading hexadecimal digits by hand: unlike strtoul()
         * this accepts neither a sign, nor white spaces, nor "0x", so the
         * result always fits in 16 bits */
        for( i = 0; i < i_digits; i++ )
        {
            unsigned char h = in[i];
            unsigned i_val;

            if( ( h >= '0' ) && ( h <= '9' ) )
                i_val = h - '0';
            else if( ( h >= 'a' ) && ( h <= 'f' ) )
                i_val = h - 'a' + 10;
            else if( ( h >= 'A' ) && ( h <= 'F' ) )
                i_val = h - 'A' + 10;
            else
                break;
            cp = ( cp << 4 ) | i_val;
        }
        in += i_digits;

        /* UTF-8 encoding of the code point (at most 0xFFFF here) */
        if( cp < 0x80 )
            *out++ = cp;
        else
        if( cp < 0x800 )
        {
            *out++ = (( cp >>  6)         | 0xc0);
            *out++ = (( cp        & 0x3f) | 0x80);
        }
        else
        {
            *out++ = (( cp >> 12)         | 0xe0);
            *out++ = (((cp >>  6) & 0x3f) | 0x80);
            *out++ = (( cp        & 0x3f) | 0x80);
        }
    }
    *out = '\0';
}
128
/**
 * Decode encoded URI string
 *
 * Duplicates the input and runs decode_URI() on the copy; the input
 * string itself is left untouched.
 *
 * \param psz nul-terminated string to decode (may be NULL)
 * \return decoded duplicated string (to be released with free()), or NULL
 *         if psz is NULL or the copy could not be allocated
 */
char *decode_URI_duplicate( const char *psz )
{
    char *psz_dup;

    if( psz == NULL )
        return NULL;

    psz_dup = strdup( psz );
    /* strdup() returns NULL when out of memory: nothing to decode then */
    if( psz_dup != NULL )
        decode_URI( psz_dup );
    return psz_dup;
}
139
/**
 * Decode encoded URI string in place
 *
 * "%XX" is replaced with the byte whose hexadecimal value is XX, '+' is
 * replaced with a space, and any other byte below 32 or above 127 is
 * replaced with '?'. The decoded string is then handed to EnsureUTF8().
 *
 * If the string ends in the middle of a "%XX" escape, the incomplete
 * escape is dropped; the part decoded so far is still terminated and
 * passed to EnsureUTF8().
 *
 * \param psz nul-terminated string to decode in place (NULL is ignored)
 * \return nothing
 */
void decode_URI( char *psz )
{
    unsigned char *in = (unsigned char *)psz, *out = in, c;

    if( psz == NULL )
        return;

    while( ( c = *in++ ) != '\0' )
    {
        if( c == '%' )
        {
            char hex[3];

            /* Truncated escape: stop here, but fall through to the common
             * termination below rather than leaving stale bytes behind */
            if( ( ( hex[0] = *in++ ) == 0 )
             || ( ( hex[1] = *in++ ) == 0 ) )
                break;

            hex[2] = '\0';
            *out++ = (unsigned char)strtoul( hex, NULL, 0x10 );
        }
        else
        if( c == '+' )
            *out++ = ' ';
        else
        /* Inserting non-ASCII or non-printable characters is unsafe,
         * and no sane browser will send these unencoded */
        if( ( c < 32 ) || ( c > 127 ) )
            *out++ = '?';
        else
            *out++ = c;
    }
    *out = '\0';
    /* Decoded escapes may have produced arbitrary bytes;
     * NOTE(review): EnsureUTF8() presumably sanitizes them - confirm */
    EnsureUTF8( psz );
}
181
/**
 * Tells whether a character can be copied unescaped into an URI component.
 *
 * Only ASCII letters, ASCII digits, '-', '_' and '.' qualify.
 * The nul character is never considered safe.
 *
 * \param c character value (callers pass an unsigned byte)
 * \return 1 if the character is safe, 0 otherwise
 */
static inline int isurlsafe( int c )
{
    return ( ( c >= 'a' ) && ( c <= 'z' ) )
        || ( ( c >= 'A' ) && ( c <= 'Z' ) )
        || ( ( c >= '0' ) && ( c <= '9' ) )
        /* Hmm, we should not encode character that are allowed in URLs
         * (even if they are not URL-safe), nor URL-safe characters.
         * We still encode some of them because of Microsoft's crap browser.
         */
        /* Explicit comparisons rather than strchr(): strchr() would also
         * match the terminating nul of the character list */
        || ( c == '-' ) || ( c == '_' ) || ( c == '.' );
}
193
/**
 * Converts a nibble into its upper-case hexadecimal digit.
 *
 * \param c value to convert; callers in this file pass 0 to 15
 * \return '0'..'9' for values below 10, 'A'.. for values from 10 upward
 */
static inline char url_hexchar( int c )
{
    if( c >= 10 )
        return c - 10 + 'A';

    return '0' + c;
}
198
/**
 * encode_URI_component
 * Encodes an URI component.
 *
 * Characters accepted by isurlsafe() are copied as is, a space becomes '+'
 * and every other byte becomes "%XX" with upper-case hexadecimal digits.
 *
 * The result is built directly in a heap buffer of the exact size, so the
 * length of the input is not limited by the available stack space.
 *
 * @param psz_url nul-terminated UTF-8 representation of the component.
 * Obviously, you can't pass an URI containing a nul character, but you don't
 * want to do that, do you?
 *
 * @return encoded string (must be free()'d), or NULL if psz_url is NULL or
 * memory could not be allocated
 */
char *encode_URI_component( const char *psz_url )
{
    const uint8_t *in;
    size_t i_size = 1; /* terminating nul */
    char *psz_enc, *out;

    if( psz_url == NULL )
        return NULL;

    /* First pass: compute the exact size of the encoded string */
    for( in = (const uint8_t *)psz_url; *in; in++ )
        i_size += ( isurlsafe( *in ) || ( *in == ' ' ) ) ? 1 : 3;

    psz_enc = malloc( i_size );
    if( psz_enc == NULL )
        return NULL;

    /* Second pass: actual encoding */
    out = psz_enc;
    for( in = (const uint8_t *)psz_url; *in; in++ )
    {
        uint8_t c = *in;

        if( isurlsafe( c ) )
            *out++ = (char)c;
        else
        if ( c == ' ')
            *out++ = '+';
        else
        {
            *out++ = '%';
            *out++ = url_hexchar( c >> 4 );
            *out++ = url_hexchar( c & 0xf );
        }
    }
    *out = '\0';

    return psz_enc;
}
234
/**
 * Converts "&lt;", "&gt;", "&amp;", "&quot;" and "&#039;" to
 * "<", ">", "&", "\"" and "'", in place.
 * Anything else, including unknown entities, is copied unchanged.
 * \param psz_value nul-terminated string to convert
 */
void resolve_xml_special_chars( char *psz_value )
{
    /* Known entities and the character each one stands for */
    static const struct
    {
        const char *psz_entity;
        size_t      i_length;
        char        c_plain;
    } p_entities[] =
    {
        { "&lt;",   4, '<'  },
        { "&gt;",   4, '>'  },
        { "&amp;",  5, '&'  },
        { "&quot;", 6, '\"' },
        { "&#039;", 6, '\'' },
    };
    const size_t i_entities = sizeof( p_entities ) / sizeof( p_entities[0] );

    const char *p_read = psz_value;
    char *p_write = psz_value;

    while( *p_read != '\0' )
    {
        /* By default, copy a single character as is */
        char c_out = *p_read;
        size_t i_skip = 1;
        size_t i;

        for( i = 0; i < i_entities; i++ )
        {
            if( strncmp( p_read, p_entities[i].psz_entity,
                         p_entities[i].i_length ) == 0 )
            {
                c_out = p_entities[i].c_plain;
                i_skip = p_entities[i].i_length;
                break;
            }
        }

        /* The write cursor never gets ahead of the read cursor */
        *p_write++ = c_out;
        p_read += i_skip;
    }

    *p_write = '\0';
}
281
/**
 * Converts '<', '>', '\"', '\'' and '&' to their html entities
 * ("&lt;", "&gt;", "&quot;", "&#039;" and "&amp;"); every other character
 * is copied unchanged.
 * \param psz_content simple element content that is to be converted
 * \return newly allocated converted string (to be released with free()),
 *         or NULL if psz_content is NULL or memory could not be allocated
 */
char *convert_xml_special_chars( const char *psz_content )
{
    const char *p_from;
    char *psz_temp, *p_to;

    if( psz_content == NULL )
        return NULL;

    /* The longest entity is 6 characters long, hence the worst case */
    psz_temp = malloc( 6 * strlen( psz_content ) + 1 );
    if( psz_temp == NULL )
        return NULL;

    p_to = psz_temp;
    for( p_from = psz_content; *p_from; p_from++ )
    {
        const char *psz_entity;
        size_t i_entity;

        switch( *p_from )
        {
            case '<':  psz_entity = "&lt;";   break;
            case '>':  psz_entity = "&gt;";   break;
            case '&':  psz_entity = "&amp;";  break;
            case '\"': psz_entity = "&quot;"; break;
            case '\'': psz_entity = "&#039;"; break;
            default:   psz_entity = NULL;     break;
        }

        if( psz_entity == NULL )
        {
            *p_to++ = *p_from;
            continue;
        }

        i_entity = strlen( psz_entity );
        memcpy( p_to, psz_entity, i_entity );
        p_to += i_entity;
    }
    *p_to = '\0';

    return psz_temp;
}
330
331 /****************************************************************************
 * String formatting functions
333  ****************************************************************************/
/**
 * Formats the current local time according to a strftime() format.
 *
 * \param tformat strftime() format string
 * \return newly allocated formatted string (to be released with free()).
 *         An empty string is returned if the local time cannot be
 *         obtained or if the result does not fit in the internal buffer
 *         (254 characters); NULL is returned if strdup() fails.
 */
char *str_format_time(char *tformat )
{
    char buffer[255];
    time_t curtime;
    struct tm *p_tm;
#if defined(HAVE_LOCALTIME_R)
    struct tm loctime;
#endif

    /* Get the current time.  */
    curtime = time( NULL );

    /* Convert it to local time representation.  */
#if defined(HAVE_LOCALTIME_R)
    p_tm = localtime_r( &curtime, &loctime );
#else
    p_tm = localtime( &curtime );
#endif

    /* When strftime() returns 0 the buffer content is unspecified, so it
     * must not be duplicated as is */
    if( ( p_tm == NULL )
     || ( strftime( buffer, sizeof( buffer ), tformat, p_tm ) == 0 ) )
        buffer[0] = '\0';

    return strdup( buffer );
}
357
358 #define INSERT_STRING( check, string )                              \
359                     if( check && string )                           \
360                     {                                               \
361                         int len = strlen( string );                 \
362                         dst = realloc( dst,                         \
363                                        i_size = i_size + len + 1 ); \
364                         strncpy( d, string, len+1 );                \
365                         d += len;                                   \
366                     }                                               \
367                     else                                            \
368                     {                                               \
369                         *d = '-';                                   \
370                         d++;                                        \
371                     }
372 char *__str_format_meta( vlc_object_t *p_object, char *string )
373 {
374     char *s = string;
375     char *dst = malloc( 1000 );
376     char *d = dst;
377     int b_is_format = 0;
378     char buf[10];
379     int i_size = strlen( string );
380
381     playlist_t *p_playlist = pl_Yield( p_object );
382     input_thread_t *p_input = p_playlist->p_input;
383     input_item_t *p_item = NULL;
384     pl_Release( p_object );
385     if( p_input )
386     {
387         vlc_object_yield( p_input );
388         p_item = p_input->input.p_item;
389         if( p_item )
390             vlc_mutex_lock( &p_item->lock );
391     }
392
393     sprintf( dst, string );
394
395     while( *s )
396     {
397         if( b_is_format )
398         {
399             switch( *s )
400             {
401                 case 'a':
402                     INSERT_STRING( p_item && p_item->p_meta,
403                                    p_item->p_meta->psz_artist );
404                     break;
405                 case 'b':
406                     INSERT_STRING( p_item && p_item->p_meta,
407                                    p_item->p_meta->psz_album );
408                     break;
409                 case 'c':
410                     INSERT_STRING( p_item && p_item->p_meta,
411                                    p_item->p_meta->psz_copyright );
412                     break;
413                 case 'd':
414                     INSERT_STRING( p_item && p_item->p_meta,
415                                    p_item->p_meta->psz_description );
416                     break;
417                 case 'e':
418                     INSERT_STRING( p_item && p_item->p_meta,
419                                    p_item->p_meta->psz_encodedby );
420                     break;
421                 case 'g':
422                     INSERT_STRING( p_item && p_item->p_meta,
423                                    p_item->p_meta->psz_genre );
424                     break;
425                 case 'l':
426                     INSERT_STRING( p_item && p_item->p_meta,
427                                    p_item->p_meta->psz_language );
428                     break;
429                 case 'n':
430                     INSERT_STRING( p_item && p_item->p_meta,
431                                    p_item->p_meta->psz_tracknum );
432                     break;
433                 case 'p':
434                     INSERT_STRING( p_item && p_item->p_meta,
435                                    p_item->p_meta->psz_nowplaying );
436                     break;
437                 case 'r':
438                     INSERT_STRING( p_item && p_item->p_meta,
439                                    p_item->p_meta->psz_rating );
440                     break;
441                 case 's':
442                 {
443                     char *lang;
444                     if( p_input )
445                     {
446                         lang = var_GetString( p_input, "sub-language" );
447                     }
448                     else
449                     {
450                         lang = strdup( "-" );
451                     }
452                     INSERT_STRING( 1, lang );
453                     free( lang );
454                     break;
455                 }
456                 case 't':
457                     INSERT_STRING( p_item && p_item->p_meta,
458                                    p_item->p_meta->psz_title );
459                     break;
460                 case 'u':
461                     INSERT_STRING( p_item && p_item->p_meta,
462                                    p_item->p_meta->psz_url );
463                     break;
464                 case 'A':
465                     INSERT_STRING( p_item && p_item->p_meta,
466                                    p_item->p_meta->psz_date );
467                     break;
468                 case 'B':
469                     if( p_input )
470                     {
471                         snprintf( buf, 10, "%d",
472                                   var_GetInteger( p_input, "bit-rate" )/1000 );
473                     }
474                     else
475                     {
476                         sprintf( buf, "-" );
477                     }
478                     INSERT_STRING( 1, buf );
479                     break;
480                 case 'C':
481                     if( p_input )
482                     {
483                         snprintf( buf, 10, "%d",
484                                   var_GetInteger( p_input, "chapter" ) );
485                     }
486                     else
487                     {
488                         sprintf( buf, "-" );
489                     }
490                     INSERT_STRING( 1, buf );
491                     break;
492                 case 'D':
493                     if( p_item )
494                     {
495                         sprintf( buf, "%02d:%02d:%02d",
496                                  (int)(p_item->i_duration/(3600000000)),
497                                  (int)((p_item->i_duration/(60000000))%60),
498                                  (int)((p_item->i_duration/1000000)%60) );
499                     }
500                     else
501                     {
502                         sprintf( buf, "--:--:--" );
503                     }
504                     INSERT_STRING( 1, buf );
505                     break;
506                 case 'F':
507                     INSERT_STRING( p_item, p_item->psz_uri );
508                     break;
509                 case 'I':
510                     if( p_input )
511                     {
512                         snprintf( buf, 10, "%d",
513                                   var_GetInteger( p_input, "title" ) );
514                     }
515                     else
516                     {
517                         sprintf( buf, "-" );
518                     }
519                     INSERT_STRING( 1, buf );
520                     break;
521                 case 'L':
522                     if( p_item && p_input )
523                     {
524                         sprintf( buf, "%02d:%02d:%02d",
525                      (int)((p_item->i_duration-p_input->i_time)/(3600000000)),
526                      (int)(((p_item->i_duration-p_input->i_time)/(60000000))%60),
527                      (int)(((p_item->i_duration-p_input->i_time)/1000000)%60) );
528                     }
529                     else
530                     {
531                         sprintf( buf, "--:--:--" );
532                     }
533                     INSERT_STRING( 1, buf );
534                     break;
535                 case 'N':
536                     INSERT_STRING( p_item, p_item->psz_name );
537                     break;
538                 case 'O':
539                 {
540                     char *lang;
541                     if( p_input )
542                     {
543                         lang = var_GetString( p_input, "audio-language" );
544                     }
545                     else
546                     {
547                         lang = strdup( "-" );
548                     }
549                     INSERT_STRING( 1, lang );
550                     free( lang );
551                     break;
552                 }
553                 case 'P':
554                     if( p_input )
555                     {
556                         snprintf( buf, 10, "%2.1lf",
557                                   var_GetFloat( p_input, "position" ) * 100. );
558                     }
559                     else
560                     {
561                         sprintf( buf, "--.-%%" );
562                     }
563                     INSERT_STRING( 1, buf );
564                     break;
565                 case 'R':
566                     if( p_input )
567                     {
568                         int r = var_GetInteger( p_input, "rate" );
569                         snprintf( buf, 10, "%d.%d", r/1000, r%1000 );
570                     }
571                     else
572                     {
573                         sprintf( buf, "-" );
574                     }
575                     INSERT_STRING( 1, buf );
576                     break;
577                 case 'S':
578                     if( p_input )
579                     {
580                         int r = var_GetInteger( p_input, "sample-rate" );
581                         snprintf( buf, 10, "%d.%d", r/1000, (r/100)%10 );
582                     }
583                     else
584                     {
585                         sprintf( buf, "-" );
586                     }
587                     INSERT_STRING( 1, buf );
588                     break;
589                 case 'T':
590                     if( p_input )
591                     {
592                         sprintf( buf, "%02d:%02d:%02d",
593                                  (int)(p_input->i_time/(3600000000)),
594                                  (int)((p_input->i_time/(60000000))%60),
595                                  (int)((p_input->i_time/1000000)%60) );
596                     }
597                     else
598                     {
599                         sprintf( buf, "--:--:--" );
600                     }
601                     INSERT_STRING( 1, buf );
602                     break;
603                 case 'U':
604                     INSERT_STRING( p_item && p_item->p_meta,
605                                    p_item->p_meta->psz_publisher );
606                     break;
607                 case 'V':
608                 {
609                     audio_volume_t volume;
610                     aout_VolumeGet( p_object, &volume );
611                     snprintf( buf, 10, "%d", volume );
612                     INSERT_STRING( 1, buf );
613                     break;
614                 }
615                 case '_':
616                     *d = '\n';
617                     d++;
618                     break;
619
620                 default:
621                     *d = *s;
622                     d++;
623                     break;
624             }
625             b_is_format = 0;
626         }
627         else if( *s == '$' )
628         {
629             b_is_format = 1;
630         }
631         else
632         {
633             *d = *s;
634             d++;
635         }
636         s++;
637     }
638     *d = '\0';
639
640     if( p_input )
641     {
642         vlc_object_release( p_input );
643         if( p_item )
644             vlc_mutex_unlock( &p_item->lock );
645     }
646
647     return dst;
648 }