1 /*****************************************************************************
2 * url.c: URL related functions
3 *****************************************************************************
4 * Copyright (C) 2006 VLC authors and VideoLAN
5 * Copyright (C) 2008-2012 Rémi Denis-Courmont
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 *****************************************************************************/
33 #include <vlc_common.h>
39 * Decodes an encoded URI component. See also decode_URI().
40 * \return decoded string allocated on the heap, or NULL on error.
42 char *decode_URI_duplicate (const char *str)
44 char *buf = strdup (str);
45 if (decode_URI (buf) == NULL)
54 * Decodes an encoded URI component in place.
55 * <b>This function does NOT decode entire URIs.</b> Instead, it decodes one
56 * component at a time (e.g. host name, directory, file name).
57 * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
58 * Complete URIs are always "encoded" (or they are syntactically invalid).
60 * Note that URI encoding is different from JavaScript escaping. In particular,
61 * white spaces and Unicode non-ASCII code points are encoded differently.
63 * \param str nul-terminated URI component to decode
64 * \return str on success, NULL if it was not properly encoded
66 char *decode_URI (char *str)
68 char *in = str, *out = str;
73 while ((c = *(in++)) != '\0')
79 if (!(hex[0] = *(in++)) || !(hex[1] = *(in++)))
82 *(out++) = strtoul (hex, NULL, 0x10);
88 /* Inserting non-ASCII or non-printable characters is unsafe,
89 * and no sane browser will send these unencoded */
96 static inline bool isurisafe (int c)
98 /* These are the _unreserved_ URI characters (RFC3986 §2.3) */
99 return ((unsigned char)(c - 'a') < 26)
100 || ((unsigned char)(c - 'A') < 26)
101 || ((unsigned char)(c - '0') < 10)
102 || (strchr ("-._~", c) != NULL);
105 static char *encode_URI_bytes (const char *str, size_t *restrict lenp)
107 char *buf = malloc (3 * *lenp + 1);
108 if (unlikely(buf == NULL))
112 for (size_t i = 0; i < *lenp; i++)
114 static const char hex[16] = "0123456789ABCDEF";
115 unsigned char c = str[i];
119 /* This is URI encoding, not HTTP forms:
120 * Space is encoded as '%20', not '+'. */
124 *(out++) = hex[c >> 4];
125 *(out++) = hex[c & 0xf];
130 out = realloc (buf, *lenp + 1);
131 return likely(out != NULL) ? out : buf;
135 * Encodes a URI component (RFC3986 §2).
137 * @param str nul-terminated UTF-8 representation of the component.
138 * @note Obviously, a URI containing nul bytes cannot be passed.
139 * @return encoded string (must be free()'d), or NULL for ENOMEM.
141 char *encode_URI_component (const char *str)
143 size_t len = strlen (str);
144 char *ret = encode_URI_bytes (str, &len);
145 if (likely(ret != NULL))
151 * Builds a URL representation from a local file path.
152 * @param path path to convert (or URI to copy)
153 * @param scheme URI scheme to use (default is auto: "file", "fd" or "smb")
154 * @return a nul-terminated URI string (use free() to release it),
155 * or NULL in case of error
157 char *vlc_path2uri (const char *path, const char *scheme)
161 if (scheme == NULL && !strcmp (path, "-"))
162 return strdup ("fd://0"); // standard input
163 /* Note: VLC cannot handle URI schemes without double slash after the
164 * scheme name (such as mailto: or news:). */
169 char p[strlen (path) + 1];
171 for (buf = p; *path; buf++, path++)
172 *buf = (*path == '/') ? DIR_SEP_CHAR : *path;
178 #if defined( WIN32 ) || defined( __OS2__ )
180 if (isalpha ((unsigned char)path[0]) && (path[1] == ':'))
182 if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
186 # warning Drive letter-relative path not implemented!
187 if (path[0] != DIR_SEP_CHAR)
192 if (!strncmp (path, "\\\\", 2))
193 { /* Windows UNC paths */
194 #if !defined( WIN32 ) && !defined( __OS2__ )
196 return NULL; /* remote files not supported */
198 /* \\host\share\path -> smb://host/share/path */
199 if (strchr (path + 2, '\\') != NULL)
200 { /* Convert backslashes to slashes */
201 char *dup = strdup (path);
204 for (size_t i = 2; dup[i]; i++)
206 dup[i] = DIR_SEP_CHAR;
208 char *ret = vlc_path2uri (dup, scheme);
212 # define SMB_SCHEME "smb"
214 /* \\host\share\path -> file://host/share/path */
215 # define SMB_SCHEME "file"
217 size_t hostlen = strcspn (path + 2, DIR_SEP);
219 buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen);
221 snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen,
222 SMB_SCHEME"://%s", path + 2);
226 return buf; /* Hostname without path */
229 if (path[0] != DIR_SEP_CHAR)
230 { /* Relative path: prepend the current working directory */
233 if ((cwd = vlc_getcwd ()) == NULL)
235 if (asprintf (&buf, "%s"DIR_SEP"%s", cwd, path) == -1)
239 ret = (buf != NULL) ? vlc_path2uri (buf, scheme) : NULL;
244 if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
249 /* Absolute file path */
250 assert (path[0] == DIR_SEP_CHAR);
253 size_t len = strcspn (++path, DIR_SEP);
256 char *component = encode_URI_bytes (path - len, &len);
257 if (unlikely(component == NULL))
262 component[len] = '\0';
265 int val = asprintf (&uri, "%s/%s", buf, component);
268 if (unlikely(val == -1))
278 * Tries to convert a URI to a local (UTF-8-encoded) file path.
279 * @param url URI to convert
280 * @return NULL on error, a nul-terminated string otherwise
281 * (use free() to release it)
283 char *make_path (const char *url)
288 char *path = strstr (url, "://");
290 return NULL; /* unsupported scheme or invalid syntax */
292 end = memchr (url, '/', path - url);
293 size_t schemelen = ((end != NULL) ? end : path) - url;
294 path += 3; /* skip "://" */
296 /* Remove HTML anchor if present */
297 end = strchr (path, '#');
299 path = strndup (path, end - path);
301 path = strdup (path);
302 if (unlikely(path == NULL))
303 return NULL; /* boom! */
308 if (schemelen == 4 && !strncasecmp (url, "file", 4))
310 #if !defined (WIN32) && !defined (__OS2__)
311 /* Leading slash => local path */
314 /* Local path disguised as a remote one */
315 if (!strncasecmp (path, "localhost/", 10))
316 return memmove (path, path + 9, strlen (path + 9) + 1);
318 /* cannot start with a space */
321 for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
324 /* Leading backslash => local path */
326 return memmove (path, path + 1, strlen (path + 1) + 1);
327 /* Local path disguised as a remote one */
328 if (!strncasecmp (path, "localhost\\", 10))
329 return memmove (path, path + 10, strlen (path + 10) + 1);
331 if (*path && asprintf (&ret, "\\\\%s", path) == -1)
334 /* non-local path :-( */
337 if (schemelen == 2 && !strncasecmp (url, "fd", 2))
339 int fd = strtol (path, &end, 0);
344 #if !defined( WIN32 ) && !defined( __OS2__ )
348 ret = strdup ("/dev/stdin");
351 ret = strdup ("/dev/stdout");
354 ret = strdup ("/dev/stderr");
357 if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
361 /* XXX: Does this work on WinCE? */
363 ret = strdup ("CON");
371 return ret; /* unknown scheme */
374 static char *vlc_idna_to_ascii (const char *);
377 * Splits a URL into parts.
378 * \param url structure of URL parts [OUT]
379 * \param str nul-terminated URL string to split
380 * \param opt if non-zero, character separating paths from options,
381 * normally the question mark
382 * \note Use vlc_UrlClean() to free associated resources
383 * \bug Errors cannot be detected.
386 void vlc_UrlParse (vlc_url_t *restrict url, const char *str, unsigned char opt)
388 url->psz_protocol = NULL;
389 url->psz_username = NULL;
390 url->psz_password = NULL;
391 url->psz_host = NULL;
393 url->psz_path = NULL;
394 url->psz_option = NULL;
395 url->psz_buffer = NULL;
400 char *buf = strdup (str);
401 if (unlikely(buf == NULL))
403 url->psz_buffer = buf;
405 char *cur = buf, *next;
408 next = strchr (cur, ':');
409 /* This is not strictly correct. In principle, the scheme is always
410 * present in an absolute URL and followed by a colon. Depending on the
411 * URL scheme, the two subsequent slashes are not required.
412 * VLC uses a different scheme for historical compatibility reasons - the
413 * scheme is often implicit. */
414 if (next != NULL && !strncmp (next + 1, "//", 2))
418 url->psz_protocol = cur;
423 next = strchr (cur, '/');
426 *next = '\0'; /* temporary nul, reset to slash later */
427 url->psz_path = next;
428 if (opt && (next = strchr (next, opt)) != NULL)
431 url->psz_option = next;
435 url->psz_path = "/";*/
438 next = strchr (cur, '@');
442 url->psz_username = cur;
445 /* Password (obsolete) */
446 next = strchr (url->psz_username, ':');
450 url->psz_password = next;
451 decode_URI (url->psz_password);
453 decode_URI (url->psz_username);
457 if (*cur == '[' && (next = strrchr (cur, ']')) != NULL)
458 { /* Try IPv6 numeral within brackets */
460 url->psz_host = strdup (cur + 1);
469 next = strchr (cur, ':');
473 url->psz_host = vlc_idna_to_ascii (cur);
478 url->i_port = atoi (next);
480 if (url->psz_path != NULL)
481 *url->psz_path = '/'; /* restore leading slash */
485 * Releases resources allocated by vlc_UrlParse().
487 void vlc_UrlClean (vlc_url_t *restrict url)
489 free (url->psz_host);
490 free (url->psz_buffer);
493 #if defined (HAVE_IDN)
495 #elif defined (WIN32)
496 # include <windows.h>
497 # include <vlc_charset.h>
501 * Converts a UTF-8 nul-terminated IDN to nul-terminated ASCII domain name.
502 * \param idn UTF-8 Internationalized Domain Name to convert
503 * \return a heap-allocated string or NULL on error.
505 static char *vlc_idna_to_ascii (const char *idn)
507 #if defined (HAVE_IDN)
510 if (idna_to_ascii_8z (idn, &adn, IDNA_ALLOW_UNASSIGNED) != IDNA_SUCCESS)
514 #elif defined (WIN32) && (_WIN32_WINNT >= 0x0601)
517 wchar_t *wide = ToWide (idn);
521 int len = IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, NULL, 0);
525 wchar_t *buf = malloc (sizeof (*buf) * len);
526 if (unlikely(buf == NULL))
528 if (!IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, buf, len))
535 ret = FromWide (buf);
542 /* No IDN support, filter out non-ASCII domain names */
543 for (const char *p = idn; *p; p++)
544 if (((unsigned char)*p) >= 0x80)