1 /*****************************************************************************
2 * url.c: URL related functions
3 *****************************************************************************
4 * Copyright (C) 2006 VLC authors and VideoLAN
5 * Copyright (C) 2008-2012 Rémi Denis-Courmont
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 *****************************************************************************/
33 #include <vlc_common.h>
39 * Decodes an encoded URI component. See also decode_URI().
40 * \return decoded string allocated on the heap, or NULL on error.
/* Decodes a heap copy of str so that the string passed in is not modified. */
42 char *decode_URI_duplicate (const char *str)
/* The strdup() result is handed straight to decode_URI() below.
 * NOTE(review): no NULL test on buf is visible between these two lines;
 * confirm that decode_URI() copes with a NULL argument. */
44 char *buf = strdup (str);
/* A NULL return from decode_URI() is the failure case handled here. */
45 if (decode_URI (buf) == NULL)
54 * Decodes an encoded URI component in place.
55 * <b>This function does NOT decode entire URIs.</b> Instead, it decodes one
56 * component at a time (e.g. host name, directory, file name).
57 * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
58 * Complete URIs are always "encoded" (or they are syntactically invalid).
60 * Note that URI encoding is different from Javascript escaping. Especially,
61 * white spaces and Unicode non-ASCII code points are encoded differently.
63 * \param str nul-terminated URI component to decode
64 * \return str on success, NULL if it was not properly encoded
/* Decodes str in place: the result is written back into the same buffer. */
66 char *decode_URI (char *str)
/* in is the read cursor and out the write cursor; both start at str. */
68 char *in = str, *out = str;
/* Consume one byte per iteration until the terminating nul is read. */
73 while ((c = *(in++)) != '\0')
/* Fetch the next two bytes into hex[]. The condition is true when either of
 * them is the terminating nul, i.e. the input ended in the middle of an
 * escape sequence. */
79 if (!(hex[0] = *(in++)) || !(hex[1] = *(in++)))
/* Parse hex[] as a base-16 number and store the resulting byte.
 * NOTE(review): nothing visible here checks that hex[] holds hexadecimal
 * digits; strtoul() returns 0 when it cannot parse any digit, and it also
 * accepts leading white space and a sign. Confirm whether malformed escapes
 * are rejected elsewhere. */
82 *(out++) = strtoul (hex, NULL, 0x10);
/**
 * Tells whether a character is an unreserved URI character (RFC3986 §2.3),
 * i.e. one that may appear in a URI component without percent-encoding.
 *
 * \param c character code to test
 * \return true for ASCII letters, decimal digits and '-', '.', '_', '~';
 *         false for anything else, including the nul character
 */
static inline bool isurisafe (int c)
{
    /* strchr() considers the terminating nul to be part of the searched
     * string, so nul must be rejected before the punctuation lookup;
     * otherwise a nul byte would be reported as safe. */
    if (c == '\0')
        return false;

    /* These are the _unreserved_ URI characters (RFC3986 §2.3) */
    return ((unsigned char)(c - 'a') < 26)
        || ((unsigned char)(c - 'A') < 26)
        || ((unsigned char)(c - '0') < 10)
        || (strchr ("-._~", c) != NULL);
}
/* Encodes *lenp bytes of str into a newly allocated buffer.
 * *lenp is read as the input length and is read again for the final buffer
 * size, so it is presumably updated to the output length in between (that
 * update is not visible here; confirm). */
100 static char *encode_URI_bytes (const char *str, size_t *restrict lenp)
/* Reserve three bytes per input byte plus one extra byte.
 * NOTE(review): 3 * *lenp + 1 is not checked for size_t overflow. */
102 char *buf = malloc (3 * *lenp + 1);
103 if (unlikely(buf == NULL))
/* One iteration per input byte. */
107 for (size_t i = 0; i < *lenp; i++)
/* Upper-case hexadecimal digit table, indexed by nibble value. */
109 static const char hex[16] = "0123456789ABCDEF";
/* Go through unsigned char so that the shift and mask below always yield
 * an index between 0 and 15. */
110 unsigned char c = str[i];
114 /* This is URI encoding, not HTTP forms:
115 * Space is encoded as '%20', not '+'. */
/* High nibble first, then low nibble. */
119 *(out++) = hex[c >> 4];
120 *(out++) = hex[c & 0xf];
/* Resize the buffer to *lenp + 1 bytes. If realloc() fails, the original
 * allocation is still valid and is returned instead. */
125 out = realloc (buf, *lenp + 1);
126 return likely(out != NULL) ? out : buf;
130 * Encodes a URI component (RFC3986 §2).
132 * @param str nul-terminated UTF-8 representation of the component.
133 * @note Obviously, a URI containing nul bytes cannot be passed.
134 * @return encoded string (must be free()'d), or NULL for ENOMEM.
/* Encodes a whole nul-terminated string by delegating to encode_URI_bytes()
 * with the string length. */
136 char *encode_URI_component (const char *str)
138 size_t len = strlen (str);
/* len is passed by address, so encode_URI_bytes() is able to modify it. */
139 char *ret = encode_URI_bytes (str, &len);
/* Non-NULL result: the encoding step succeeded. */
140 if (likely(ret != NULL))
146 * Builds a URL representation from a local file path.
147 * @param path path to convert (or URI to copy)
148 * @param scheme URI scheme to use (default is auto: "file", "fd" or "smb")
149 * @return a nul-terminated URI string (use free() to release it),
150 * or NULL in case of error
/* Converts a file path into a URI string. The function handles, in order:
 * the standard input shorthand, drive-letter paths, UNC paths, relative
 * paths (by recursing on an absolute version), and finally absolute paths,
 * which are encoded one component at a time. */
152 char *vlc_path2uri (const char *path, const char *scheme)
/* With no explicit scheme, a lone dash maps to file descriptor 0. */
156 if (scheme == NULL && !strcmp (path, "-"))
157 return strdup ("fd://0"); // standard input
158 /* Note: VLC cannot handle URI schemes without double slash after the
159 * scheme name (such as mailto: or news:). */
/* Stack copy of path, sized from its length.
 * NOTE(review): this is a variable-length array, so a very long path uses
 * a correspondingly large amount of stack. */
164 char p[strlen (path) + 1];
/* Copy path into p, replacing every forward slash with DIR_SEP_CHAR.
 * The condition under which this section is compiled is not visible here. */
166 for (buf = p; *path; buf++, path++)
167 *buf = (*path == '/') ? DIR_SEP_CHAR : *path;
173 #if defined( WIN32 ) || defined( __OS2__ )
/* Drive-letter path: an alphabetic character followed by a colon. */
175 if (isalpha ((unsigned char)path[0]) && (path[1] == ':'))
177 if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
181 # warning Drive letter-relative path not implemented!
182 if (path[0] != DIR_SEP_CHAR)
/* Two leading backslashes introduce a UNC path. */
187 if (!strncmp (path, "\\\\", 2))
188 { /* Windows UNC paths */
189 #if !defined( WIN32 ) && !defined( __OS2__ )
191 return NULL; /* remote files not supported */
193 /* \\host\share\path -> smb://host/share/path */
194 if (strchr (path + 2, '\\') != NULL)
195 { /* Convert backslashes to slashes */
/* Work on a heap copy so that characters after the two leading backslashes
 * can be rewritten, then recurse on the converted copy. */
196 char *dup = strdup (path);
199 for (size_t i = 2; dup[i]; i++)
201 dup[i] = DIR_SEP_CHAR;
203 char *ret = vlc_path2uri (dup, scheme);
207 # define SMB_SCHEME "smb"
209 /* \\host\share\path -> file://host/share/path */
210 # define SMB_SCHEME "file"
/* Length of the host part: from just after the leading backslashes up to
 * the first DIR_SEP character (or the end of the string). */
212 size_t hostlen = strcspn (path + 2, DIR_SEP);
/* sizeof (SMB_SCHEME) includes the terminating nul of the literal and 3 is
 * the length of the separator after the scheme, so the buffer holds the
 * scheme, the separator, hostlen bytes of host name and a nul. snprintf()
 * receives the same size and therefore stops after hostlen host bytes.
 * NOTE(review): no NULL check on the malloc() result is visible here. */
214 buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen);
216 snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen,
217 SMB_SCHEME"://%s", path + 2);
221 return buf; /* Hostname without path */
224 if (path[0] != DIR_SEP_CHAR)
225 { /* Relative path: prepend the current working directory */
228 if ((cwd = vlc_getcwd ()) == NULL)
/* Join the working directory and the relative path with DIR_SEP. */
230 if (asprintf (&buf, "%s"DIR_SEP"%s", cwd, path) == -1)
/* Recurse on the joined path unless buf is NULL. */
234 ret = (buf != NULL) ? vlc_path2uri (buf, scheme) : NULL;
/* Start the URI with the scheme, defaulting to file when none was given. */
239 if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
244 /* Absolute file path */
245 assert (path[0] == DIR_SEP_CHAR);
/* Step over the separator, then measure the next component up to the
 * following DIR_SEP character. */
248 size_t len = strcspn (++path, DIR_SEP);
/* NOTE(review): path - len is the start of the component only if path was
 * advanced by len after the strcspn() call; that step is not visible here.
 * len is passed by address and may be changed by encode_URI_bytes(). */
251 char *component = encode_URI_bytes (path - len, &len);
252 if (unlikely(component == NULL))
/* Terminate the encoded component at len (presumably the encoded length
 * at this point; confirm against encode_URI_bytes()). */
257 component[len] = '\0';
/* Append a slash and the encoded component to the URI built so far. */
260 int val = asprintf (&uri, "%s/%s", buf, component);
263 if (unlikely(val == -1))
273 * Tries to convert a URI to a local (UTF-8-encoded) file path.
274 * @param url URI to convert
275 * @return NULL on error, a nul-terminated string otherwise
276 * (use free() to release it)
/* Converts a URI into a local path. Two schemes are handled in the lines
 * visible here: file (local and UNC-style paths) and fd (file descriptor
 * numbers mapped to device names). */
278 char *make_path (const char *url)
/* Locate the separator that follows the scheme name. */
283 char *path = strstr (url, "://");
285 return NULL; /* unsupported scheme or invalid syntax */
/* Look for a slash located before the separator; when there is one, the
 * scheme length is measured up to that slash instead. */
287 end = memchr (url, '/', path - url);
288 size_t schemelen = ((end != NULL) ? end : path) - url;
289 path += 3; /* skip "://" */
291 /* Remove HTML anchor if present */
292 end = strchr (path, '#');
/* The two duplications below are presumably the with-anchor and
 * without-anchor alternatives; the selecting condition is not visible.
 * From here on, path is a heap copy that can be edited in place. */
294 path = strndup (path, end - path);
296 path = strdup (path);
297 if (unlikely(path == NULL))
298 return NULL; /* boom! */
/* Scheme comparison is case-insensitive and requires an exact length. */
303 if (schemelen == 4 && !strncasecmp (url, "file", 4))
305 #if !defined (WIN32) && !defined (__OS2__)
306 /* Leading slash => local path */
309 /* Local path disguised as a remote one */
310 if (!strncasecmp (path, "localhost/", 10))
/* Drop the 9 characters of the host name in place and keep the slash that
 * follows it; the + 1 moves the terminating nul as well. */
311 return memmove (path, path + 9, strlen (path + 9) + 1);
313 /* cannot start with a space */
/* Visit every forward slash in the path; the action applied to each one is
 * not visible here. */
316 for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
319 /* Leading backslash => local path */
321 return memmove (path, path + 1, strlen (path + 1) + 1);
322 /* Local path disguised as a remote one */
323 if (!strncasecmp (path, "localhost\\", 10))
/* Here all 10 characters, including the backslash, are dropped. */
324 return memmove (path, path + 10, strlen (path + 10) + 1);
/* A non-empty remainder is prefixed with two backslashes. */
326 if (*path && asprintf (&ret, "\\\\%s", path) == -1)
329 /* non-local path :-( */
332 if (schemelen == 2 && !strncasecmp (url, "fd", 2))
/* Base 0 lets strtol() accept decimal, octal and hexadecimal notation.
 * NOTE(review): the long result is narrowed to int and no range check is
 * visible here. */
334 int fd = strtol (path, &end, 0);
339 #if !defined( WIN32 ) && !defined( __OS2__ )
/* Three descriptors map to named devices (presumably 0, 1 and 2; the
 * selecting conditions are not visible). Any other descriptor goes through
 * the /dev/fd directory. */
343 ret = strdup ("/dev/stdin");
346 ret = strdup ("/dev/stdout");
349 ret = strdup ("/dev/stderr");
352 if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
356 /* XXX: Does this work on WinCE? */
358 ret = strdup ("CON");
366 return ret; /* unknown scheme */
369 static char *vlc_idna_to_ascii (const char *);
372 * Splits a URL into parts.
373 * \param url structure of URL parts [OUT]
374 * \param str nul-terminated URL string to split
375 * \param opt if non-zero, character separating paths from options,
376 * normally the question mark
377 * \note Use vlc_UrlClean() to free associated resources
378 * \bug Errors cannot be detected.
/* Splits str into scheme, credentials, host, port, path and option.
 * The string is duplicated once; most fields then point into that copy,
 * while psz_host is a separate allocation. */
381 void vlc_UrlParse (vlc_url_t *restrict url, const char *str, unsigned char opt)
/* Reset every pointer field first so that the structure is well defined
 * whatever happens afterwards. */
383 url->psz_protocol = NULL;
384 url->psz_username = NULL;
385 url->psz_password = NULL;
386 url->psz_host = NULL;
388 url->psz_path = NULL;
389 url->psz_option = NULL;
390 url->psz_buffer = NULL;
/* Private, writable copy of the URL; it is cut into pieces below. */
395 char *buf = strdup (str);
396 if (unlikely(buf == NULL))
398 url->psz_buffer = buf;
400 char *cur = buf, *next;
/* Scan over the characters allowed in a scheme name.
 * NOTE(review): strchr() also matches the terminating nul of its first
 * argument, so this condition is true when *next is the end of the string.
 * Confirm that the scan cannot step past the end of buf. */
404 while ((*next >= 'A' && *next <= 'Z') || (*next >= 'a' && *next <= 'z')
405 || (*next >= '0' && *next <= '9') || (strchr ("+-.", *next) != NULL))
407 /* This is not strictly correct. In principle, the scheme is always
408 * present in an absolute URL and followed by a colon. Depending on the
409 * URL scheme, the two subsequent slashes are not required.
410 * VLC uses a different scheme for historical compatibility reasons - the
411 * scheme is often implicit. */
412 if (!strncmp (next, "://", 3))
416 url->psz_protocol = cur;
/* The path starts at the first slash after the scheme. */
421 next = strchr (cur, '/');
424 *next = '\0'; /* temporary nul, reset to slash later */
425 url->psz_path = next;
/* NOTE(review): *next was set to nul just above, so a search starting at
 * next itself cannot find a non-zero opt. The search probably needs to
 * start at next + 1; confirm and fix. */
426 if (opt && (next = strchr (next, opt)) != NULL)
429 url->psz_option = next;
433 url->psz_path = "/";*/
/* Credentials, if any, end at the first at sign. */
436 next = strchr (cur, '@');
440 url->psz_username = cur;
443 /* Password (obsolete) */
444 next = strchr (url->psz_username, ':');
448 url->psz_password = next;
/* Both credentials are decoded in place; the return values of decode_URI()
 * are not examined. */
449 decode_URI (url->psz_password);
451 decode_URI (url->psz_username);
/* strrchr() is used, so the host ends at the last closing bracket. */
455 if (*cur == '[' && (next = strrchr (cur, ']')) != NULL)
456 { /* Try IPv6 numeral within brackets */
/* Separate allocation for the host, skipping the opening bracket. */
458 url->psz_host = strdup (cur + 1);
/* Otherwise the host ends at the first colon, if there is one. */
467 next = strchr (cur, ':');
471 url->psz_host = vlc_idna_to_ascii (cur);
/* NOTE(review): atoi() reports neither overflow nor non-numeric input. */
476 url->i_port = atoi (next);
478 if (url->psz_path != NULL)
479 *url->psz_path = '/'; /* restore leading slash */
483 * Releases resources allocated by vlc_UrlParse().
/* Frees the two heap allocations owned by the structure. */
485 void vlc_UrlClean (vlc_url_t *restrict url)
/* psz_host is allocated separately in vlc_UrlParse(), through strdup() or
 * vlc_idna_to_ascii(). */
487 free (url->psz_host);
/* psz_buffer is the strdup() copy of the URL made by vlc_UrlParse(). */
488 free (url->psz_buffer);
491 #if defined (HAVE_IDN)
493 #elif defined (WIN32)
494 # include <windows.h>
495 # include <vlc_charset.h>
499 * Converts a UTF-8 nul-terminated IDN to nul-terminated ASCII domain name.
500 * \param idn UTF-8 Internationalized Domain Name to convert
501 * \return a heap-allocated string or NULL on error.
503 static char *vlc_idna_to_ascii (const char *idn)
505 #if defined (HAVE_IDN)
508 if (idna_to_ascii_8z (idn, &adn, IDNA_ALLOW_UNASSIGNED) != IDNA_SUCCESS)
512 #elif defined (WIN32) && (_WIN32_WINNT >= 0x0601)
515 wchar_t *wide = ToWide (idn);
519 int len = IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, NULL, 0);
523 wchar_t *buf = malloc (sizeof (*buf) * len);
524 if (unlikely(buf == NULL))
526 if (!IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, buf, len))
531 ret = FromWide (buf);
538 /* No IDN support, filter out non-ASCII domain names */
539 for (const char *p = idn; *p; p++)
540 if (((unsigned char)*p) >= 0x80)