1 /*****************************************************************************
2 * url.c: URL related functions
3 *****************************************************************************
4 * Copyright (C) 2006 VLC authors and VideoLAN
5 * Copyright (C) 2008-2012 Rémi Denis-Courmont
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 *****************************************************************************/
34 #include <vlc_common.h>
/**
 * Decodes an encoded URI component into a newly allocated string.
 * The input string is left untouched. See also decode_URI().
 *
 * \param str nul-terminated URI component to decode (NULL is tolerated)
 * \return decoded string allocated on the heap (release it with free()),
 * or NULL on error (NULL input, allocation failure, or improperly encoded
 * component).
 */
char *decode_URI_duplicate (const char *str)
{
    if (str == NULL)
        return NULL; /* strdup (NULL) would be undefined behaviour */

    char *buf = strdup (str);
    if (buf == NULL)
        return NULL; /* out of memory: nothing to decode, nothing to free */

    if (decode_URI (buf) == NULL)
    {   /* Malformed escape sequence: do not leak the working copy */
        free (buf);
        return NULL;
    }
    return buf;
}
/* Maps one hexadecimal digit to its value; returns -1 for any other
 * character (including the nul terminator). */
static int decode_URI_hexval (char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}

/**
 * Decodes an encoded URI component in place.
 * <b>This function does NOT decode entire URIs.</b> Instead, it decodes one
 * component at a time (e.g. host name, directory, file name).
 * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
 * Complete URIs are always "encoded" (or they are syntactically invalid).
 *
 * Note that URI encoding is different from Javascript escaping. Especially,
 * white spaces and Unicode non-ASCII code points are encoded differently.
 *
 * Every '%' must be followed by exactly two hexadecimal digits; anything
 * else (truncated escape, sign, white space, non-hex character) is an error.
 * Note that "%00" is well-formed and decodes to a nul byte, which ends the
 * resulting C string early.
 *
 * \param str nul-terminated URI component to decode (NULL is tolerated)
 * \return str on success, NULL if str is NULL or was not properly encoded.
 * On failure, the buffer may have been partially rewritten.
 */
char *decode_URI (char *str)
{
    if (str == NULL)
        return NULL;

    const char *in = str;
    char *out = str; /* never gets ahead of in: decoding only shrinks */
    char c;

    while ((c = *(in++)) != '\0')
    {
        if (c != '%')
        {
            *(out++) = c;
            continue;
        }

        /* in[1] is only read once in[0] is known not to be the terminator,
         * so a truncated escape never reads past the end of the string. */
        int hi = decode_URI_hexval (in[0]);
        if (hi < 0)
            return NULL;

        int lo = decode_URI_hexval (in[1]);
        if (lo < 0)
            return NULL;

        in += 2;
        *(out++) = (char)((hi << 4) | lo);
    }

    *out = '\0';
    return str;
}
/**
 * Tells whether a byte may appear verbatim in an encoded URI component.
 *
 * \param c byte value to test
 * \return true if c is one of the _unreserved_ URI characters
 * (RFC3986 §2.3): ASCII letters, ASCII digits, '-', '.', '_' and '~';
 * false for anything else, including the nul byte.
 */
static inline bool isurisafe (int c)
{
    if ((c >= 'a' && c <= 'z')
     || (c >= 'A' && c <= 'Z')
     || (c >= '0' && c <= '9'))
        return true;

    /* Explicit comparisons rather than strchr(): strchr() also matches the
     * terminating nul of its haystack, which would make '\0' look safe. */
    return c == '-' || c == '.' || c == '_' || c == '~';
}
/**
 * Percent-encodes a byte buffer as a URI component.
 *
 * \param str bytes to encode (length given by *lenp, no terminator needed)
 * \param lenp [IN/OUT] number of bytes at str on entry,
 * number of encoded bytes in the returned buffer on success
 * \return heap buffer holding the encoded bytes (release it with free()).
 * The buffer is <b>not</b> nul-terminated, but one spare byte is reserved
 * so that the caller can store the terminator at index *lenp.
 * NULL on error (allocation failure or length too large); *lenp is then
 * left unchanged.
 */
static char *encode_URI_bytes (const char *str, size_t *restrict lenp)
{
    const size_t inlen = *lenp;

    /* Worst case is three output bytes per input byte, plus the spare byte.
     * Refuse lengths for which that size computation would wrap around and
     * yield a buffer that is too small. */
    if (unlikely(inlen > ((size_t)-1 - 1) / 3))
        return NULL;

    char *buf = malloc (3 * inlen + 1);
    if (unlikely(buf == NULL))
        return NULL;

    char *out = buf;
    for (size_t i = 0; i < inlen; i++)
    {
        static const char hex[16] = "0123456789ABCDEF";
        unsigned char c = str[i];

        if (isurisafe (c))
            *(out++) = c;
        /* This is URI encoding, not HTTP forms:
         * Space is encoded as '%20', not '+'. */
        else
        {
            *(out++) = '%';
            *(out++) = hex[c >> 4];
            *(out++) = hex[c & 0xf];
        }
    }

    *lenp = out - buf;

    /* Give back the unused tail; if shrinking fails, the original (larger)
     * buffer is still valid and is returned instead. */
    out = realloc (buf, *lenp + 1);
    return likely(out != NULL) ? out : buf;
}
/**
 * Percent-encodes a single URI component (RFC3986 §2).
 *
 * @param str nul-terminated UTF-8 representation of the component.
 * @note Since the input is nul-terminated, it cannot carry nul bytes.
 * @return encoded nul-terminated string (must be free()'d),
 * or NULL for ENOMEM.
 */
char *encode_URI_component (const char *str)
{
    size_t size = strlen (str);
    char *encoded = encode_URI_bytes (str, &size);

    if (unlikely(encoded == NULL))
        return NULL;

    /* size now holds the encoded length; the helper leaves the result
     * unterminated, so the terminator is stored here. */
    encoded[size] = '\0';
    return encoded;
}
147 * Builds a URL representation from a local file path.
148 * @param path path to convert (or URI to copy)
149 * @param scheme URI scheme to use (default is auto: "file", "fd" or "smb")
150 * @return a nul-terminated URI string (use free() to release it),
151 * or NULL in case of error (errno will be set accordingly)
153 char *vlc_path2uri (const char *path, const char *scheme)
/* A lone "-" with no explicit scheme is shorthand for standard input. */
160 if (scheme == NULL && !strcmp (path, "-"))
161 return strdup ("fd://0"); // standard input
162 /* Note: VLC cannot handle URI schemes without double slash after the
163 * scheme name (such as mailto: or news:). */
/* Stack copy of path in which every forward slash becomes DIR_SEP_CHAR.
 * NOTE(review): the array is sized by strlen (path), so stack usage grows
 * with the input; presumably this sits under a platform #ifdef that is not
 * visible here - confirm. */
168 char p[strlen (path) + 1];
170 for (buf = p; *path; buf++, path++)
171 *buf = (*path == '/') ? DIR_SEP_CHAR : *path;
177 #if defined (_WIN32) || defined (__OS2__)
/* Drive-letter prefix (a letter followed by a colon): the drive becomes the
 * first segment after the scheme, which defaults to "file". */
179 if (isalpha ((unsigned char)path[0]) && (path[1] == ':'))
181 if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
185 # warning Drive letter-relative path not implemented!
186 if (path[0] != DIR_SEP_CHAR)
193 if (!strncmp (path, "\\\\", 2))
194 { /* Windows UNC paths */
195 /* \\host\share\path -> file://host/share/path */
/* hostlen counts the bytes between the two leading backslashes and the
 * next directory separator (or the end of the string). */
196 size_t hostlen = strcspn (path + 2, DIR_SEP);
/* snprintf is used with a size limit to copy only the host part of path.
 * NOTE(review): the literal prefix of the format is already 7 bytes long,
 * so a size of 7 + hostlen leaves no room for the terminating nul and the
 * last byte of the host name would be cut off; 8 + hostlen looks like the
 * intended size - confirm. */
198 buf = malloc (7 + hostlen);
200 snprintf (buf, 7 + hostlen, "file://%s", path + 2);
204 return buf; /* Hostname without path */
208 if (path[0] != DIR_SEP_CHAR)
209 { /* Relative path: prepend the current working directory */
212 if ((cwd = vlc_getcwd ()) == NULL)
214 if (asprintf (&buf, "%s"DIR_SEP"%s", cwd, path) == -1)
/* Recurse on the now-absolute path; a NULL buf is propagated as NULL. */
218 ret = (buf != NULL) ? vlc_path2uri (buf, scheme) : NULL;
/* Start the URI with the scheme, defaulting to "file". */
223 if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
228 /* Absolute file path */
229 assert (path[0] == DIR_SEP_CHAR);
/* Skip the separator, then measure the component that follows it. */
232 size_t len = strcspn (++path, DIR_SEP);
/* NOTE(review): path - len only points at the start of the component if
 * path was advanced by len on a line that is not visible here - confirm. */
235 char *component = encode_URI_bytes (path - len, &len);
236 if (unlikely(component == NULL))
/* len is passed by pointer above and is used here as the length of the
 * encoded component, which still needs its terminator. */
241 component[len] = '\0';
/* Append a slash and the encoded component to the URI built so far. */
244 int val = asprintf (&uri, "%s/%s", buf, component);
247 if (unlikely(val == -1))
257 * Tries to convert a URI to a local (UTF-8-encoded) file path.
258 * @param url URI to convert
259 * @return NULL on error, a nul-terminated string otherwise
260 * (use free() to release it)
262 char *make_path (const char *url)
/* The scheme separator is mandatory; without it the URL is rejected. */
267 char *path = strstr (url, "://");
269 return NULL; /* unsupported scheme or invalid syntax */
/* If a slash occurs before the separator, the scheme name is taken to end
 * at that slash rather than at the separator. */
271 end = memchr (url, '/', path - url);
272 size_t schemelen = ((end != NULL) ? end : path) - url;
273 path += 3; /* skip "://" */
275 /* Remove HTML anchor if present */
276 end = strchr (path, '#');
/* Both allocation calls below leave path pointing at a private heap copy. */
278 path = strndup (path, end - path);
280 path = strdup (path);
281 if (unlikely(path == NULL))
282 return NULL; /* boom! */
/* Scheme names are compared case-insensitively. */
287 if (schemelen == 4 && !strncasecmp (url, "file", 4))
289 #if !defined (_WIN32) && !defined (__OS2__)
290 /* Leading slash => local path */
293 /* Local path disguised as a remote one */
/* Shift by 9 rather than 10: the slash that follows "localhost" is kept and
 * becomes the leading slash of the path. memmove returns its destination,
 * so the result is still the start of the heap copy. */
294 if (!strncasecmp (path, "localhost/", 10))
295 return memmove (path, path + 9, strlen (path + 9) + 1);
297 /* cannot start with a space */
/* Visits every slash in the path. NOTE(review): the loop body is not
 * visible here; presumably each slash is rewritten to a backslash. */
300 for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
303 /* Leading backslash => local path */
305 return memmove (path, path + 1, strlen (path + 1) + 1);
306 /* Local path disguised as a remote one */
/* Here the separator after "localhost" is dropped with the host name. */
307 if (!strncasecmp (path, "localhost\\", 10))
308 return memmove (path, path + 10, strlen (path + 10) + 1);
/* Any other non-empty remainder is prefixed with two backslashes. */
310 if (*path && asprintf (&ret, "\\\\%s", path) == -1)
313 /* non-local path :-( */
316 if (schemelen == 2 && !strncasecmp (url, "fd", 2))
/* Base 0 lets strtol accept decimal, octal (leading 0) and hexadecimal
 * (leading 0x) descriptor numbers.
 * NOTE(review): the long result is narrowed to int without a range check. */
318 int fd = strtol (path, &end, 0);
323 #if !defined( _WIN32 ) && !defined( __OS2__ )
/* NOTE(review): presumably selected by descriptor values 0, 1 and 2 in a
 * switch that is not visible here - confirm. */
327 ret = strdup ("/dev/stdin");
330 ret = strdup ("/dev/stdout");
333 ret = strdup ("/dev/stderr");
/* Generic form: the descriptor number is appended to the device prefix. */
336 if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
340 /* XXX: Does this work on WinCE? */
342 ret = strdup ("CON");
350 return ret; /* unknown scheme */
353 static char *vlc_idna_to_ascii (const char *);
356 * Splits a URL into parts.
357 * \param url structure of URL parts [OUT]
358 * \param str nul-terminated URL string to split
359 * \param opt if non-zero, character separating paths from options,
360 * normally the question mark
361 * \note Use vlc_UrlClean() to free associated resources
362 * \bug Errors cannot be detected.
365 void vlc_UrlParse (vlc_url_t *restrict url, const char *str, unsigned char opt)
/* Start from a clean slate so that every field left unset reads as NULL. */
367 url->psz_protocol = NULL;
368 url->psz_username = NULL;
369 url->psz_password = NULL;
370 url->psz_host = NULL;
372 url->psz_path = NULL;
373 url->psz_option = NULL;
374 url->psz_buffer = NULL;
/* Work on a private, writable copy of the input. It is stored in
 * url->psz_buffer and the fields assigned from cur and next below point
 * into it. */
379 char *buf = strdup (str);
380 if (unlikely(buf == NULL))
382 url->psz_buffer = buf;
384 char *cur = buf, *next;
/* Advance over the characters accepted in a scheme name: ASCII letters,
 * ASCII digits, plus, minus and dot. */
388 while ((*next >= 'A' && *next <= 'Z') || (*next >= 'a' && *next <= 'z')
389 || (*next >= '0' && *next <= '9') || memchr ("+-.", *next, 3) != NULL)
391 /* This is not strictly correct. In principle, the scheme is always
392 * present in an absolute URL and followed by a colon. Depending on the
393 * URL scheme, the two subsequent slashes are not required.
394 * VLC uses a different scheme for historical compatibility reasons - the
395 * scheme is often implicit. */
396 if (!strncmp (next, "://", 3))
400 url->psz_protocol = cur;
/* The first slash after the authority starts the path. */
405 next = strchr (cur, '/');
408 *next = '\0'; /* temporary nul, reset to slash later */
409 url->psz_path = next;
/* When opt is non-zero, the first occurrence of that character in the
 * path starts the option string. */
410 if (opt && (next = strchr (next, opt)) != NULL)
413 url->psz_option = next;
417 url->psz_path = "/";*/
/* strrchr picks the last at-sign, so any earlier at-sign stays inside the
 * user information. */
420 next = strrchr (cur, '@');
424 url->psz_username = cur;
427 /* Password (obsolete) */
428 next = strchr (url->psz_username, ':');
432 url->psz_password = next;
/* User name and password are percent-decoded in place; the return values
 * are not checked. */
433 decode_URI (url->psz_password);
435 decode_URI (url->psz_username);
439 if (*cur == '[' && (next = strrchr (cur, ']')) != NULL)
440 { /* Try IPv6 numeral within brackets */
/* The host is a separate heap copy starting after the opening bracket. */
442 url->psz_host = strdup (cur + 1);
451 next = strchr (cur, ':');
/* Non-bracketed hosts go through the IDN conversion, which returns a heap
 * string or NULL. */
455 url->psz_host = vlc_idna_to_ascii (cur);
/* NOTE(review): atoi cannot report malformed or out-of-range port text;
 * strtoul with an end pointer would allow validation. */
460 url->i_port = atoi (next);
462 if (url->psz_path != NULL)
463 *url->psz_path = '/'; /* restore leading slash */
467 * Releases resources allocated by vlc_UrlParse().
469 void vlc_UrlClean (vlc_url_t *restrict url)
471 free (url->psz_host);
472 free (url->psz_buffer);
475 #if defined (HAVE_IDN)
477 #elif defined (_WIN32)
478 # include <windows.h>
479 # include <vlc_charset.h>
483 * Converts a UTF-8 nul-terminated IDN to nul-terminated ASCII domain name.
484 * \param idn UTF-8 Internationalized Domain Name to convert
485 * \return a heap-allocated string or NULL on error.
487 static char *vlc_idna_to_ascii (const char *idn)
489 #if defined (HAVE_IDN)
492 if (idna_to_ascii_8z (idn, &adn, IDNA_ALLOW_UNASSIGNED) != IDNA_SUCCESS)
496 #elif defined (_WIN32) && (_WIN32_WINNT >= 0x0601)
499 wchar_t *wide = ToWide (idn);
503 int len = IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, NULL, 0);
507 wchar_t *buf = malloc (sizeof (*buf) * len);
508 if (unlikely(buf == NULL))
510 if (!IdnToAscii (IDN_ALLOW_UNASSIGNED, wide, -1, buf, len))
515 ret = FromWide (buf);
522 /* No IDN support, filter out non-ASCII domain names */
523 for (const char *p = idn; *p; p++)
524 if (((unsigned char)*p) >= 0x80)