1 /*****************************************************************************
2 * url.c: URL related functions
3 *****************************************************************************
4 * Copyright (C) 2006 VLC authors and VideoLAN
5 * Copyright (C) 2008-2012 Rémi Denis-Courmont
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 *****************************************************************************/
33 #include <vlc_common.h>
38 * Decodes an encoded URI component. See also decode_URI().
39 * \return decoded string allocated on the heap, or NULL on error.
41 char *decode_URI_duplicate (const char *str)
/* Work on a private strdup() copy so that the caller's string is never
 * modified; decode_URI() then decodes that copy in place. */
43 char *buf = strdup (str);
/* decode_URI() reports an improperly encoded input by returning NULL.
 * NOTE(review): the handling of that failure and the final return are not
 * visible in this excerpt - confirm that the copy is released on error. */
44 if (decode_URI (buf) == NULL)
53 * Decodes an encoded URI component in place.
54 * <b>This function does NOT decode entire URIs.</b> Instead, it decodes one
55 * component at a time (e.g. host name, directory, file name).
56 * Decoded URIs do not exist in the real world (see RFC3986 §2.4).
57 * Complete URIs are always "encoded" (or they are syntactically invalid).
59 * Note that URI encoding is different from JavaScript escaping. In particular,
60 * white space and Unicode non-ASCII code points are encoded differently.
62 * \param str nul-terminated URI component to decode
63 * \return str on success, NULL if it was not properly encoded
65 char *decode_URI (char *str)
/* Read cursor and write cursor both start at the beginning of the same
 * buffer: decoding is done in place. */
67 char *in = str, *out = str;
/* Consume the input one character at a time until the terminating nul. */
72 while ((c = *(in++)) != '\0')
/* Fetch the two characters that follow an escape introducer; hitting the
 * terminating nul on either of them makes this condition true.
 * NOTE(review): the line selecting this branch (presumably a '%' case) and
 * the statement executed on failure are not visible in this excerpt. */
78 if (!(hex[0] = *(in++)) || !(hex[1] = *(in++)))
/* Parse the two collected characters as a base-16 number and store the
 * result as a single output byte. */
81 *(out++) = strtoul (hex, NULL, 0x10);
87 /* Inserting non-ASCII or non-printable characters is unsafe,
88 * and no sane browser will send these unencoded */
/**
 * Tells whether a byte is an _unreserved_ URI character (RFC3986 §2.3),
 * i.e. one that may be copied into a URI component without percent-encoding.
 *
 * \param c byte value to test
 * \return true for ASCII letters, ASCII digits, '-', '.', '_' and '~';
 *         false for everything else, including the nul byte
 */
static inline bool isurisafe (int c)
{
    /* strchr() treats the terminating nul of the searched string as part of
     * it, so looking up c == 0 would succeed and wrongly report the nul byte
     * as safe. Reject it before the lookup. */
    if (c == '\0')
        return false;

    return ((unsigned char)(c - 'a') < 26)
        || ((unsigned char)(c - 'A') < 26)
        || ((unsigned char)(c - '0') < 10)
        || (strchr ("-._~", c) != NULL);
}
104 static char *encode_URI_bytes (const char *str, size_t *restrict lenp)
/* *lenp is the number of input bytes to encode. The buffer is sized for
 * three output characters per input byte (cf. the '%20' form mentioned
 * below) plus one spare byte.
 * NOTE(review): 3 * *lenp + 1 is not checked for size_t overflow. */
106 char *buf = malloc (3 * *lenp + 1);
107 if (unlikely(buf == NULL))
/* Walk the input by index rather than up to a nul terminator, so the
 * length alone decides how many bytes are processed. */
111 for (size_t i = 0; i < *lenp; i++)
/* Exactly 16 characters in a 16-element array: a lookup table without a
 * terminating nul, only ever accessed by index. */
113 static const char hex[16] = "0123456789ABCDEF";
/* Unsigned so that the shift and mask below always yield an index 0..15. */
114 unsigned char c = str[i];
118 /* This is URI encoding, not HTTP forms:
119 * Space is encoded as '%20', not '+'. */
/* High nibble first, then low nibble, as upper-case hexadecimal digits. */
123 *(out++) = hex[c >> 4];
124 *(out++) = hex[c & 0xf];
/* Resize the buffer to *lenp + 1 bytes.
 * NOTE(review): presumably *lenp has been updated to the encoded length by
 * a statement not visible in this excerpt - confirm. */
129 out = realloc (buf, *lenp + 1);
/* If the resize fails, the original allocation is still valid and is
 * returned instead. */
130 return likely(out != NULL) ? out : buf;
134 * Encodes a URI component (RFC3986 §2).
136 * @param str nul-terminated UTF-8 representation of the component.
137 * @note Obviously, a URI containing nul bytes cannot be passed.
138 * @return encoded string (must be free()'d), or NULL for ENOMEM.
140 char *encode_URI_component (const char *str)
/* strlen() excludes the terminating nul, so only the visible characters of
 * the component are handed to the byte encoder. */
142 size_t len = strlen (str);
/* len is passed by address: encode_URI_bytes() reads the input length
 * through it. */
143 char *ret = encode_URI_bytes (str, &len);
/* NOTE(review): the statements executed on success and the final return
 * are not visible in this excerpt. */
144 if (likely(ret != NULL))
150 * Builds a URL representation from a local file path.
151 * @param path path to convert (or URI to copy)
152 * @param scheme URI scheme to use (default is auto: "file", "fd" or "smb")
153 * @return a nul-terminated URI string (use free() to release it),
154 * or NULL in case of error
156 char *vlc_path2uri (const char *path, const char *scheme)
/* With no explicit scheme, the conventional "-" path is mapped to file
 * descriptor 0. */
160 if (scheme == NULL && !strcmp (path, "-"))
161 return strdup ("fd://0"); // standard input
162 /* Note: VLC cannot handle URI schemes without double slash after the
163 * scheme name (such as mailto: or news:). */
/* Stack copy of the path (variable-length array, one extra byte for the
 * terminator) in which every forward slash is replaced by DIR_SEP_CHAR.
 * NOTE(review): the preprocessor condition guarding this copy is not
 * visible in this excerpt. */
168 char p[strlen (path) + 1];
170 for (buf = p; *path; buf++, path++)
171 *buf = (*path == '/') ? DIR_SEP_CHAR : *path;
177 #if defined( WIN32 ) || defined( __OS2__ )
/* Drive-letter path: a letter followed by a colon. The cast keeps the
 * argument of isalpha() within the range of unsigned char. */
179 if (isalpha ((unsigned char)path[0]) && (path[1] == ':'))
181 if (asprintf (&buf, "%s:///%c:", scheme ? scheme : "file",
185 # warning Drive letter-relative path not implemented!
186 if (path[0] != DIR_SEP_CHAR)
/* Two leading backslashes introduce a UNC path. */
191 if (!strncmp (path, "\\\\", 2))
192 { /* Windows UNC paths */
193 #if !defined( WIN32 ) && !defined( __OS2__ )
195 return NULL; /* remote files not supported */
197 /* \\host\share\path -> smb://host/share/path */
198 if (strchr (path + 2, '\\') != NULL)
199 { /* Convert backslashes to slashes */
/* Rewrite a heap copy starting after the two leading backslashes, then
 * convert that copy through a recursive call.
 * NOTE(review): the test choosing which characters are rewritten is not
 * visible in this excerpt. */
200 char *dup = strdup (path);
203 for (size_t i = 2; dup[i]; i++)
205 dup[i] = DIR_SEP_CHAR;
207 char *ret = vlc_path2uri (dup, scheme);
211 # define SMB_SCHEME "smb"
213 /* \\host\share\path -> file://host/share/path */
214 # define SMB_SCHEME "file"
/* Length of the host name: everything after the two backslashes up to the
 * first directory separator. */
216 size_t hostlen = strcspn (path + 2, DIR_SEP);
/* sizeof (SMB_SCHEME) counts the scheme and its terminating nul; 3 is for
 * "://". */
218 buf = malloc (sizeof (SMB_SCHEME) + 3 + hostlen);
/* The size limit equals the allocation, so the output is cut right after
 * the first hostlen characters of path + 2, i.e. after the host name. */
220 snprintf (buf, sizeof (SMB_SCHEME) + 3 + hostlen,
221 SMB_SCHEME"://%s", path + 2);
225 return buf; /* Hostname without path */
228 if (path[0] != DIR_SEP_CHAR)
229 { /* Relative path: prepend the current working directory */
232 if ((cwd = vlc_getcwd ()) == NULL)
/* DIR_SEP is a string literal, concatenated into the format at compile
 * time. */
234 if (asprintf (&buf, "%s"DIR_SEP"%s", cwd, path) == -1)
/* Convert the now-absolute path through a recursive call. */
238 ret = (buf != NULL) ? vlc_path2uri (buf, scheme) : NULL;
/* Start the result with the scheme prefix; "file" is used when the caller
 * did not specify a scheme. */
243 if (asprintf (&buf, "%s://", scheme ? scheme : "file") == -1)
248 /* Absolute file path */
249 assert (path[0] == DIR_SEP_CHAR);
/* Step over the separator and measure the next path component. */
252 size_t len = strcspn (++path, DIR_SEP);
/* NOTE(review): "path - len" implies that path was advanced past the
 * component by a statement not visible in this excerpt. */
255 char *component = encode_URI_bytes (path - len, &len);
256 if (unlikely(component == NULL))
/* The encoded bytes are terminated here, using len as the terminator
 * position. */
261 component[len] = '\0';
/* Build a new string made of the URI so far, a slash and the encoded
 * component. */
264 int val = asprintf (&uri, "%s/%s", buf, component);
267 if (unlikely(val == -1))
277 * Tries to convert a URI to a local (UTF-8-encoded) file path.
278 * @param url URI to convert
279 * @return NULL on error, a nul-terminated string otherwise
280 * (use free() to release it)
282 char *make_path (const char *url)
/* Locate the separator between the scheme and the rest of the URL. */
287 char *path = strstr (url, "://");
289 return NULL; /* unsupported scheme or invalid syntax */
/* Look for a slash inside the part preceding "://"; when there is one, the
 * scheme name is considered to end at that slash. */
291 end = memchr (url, '/', path - url);
292 size_t schemelen = ((end != NULL) ? end : path) - url;
293 path += 3; /* skip "://" */
295 /* Remove HTML anchor if present */
296 end = strchr (path, '#');
/* Either way path becomes a heap-allocated copy from here on: cut at the
 * '#' when one was found, whole otherwise.
 * NOTE(review): the if/else selecting between the two lines below is not
 * visible in this excerpt. */
298 path = strndup (path, end - path);
300 path = strdup (path);
301 if (unlikely(path == NULL))
302 return NULL; /* boom! */
/* The scheme is compared case-insensitively and must be exactly four
 * characters long. */
307 if (schemelen == 4 && !strncasecmp (url, "file", 4))
309 #if (!defined (WIN32) && !defined (__OS2__)) || defined (UNDER_CE)
310 /* Leading slash => local path */
313 /* Local path disguised as a remote one */
314 if (!strncasecmp (path, "localhost/", 10))
/* "localhost" is nine characters long: shifting from offset 9 drops the
 * host name but keeps the slash. The + 1 moves the terminating nul too. */
315 return memmove (path, path + 9, strlen (path + 9) + 1);
317 /* cannot start with a space */
/* Visit every forward slash of the path in turn.
 * NOTE(review): the loop body is not visible in this excerpt. */
320 for (char *p = strchr (path, '/'); p; p = strchr (p + 1, '/'))
323 /* Leading backslash => local path */
/* Drop the first character by shifting the rest of the string, terminating
 * nul included, one position to the left. */
325 return memmove (path, path + 1, strlen (path + 1) + 1);
326 /* Local path disguised as a remote one */
327 if (!strncasecmp (path, "localhost\\", 10))
/* Here the shift starts at offset 10, so the backslash following the host
 * name is dropped as well. */
328 return memmove (path, path + 10, strlen (path + 10) + 1);
/* A non-empty remaining path is prefixed with two backslashes. */
330 if (*path && asprintf (&ret, "\\\\%s", path) == -1)
333 /* non-local path :-( */
/* The scheme is compared case-insensitively and must be exactly two
 * characters long. */
336 if (schemelen == 2 && !strncasecmp (url, "fd", 2))
/* Base 0 lets strtol() pick the base from the prefix of the number; end
 * receives the position where parsing stopped. */
338 int fd = strtol (path, &end, 0);
343 #if !defined( WIN32 ) && !defined( __OS2__ )
/* NOTE(review): the labels selecting among the names below are not visible
 * in this excerpt - presumably descriptors 0, 1 and 2, with any other
 * value going through /dev/fd. */
347 ret = strdup ("/dev/stdin");
350 ret = strdup ("/dev/stdout");
353 ret = strdup ("/dev/stderr");
356 if (asprintf (&ret, "/dev/fd/%d", fd) == -1)
360 /* XXX: Does this work on WinCE? */
362 ret = strdup ("CON");
370 return ret; /* unknown scheme */