2 * Copyright (c) 2007 Mans Rullgard
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef AVUTIL_AVSTRING_H
22 #define AVUTIL_AVSTRING_H
26 #include "attributes.h"
29 * @addtogroup lavu_string
34 * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
35 * the address of the first character in str after the prefix.
37 * @param str input string
38 * @param pfx prefix to test
39 * @param ptr updated if the prefix is matched inside str
40 * @return non-zero if the prefix matches, zero otherwise
42 int av_strstart(const char *str, const char *pfx, const char **ptr);
45 * Return non-zero if pfx is a prefix of str independent of case. If
46 * it is, *ptr is set to the address of the first character in str after the prefix.
49 * @param str input string
50 * @param pfx prefix to test
51 * @param ptr updated if the prefix is matched inside str
52 * @return non-zero if the prefix matches, zero otherwise
54 int av_stristart(const char *str, const char *pfx, const char **ptr);
57 * Locate the first case-independent occurrence in the string haystack
58 * of the string needle. A zero-length string needle is considered to
59 * match at the start of haystack.
61 * This function is a case-insensitive version of the standard strstr().
63 * @param haystack string to search in
64 * @param needle string to search for
65 * @return pointer to the located match within haystack
66 * or a null pointer if no match
68 char *av_stristr(const char *haystack, const char *needle);
71 * Locate the first occurrence of the string needle in the string haystack
72 * where not more than hay_length characters are searched. A zero-length
73 * string needle is considered to match at the start of haystack.
75 * This function is a length-limited version of the standard strstr().
77 * @param haystack string to search in
78 * @param needle string to search for
79 * @param hay_length length of string to search in
80 * @return pointer to the located match within haystack
81 * or a null pointer if no match
83 char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
86 * Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.
89 * This function is the same as BSD strlcpy().
91 * @param dst destination buffer
92 * @param src source string
93 * @param size size of destination buffer
94 * @return the length of src
96 * @warning since the return value is the length of src, src absolutely
97 * _must_ be a properly 0-terminated string, otherwise this will read beyond
98 * the end of the buffer and possibly crash.
100 size_t av_strlcpy(char *dst, const char *src, size_t size);
103 * Append the string src to the string dst, but to a total length of
104 * no more than size - 1 bytes, and null-terminate dst.
106 * This function is similar to BSD strlcat(), but differs when
107 * size <= strlen(dst).
109 * @param dst destination buffer
110 * @param src source string
111 * @param size size of destination buffer
112 * @return the total length of src and dst
114 * @warning since the return value uses the lengths of src and dst, these
115 * absolutely _must_ be properly 0-terminated strings, otherwise this
116 * will read beyond the end of the buffer and possibly crash.
118 size_t av_strlcat(char *dst, const char *src, size_t size);
121 * Append output to a string, according to a format. Never write out of
122 * the destination buffer, and always put a terminating 0 within the buffer.
124 * @param dst destination buffer (string to which the output is appended)
126 * @param size total size of the destination buffer
127 * @param fmt printf-compatible format string, specifying how the
128 * following parameters are used
129 * @return the length of the string that would have been generated
130 * if enough space had been available
132 size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
135 * Print arguments following specified format into a large enough auto
136 * allocated buffer. It is similar to GNU asprintf().
137 * @param fmt printf-compatible format string, specifying how the
138 * following parameters are used.
139 * @return the allocated string
140 * @note You have to free the string yourself with av_free().
142 char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);
145 * Convert a number to an av_malloced string.
147 char *av_d2str(double d);
150 * Unescape the given string until a non escaped terminating char,
151 * and return the token corresponding to the unescaped string.
153 * The normal \ and ' escaping is supported. Leading and trailing
154 * whitespaces are removed, unless they are escaped with '\' or are
155 * enclosed between ''.
157 * @param buf the buffer to parse, buf will be updated to point to the terminating char
159 * @param term a 0-terminated list of terminating chars
160 * @return the malloced unescaped string, which must be av_freed by
161 * the user, NULL in case of allocation failure
163 char *av_get_token(const char **buf, const char *term);
166 * Split the string into several tokens which can be accessed by
167 * successive calls to av_strtok().
169 * A token is defined as a sequence of characters not belonging to the
170 * set specified in delim.
172 * On the first call to av_strtok(), s should point to the string to
173 * parse, and the value of saveptr is ignored. In subsequent calls, s
174 * should be NULL, and saveptr should be unchanged since the previous call.
177 * This function is similar to strtok_r() defined in POSIX.1.
179 * @param s the string to parse, may be NULL
180 * @param delim 0-terminated list of token delimiters, must be non-NULL
181 * @param saveptr user-provided pointer which points to stored
182 * information necessary for av_strtok() to continue scanning the same
183 * string. saveptr is updated to point to the next character after the
184 * first delimiter found, or to NULL if the string was terminated
185 * @return the found token, or NULL when no token is found
187 char *av_strtok(char *s, const char *delim, char **saveptr);
190 * Locale-independent conversion of ASCII isdigit.
192 int av_isdigit(int c);
195 * Locale-independent conversion of ASCII isgraph.
197 int av_isgraph(int c);
200 * Locale-independent conversion of ASCII isspace.
202 int av_isspace(int c);
205 * Locale-independent conversion of ASCII characters to uppercase.
207 static inline int av_toupper(int c)
209 if (c >= 'a' && c <= 'z')
215 * Locale-independent conversion of ASCII characters to lowercase.
217 static inline int av_tolower(int c)
219 if (c >= 'A' && c <= 'Z')
225 * Locale-independent conversion of ASCII isxdigit.
227 int av_isxdigit(int c);
230 * Locale-independent case-insensitive compare.
231 * @note This means only ASCII-range characters are case-insensitive
233 int av_strcasecmp(const char *a, const char *b);
236 * Locale-independent case-insensitive compare.
237 * @note This means only ASCII-range characters are case-insensitive
239 int av_strncasecmp(const char *a, const char *b, size_t n);
243 * Thread safe basename.
244 * @param path the path, on DOS both \ and / are considered separators.
245 * @return pointer to the basename substring.
247 const char *av_basename(const char *path);
250 * Thread safe dirname.
251 * @param path the path, on DOS both \ and / are considered separators.
252 * @return the path with the separator replaced by the string terminator or ".".
253 * @note the function may change the input string.
255 const char *av_dirname(char *path);
258 AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode.
259 AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
260 AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping.
264 * Consider spaces special and escape them even in the middle of the string.
267 * This is equivalent to adding the whitespace characters to the special
268 * characters lists, except it is guaranteed to use the exact same list
269 * of whitespace characters as the rest of libavutil.
271 #define AV_ESCAPE_FLAG_WHITESPACE 0x01
274 * Escape only specified special characters.
275 * Without this flag, escape also any characters that may be considered
276 * special by av_get_token(), such as the single quote.
278 #define AV_ESCAPE_FLAG_STRICT 0x02
281 * Escape string in src, and put the escaped string in an allocated
282 * string in *dst, which must be freed with av_free().
284 * @param dst pointer where an allocated string is put
285 * @param src string to escape, must be non-NULL
286 * @param special_chars string containing the special characters which
287 * need to be escaped, can be NULL
288 * @param mode escape mode to employ, see the AV_ESCAPE_MODE_* values of enum AVEscapeMode.
289 * Any unknown value for mode will be considered equivalent to
290 * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without notice.
292 * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_ macros
293 * @return the length of the allocated string, or a negative error code in case of error
294 * @see av_bprint_escape()
296 int av_escape(char **dst, const char *src, const char *special_chars,
297 enum AVEscapeMode mode, int flags);
/*
 * Flags accepted by av_utf8_decode(). They select which decoded code points
 * pass the range check performed after a sequence has been read.
 */
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogate codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

/*
 * Union of the three ACCEPT flags above.
 *
 * The expansion is parenthesized so the macro behaves as a single operand:
 * without the parentheses, an expression such as
 * `flags & AV_UTF8_FLAG_ACCEPT_ALL` would parse as
 * `(flags & ..._BIG_CODES) | ..._NON_CHARACTERS | ..._SURROGATES`,
 * because `&` binds tighter than `|`.
 */
#define AV_UTF8_FLAG_ACCEPT_ALL \
    (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS | AV_UTF8_FLAG_ACCEPT_SURROGATES)
308 * Read and decode a single UTF-8 code point (character) from the
309 * buffer in *bufp, and update *bufp to point to the next byte to decode.
312 * In case of an invalid byte sequence, the pointer will be updated to
313 * the next byte after the invalid sequence and the function will
314 * return an error code.
316 * Depending on the specified flags, the function will also fail in
317 * case the decoded code point does not belong to a valid range.
319 * @note For speed-relevant code a carefully implemented use of
320 * GET_UTF8() may be preferred.
322 * @param codep pointer used to return the parsed code in case of success.
323 * The value in *codep is set even in case the range check fails.
324 * @param bufp pointer to the address of the first byte of the sequence
325 * to decode, updated by the function to point to the
326 * byte next after the decoded sequence
327 * @param buf_end pointer to the end of the buffer, points to the next
328 * byte past the last in the buffer. This is used to
329 * avoid buffer overreads (in case of an unfinished
330 * UTF-8 sequence towards the end of the buffer).
331 * @param flags a collection of AV_UTF8_FLAG_* flags
332 * @return >= 0 in case a sequence was successfully read, a negative
333 * value in case of invalid sequence
335 int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
342 #endif /* AVUTIL_AVSTRING_H */