avstring.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. /*
  2. * Copyright (c) 2007 Mans Rullgard
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #ifndef AVUTIL_AVSTRING_H
  21. #define AVUTIL_AVSTRING_H
  22. #include <stddef.h>
  23. #include <stdint.h>
  24. #include "attributes.h"
  25. /**
  26. * @addtogroup lavu_string
  27. * @{
  28. */
  29. /**
  30. * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
  31. * the address of the first character in str after the prefix.
  32. *
  33. * @param str input string
  34. * @param pfx prefix to test
  35. * @param ptr updated if the prefix is matched inside str
  36. * @return non-zero if the prefix matches, zero otherwise
  37. */
  38. int av_strstart(const char *str, const char *pfx, const char **ptr);
  39. /**
  40. * Return non-zero if pfx is a prefix of str independent of case. If
  41. * it is, *ptr is set to the address of the first character in str
  42. * after the prefix.
  43. *
  44. * @param str input string
  45. * @param pfx prefix to test
  46. * @param ptr updated if the prefix is matched inside str
  47. * @return non-zero if the prefix matches, zero otherwise
  48. */
  49. int av_stristart(const char *str, const char *pfx, const char **ptr);
  50. /**
  51. * Locate the first case-independent occurrence in the string haystack
  52. * of the string needle. A zero-length string needle is considered to
  53. * match at the start of haystack.
  54. *
  55. * This function is a case-insensitive version of the standard strstr().
  56. *
  57. * @param haystack string to search in
  58. * @param needle string to search for
  59. * @return pointer to the located match within haystack
  60. * or a null pointer if no match
  61. */
  62. char *av_stristr(const char *haystack, const char *needle);
  63. /**
  64. * Locate the first occurrence of the string needle in the string haystack
  65. * where not more than hay_length characters are searched. A zero-length
  66. * string needle is considered to match at the start of haystack.
  67. *
  68. * This function is a length-limited version of the standard strstr().
  69. *
  70. * @param haystack string to search in
  71. * @param needle string to search for
  72. * @param hay_length length of string to search in
  73. * @return pointer to the located match within haystack
  74. * or a null pointer if no match
  75. */
  76. char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
  77. /**
  78. * Copy the string src to dst, but no more than size - 1 bytes, and
  79. * null-terminate dst.
  80. *
  81. * This function is the same as BSD strlcpy().
  82. *
  83. * @param dst destination buffer
  84. * @param src source string
  85. * @param size size of destination buffer
  86. * @return the length of src
  87. *
  88. * @warning since the return value is the length of src, src absolutely
  89. * _must_ be a properly 0-terminated string, otherwise this will read beyond
  90. * the end of the buffer and possibly crash.
  91. */
  92. size_t av_strlcpy(char *dst, const char *src, size_t size);
  93. /**
  94. * Append the string src to the string dst, but to a total length of
  95. * no more than size - 1 bytes, and null-terminate dst.
  96. *
  97. * This function is similar to BSD strlcat(), but differs when
  98. * size <= strlen(dst).
  99. *
  100. * @param dst destination buffer
  101. * @param src source string
  102. * @param size size of destination buffer
  103. * @return the total length of src and dst
  104. *
  105. * @warning since the return value uses the lengths of src and dst, these
  106. * absolutely _must_ be properly 0-terminated strings, otherwise this
  107. * will read beyond the end of the buffer and possibly crash.
  108. */
  109. size_t av_strlcat(char *dst, const char *src, size_t size);
  110. /**
  111. * Append output to a string, according to a format. Never write out of
  112. * the destination buffer, and always put a terminating 0 within
  113. * the buffer.
  114. * @param dst destination buffer (string to which the output is
  115. * appended)
  116. * @param size total size of the destination buffer
  117. * @param fmt printf-compatible format string, specifying how the
  118. * following parameters are used
  119. * @return the length of the string that would have been generated
  120. * if enough space had been available
  121. */
  122. size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
  123. /**
  124. * Get the count of continuous non zero chars starting from the beginning.
  125. *
  126. * @param len maximum number of characters to check in the string, that
  127. * is the maximum value which is returned by the function
  128. */
  129. static inline size_t av_strnlen(const char *s, size_t len)
  130. {
  131. size_t i;
  132. for (i = 0; i < len && s[i]; i++)
  133. ;
  134. return i;
  135. }
  136. /**
  137. * Print arguments following specified format into a large enough auto
  138. * allocated buffer. It is similar to GNU asprintf().
  139. * @param fmt printf-compatible format string, specifying how the
  140. * following parameters are used.
  141. * @return the allocated string
  142. * @note You have to free the string yourself with av_free().
  143. */
  144. char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);
  145. /**
  146. * Convert a number to an av_malloced string.
  147. */
  148. char *av_d2str(double d);
  149. /**
  150. * Unescape the given string until a non escaped terminating char,
  151. * and return the token corresponding to the unescaped string.
  152. *
  153. * The normal \ and ' escaping is supported. Leading and trailing
  154. * whitespaces are removed, unless they are escaped with '\' or are
  155. * enclosed between ''.
  156. *
  157. * @param buf the buffer to parse, buf will be updated to point to the
  158. * terminating char
  159. * @param term a 0-terminated list of terminating chars
  160. * @return the malloced unescaped string, which must be av_freed by
  161. * the user, NULL in case of allocation failure
  162. */
  163. char *av_get_token(const char **buf, const char *term);
  164. /**
  165. * Split the string into several tokens which can be accessed by
  166. * successive calls to av_strtok().
  167. *
  168. * A token is defined as a sequence of characters not belonging to the
  169. * set specified in delim.
  170. *
  171. * On the first call to av_strtok(), s should point to the string to
  172. * parse, and the value of saveptr is ignored. In subsequent calls, s
  173. * should be NULL, and saveptr should be unchanged since the previous
  174. * call.
  175. *
  176. * This function is similar to strtok_r() defined in POSIX.1.
  177. *
  178. * @param s the string to parse, may be NULL
  179. * @param delim 0-terminated list of token delimiters, must be non-NULL
  180. * @param saveptr user-provided pointer which points to stored
  181. * information necessary for av_strtok() to continue scanning the same
  182. * string. saveptr is updated to point to the next character after the
  183. * first delimiter found, or to NULL if the string was terminated
  184. * @return the found token, or NULL when no token is found
  185. */
  186. char *av_strtok(char *s, const char *delim, char **saveptr);
  187. /**
  188. * Locale-independent conversion of ASCII isdigit.
  189. */
  190. static inline av_const int av_isdigit(int c)
  191. {
  192. return c >= '0' && c <= '9';
  193. }
  194. /**
  195. * Locale-independent conversion of ASCII isgraph.
  196. */
  197. static inline av_const int av_isgraph(int c)
  198. {
  199. return c > 32 && c < 127;
  200. }
  201. /**
  202. * Locale-independent conversion of ASCII isspace.
  203. */
  204. static inline av_const int av_isspace(int c)
  205. {
  206. return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||
  207. c == '\v';
  208. }
  209. /**
  210. * Locale-independent conversion of ASCII characters to uppercase.
  211. */
  212. static inline av_const int av_toupper(int c)
  213. {
  214. if (c >= 'a' && c <= 'z')
  215. c ^= 0x20;
  216. return c;
  217. }
  218. /**
  219. * Locale-independent conversion of ASCII characters to lowercase.
  220. */
  221. static inline av_const int av_tolower(int c)
  222. {
  223. if (c >= 'A' && c <= 'Z')
  224. c ^= 0x20;
  225. return c;
  226. }
  227. /**
  228. * Locale-independent conversion of ASCII isxdigit.
  229. */
  230. static inline av_const int av_isxdigit(int c)
  231. {
  232. c = av_tolower(c);
  233. return av_isdigit(c) || (c >= 'a' && c <= 'f');
  234. }
  235. /**
  236. * Locale-independent case-insensitive compare.
  237. * @note This means only ASCII-range characters are case-insensitive
  238. */
  239. int av_strcasecmp(const char *a, const char *b);
  240. /**
  241. * Locale-independent case-insensitive compare.
  242. * @note This means only ASCII-range characters are case-insensitive
  243. */
  244. int av_strncasecmp(const char *a, const char *b, size_t n);
  245. /**
  246. * Locale-independent strings replace.
  247. * @note This means only ASCII-range characters are replaced
  248. */
  249. char *av_strireplace(const char *str, const char *from, const char *to);
  250. /**
  251. * Thread safe basename.
  252. * @param path the path, on DOS both \ and / are considered separators.
  253. * @return pointer to the basename substring.
  254. */
  255. const char *av_basename(const char *path);
  256. /**
  257. * Thread safe dirname.
  258. * @param path the path, on DOS both \ and / are considered separators.
  259. * @return the path with the separator replaced by the string terminator or ".".
  260. * @note the function may change the input string.
  261. */
  262. const char *av_dirname(char *path);
  263. /**
  264. * Match instances of a name in a comma-separated list of names.
  265. * List entries are checked from the start to the end of the names list,
  266. * the first match ends further processing. If an entry prefixed with '-'
  267. * matches, then 0 is returned. The "ALL" list entry is considered to
  268. * match all names.
  269. *
  270. * @param name Name to look for.
  271. * @param names List of names.
  272. * @return 1 on match, 0 otherwise.
  273. */
  274. int av_match_name(const char *name, const char *names);
  275. /**
  276. * Append path component to the existing path.
  277. * Path separator '/' is placed between when needed.
  278. * The resulting string has to be freed with av_free().
  279. * @param path base path
  280. * @param component component to be appended
  281. * @return new path or NULL on error.
  282. */
  283. char *av_append_path_component(const char *path, const char *component);
  284. enum AVEscapeMode {
  285. AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode.
  286. AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
  287. AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping.
  288. };
  289. /**
  290. * Consider spaces special and escape them even in the middle of the
  291. * string.
  292. *
  293. * This is equivalent to adding the whitespace characters to the special
  294. * characters lists, except it is guaranteed to use the exact same list
  295. * of whitespace characters as the rest of libavutil.
  296. */
  297. #define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)
  298. /**
  299. * Escape only specified special characters.
  300. * Without this flag, escape also any characters that may be considered
  301. * special by av_get_token(), such as the single quote.
  302. */
  303. #define AV_ESCAPE_FLAG_STRICT (1 << 1)
  304. /**
  305. * Escape string in src, and put the escaped string in an allocated
  306. * string in *dst, which must be freed with av_free().
  307. *
  308. * @param dst pointer where an allocated string is put
  309. * @param src string to escape, must be non-NULL
  310. * @param special_chars string containing the special characters which
  311. * need to be escaped, can be NULL
  312. * @param mode escape mode to employ, see the AV_ESCAPE_MODE_* values of enum AVEscapeMode.
  313. * Any unknown value for mode will be considered equivalent to
  314. * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
  315. * notice.
  316. * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_* macros
  317. * @return the length of the allocated string, or a negative error code in case of error
  318. * @see av_bprint_escape()
  319. */
  320. av_warn_unused_result
  321. int av_escape(char **dst, const char *src, const char *special_chars,
  322. enum AVEscapeMode mode, int flags);
  323. #define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES 1 ///< accept codepoints over 0x10FFFF
  324. #define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS 2 ///< accept non-characters - 0xFFFE and 0xFFFF
  325. #define AV_UTF8_FLAG_ACCEPT_SURROGATES 4 ///< accept UTF-16 surrogates codes
  326. #define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML
  327. #define AV_UTF8_FLAG_ACCEPT_ALL \
  328. AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES
  329. /**
  330. * Read and decode a single UTF-8 code point (character) from the
  331. * buffer in *buf, and update *buf to point to the next byte to
  332. * decode.
  333. *
  334. * In case of an invalid byte sequence, the pointer will be updated to
  335. * the next byte after the invalid sequence and the function will
  336. * return an error code.
  337. *
  338. * Depending on the specified flags, the function will also fail in
  339. * case the decoded code point does not belong to a valid range.
  340. *
  341. * @note For speed-relevant code a carefully implemented use of
  342. * GET_UTF8() may be preferred.
  343. *
  344. * @param codep pointer used to return the parsed code in case of success.
  345. * The value in *codep is set even in case the range check fails.
  346. * @param bufp pointer to the address of the first byte of the sequence
  347. * to decode, updated by the function to point to the
  348. * byte next after the decoded sequence
  349. * @param buf_end pointer to the end of the buffer, points to the next
  350. * byte past the last in the buffer. This is used to
  351. * avoid buffer overreads (in case of an unfinished
  352. * UTF-8 sequence towards the end of the buffer).
  353. * @param flags a collection of AV_UTF8_FLAG_* flags
  354. * @return >= 0 in case a sequence was successfully read, a negative
  355. * value in case of invalid sequence
  356. */
  357. av_warn_unused_result
  358. int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
  359. unsigned int flags);
  360. /**
  361. * Check if a name is in a list.
  362. * @returns 0 if not found, or the 1-based index where it has been found in the
  363. * list.
  364. */
  365. int av_match_list(const char *name, const char *list, char separator);
  366. /**
  367. * @}
  368. */
  369. #endif /* AVUTIL_AVSTRING_H */