You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

343 lines
12KB

  1. /*
  2. * Copyright (c) 2007 Mans Rullgard
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #ifndef AVUTIL_AVSTRING_H
  21. #define AVUTIL_AVSTRING_H
  22. #include <stddef.h>
  23. #include <stdint.h>
  24. #include "attributes.h"
  25. /**
  26. * @addtogroup lavu_string
  27. * @{
  28. */
  29. /**
  30. * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
  31. * the address of the first character in str after the prefix.
  32. *
  33. * @param str input string
  34. * @param pfx prefix to test
  35. * @param ptr updated if the prefix is matched inside str
  36. * @return non-zero if the prefix matches, zero otherwise
  37. */
  38. int av_strstart(const char *str, const char *pfx, const char **ptr);
  39. /**
  40. * Return non-zero if pfx is a prefix of str independent of case. If
  41. * it is, *ptr is set to the address of the first character in str
  42. * after the prefix.
  43. *
  44. * @param str input string
  45. * @param pfx prefix to test
  46. * @param ptr updated if the prefix is matched inside str
  47. * @return non-zero if the prefix matches, zero otherwise
  48. */
  49. int av_stristart(const char *str, const char *pfx, const char **ptr);
  50. /**
  51. * Locate the first case-independent occurrence in the string haystack
  52. * of the string needle. A zero-length string needle is considered to
  53. * match at the start of haystack.
  54. *
  55. * This function is a case-insensitive version of the standard strstr().
  56. *
  57. * @param haystack string to search in
  58. * @param needle string to search for
  59. * @return pointer to the located match within haystack
  60. * or a null pointer if no match
  61. */
  62. char *av_stristr(const char *haystack, const char *needle);
  63. /**
  64. * Locate the first occurrence of the string needle in the string haystack
  65. * where not more than hay_length characters are searched. A zero-length
  66. * string needle is considered to match at the start of haystack.
  67. *
  68. * This function is a length-limited version of the standard strstr().
  69. *
  70. * @param haystack string to search in
  71. * @param needle string to search for
  72. * @param hay_length length of string to search in
  73. * @return pointer to the located match within haystack
  74. * or a null pointer if no match
  75. */
  76. char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
  77. /**
  78. * Copy the string src to dst, but no more than size - 1 bytes, and
  79. * null-terminate dst.
  80. *
  81. * This function is the same as BSD strlcpy().
  82. *
  83. * @param dst destination buffer
  84. * @param src source string
  85. * @param size size of destination buffer
  86. * @return the length of src
  87. *
  88. * @warning since the return value is the length of src, src absolutely
  89. * _must_ be a properly 0-terminated string, otherwise this will read beyond
  90. * the end of the buffer and possibly crash.
  91. */
  92. size_t av_strlcpy(char *dst, const char *src, size_t size);
  93. /**
  94. * Append the string src to the string dst, but to a total length of
  95. * no more than size - 1 bytes, and null-terminate dst.
  96. *
  97. * This function is similar to BSD strlcat(), but differs when
  98. * size <= strlen(dst).
  99. *
  100. * @param dst destination buffer
  101. * @param src source string
  102. * @param size size of destination buffer
  103. * @return the total length of src and dst
  104. *
  105. * @warning since the return value uses the length of src and dst, these
  106. * absolutely _must_ be properly 0-terminated strings, otherwise this
  107. * will read beyond the end of the buffer and possibly crash.
  108. */
  109. size_t av_strlcat(char *dst, const char *src, size_t size);
  110. /**
  111. * Append output to a string, according to a format. Never write out of
  112. * the destination buffer, and always put a terminating 0 within
  113. * the buffer.
  114. * @param dst destination buffer (string to which the output is
  115. * appended)
  116. * @param size total size of the destination buffer
  117. * @param fmt printf-compatible format string, specifying how the
  118. * following parameters are used
  119. * @return the length of the string that would have been generated
  120. * if enough space had been available
  121. */
  122. size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
  123. /**
  124. * Print arguments following specified format into a large enough auto
  125. * allocated buffer. It is similar to GNU asprintf().
  126. * @param fmt printf-compatible format string, specifying how the
  127. * following parameters are used.
  128. * @return the allocated string
  129. * @note You have to free the string yourself with av_free().
  130. */
  131. char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);
  132. /**
  133. * Convert a number to an av_malloced string.
  134. */
  135. char *av_d2str(double d);
  136. /**
  137. * Unescape the given string until a non escaped terminating char,
  138. * and return the token corresponding to the unescaped string.
  139. *
  140. * The normal \ and ' escaping is supported. Leading and trailing
  141. * whitespaces are removed, unless they are escaped with '\' or are
  142. * enclosed between ''.
  143. *
  144. * @param buf the buffer to parse, buf will be updated to point to the
  145. * terminating char
  146. * @param term a 0-terminated list of terminating chars
  147. * @return the malloced unescaped string, which must be av_freed by
  148. * the user, NULL in case of allocation failure
  149. */
  150. char *av_get_token(const char **buf, const char *term);
  151. /**
  152. * Split the string into several tokens which can be accessed by
  153. * successive calls to av_strtok().
  154. *
  155. * A token is defined as a sequence of characters not belonging to the
  156. * set specified in delim.
  157. *
  158. * On the first call to av_strtok(), s should point to the string to
  159. * parse, and the value of saveptr is ignored. In subsequent calls, s
  160. * should be NULL, and saveptr should be unchanged since the previous
  161. * call.
  162. *
  163. * This function is similar to strtok_r() defined in POSIX.1.
  164. *
  165. * @param s the string to parse, may be NULL
  166. * @param delim 0-terminated list of token delimiters, must be non-NULL
  167. * @param saveptr user-provided pointer which points to stored
  168. * information necessary for av_strtok() to continue scanning the same
  169. * string. saveptr is updated to point to the next character after the
  170. * first delimiter found, or to NULL if the string was terminated
  171. * @return the found token, or NULL when no token is found
  172. */
  173. char *av_strtok(char *s, const char *delim, char **saveptr);
  174. /**
  175. * Locale-independent conversion of ASCII isdigit.
  176. */
  177. int av_isdigit(int c);
  178. /**
  179. * Locale-independent conversion of ASCII isgraph.
  180. */
  181. int av_isgraph(int c);
  182. /**
  183. * Locale-independent conversion of ASCII isspace.
  184. */
  185. int av_isspace(int c);
  /**
   * Map an ASCII lowercase letter to its uppercase counterpart without
   * consulting the current locale.
   *
   * @param c character code to convert
   * @return the uppercase form when c lies in 'a'..'z', otherwise c unchanged
   */
  static inline int av_toupper(int c)
  {
      /* Only codes in 'a'..'z' are touched; they get bit 0x20 flipped. */
      const int is_lower = c >= 'a' && c <= 'z';

      return is_lower ? (c ^ 0x20) : c;
  }
  /**
   * Map an ASCII uppercase letter to its lowercase counterpart without
   * consulting the current locale.
   *
   * @param c character code to convert
   * @return the lowercase form when c lies in 'A'..'Z', otherwise c unchanged
   */
  static inline int av_tolower(int c)
  {
      /* Anything outside 'A'..'Z' passes through untouched. */
      if (c < 'A' || c > 'Z')
          return c;

      /* For the remaining codes, flip bit 0x20. */
      return c ^ 0x20;
  }
  204. /**
  205. * Locale-independent conversion of ASCII isxdigit.
  206. */
  207. int av_isxdigit(int c);
  208. /**
  209. * Locale-independent case-insensitive compare.
  210. * @note This means only ASCII-range characters are case-insensitive
  211. */
  212. int av_strcasecmp(const char *a, const char *b);
  213. /**
  214. * Locale-independent case-insensitive compare.
  215. * @note This means only ASCII-range characters are case-insensitive
  216. */
  217. int av_strncasecmp(const char *a, const char *b, size_t n);
  218. /**
  219. * Thread safe basename.
  220. * @param path the path, on DOS both \ and / are considered separators.
  221. * @return pointer to the basename substring.
  222. */
  223. const char *av_basename(const char *path);
  224. /**
  225. * Thread safe dirname.
  226. * @param path the path, on DOS both \ and / are considered separators.
  227. * @return the path with the separator replaced by the string terminator or ".".
  228. * @note the function may change the input string.
  229. */
  230. const char *av_dirname(char *path);
  /**
   * Escaping strategies selectable through the mode argument of av_escape().
   */
  enum AVEscapeMode {
      AV_ESCAPE_MODE_AUTO      = 0, ///< Use auto-selected escaping mode.
      AV_ESCAPE_MODE_BACKSLASH = 1, ///< Use backslash escaping.
      AV_ESCAPE_MODE_QUOTE     = 2, ///< Use single-quote escaping.
  };
  236. /**
  237. * Consider spaces special and escape them even in the middle of the
  238. * string.
  239. *
  240. * This is equivalent to adding the whitespace characters to the special
  241. * characters lists, except it is guaranteed to use the exact same list
  242. * of whitespace characters as the rest of libavutil.
  243. */
  244. #define AV_ESCAPE_FLAG_WHITESPACE 0x01
  245. /**
  246. * Escape only specified special characters.
  247. * Without this flag, escape also any characters that may be considered
  248. * special by av_get_token(), such as the single quote.
  249. */
  250. #define AV_ESCAPE_FLAG_STRICT 0x02
  251. /**
  252. * Escape string in src, and put the escaped string in an allocated
  253. * string in *dst, which must be freed with av_free().
  254. *
  255. * @param dst pointer where an allocated string is put
  256. * @param src string to escape, must be non-NULL
  257. * @param special_chars string containing the special characters which
  258. * need to be escaped, can be NULL
  259. * @param mode escape mode to employ, see enum AVEscapeMode.
  260. * Any unknown value for mode will be considered equivalent to
  261. * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
  262. * notice.
  263. * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_* macros
  264. * @return the length of the allocated string, or a negative error code in case of error
  265. * @see av_bprint_escape()
  266. */
  267. int av_escape(char **dst, const char *src, const char *special_chars,
  268. enum AVEscapeMode mode, int flags);
  /*
   * Flags understood by av_utf8_decode(). Each flag occupies its own bit so
   * that several of them can be combined with bitwise OR.
   */
  #define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
  #define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
  #define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
  #define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML
  /*
   * Union of all AV_UTF8_FLAG_ACCEPT_* flags.
   *
   * The replacement list is parenthesized so the macro behaves as a single
   * operand: without the parentheses, `x & AV_UTF8_FLAG_ACCEPT_ALL` would
   * parse as `(x & 1) | 2 | 4`, and `~AV_UTF8_FLAG_ACCEPT_ALL` would only
   * negate the first flag.
   */
  #define AV_UTF8_FLAG_ACCEPT_ALL \
      (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | \
       AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS    | \
       AV_UTF8_FLAG_ACCEPT_SURROGATES)
  275. /**
  276. * Read and decode a single UTF-8 code point (character) from the
  277. * buffer in *buf, and update *buf to point to the next byte to
  278. * decode.
  279. *
  280. * In case of an invalid byte sequence, the pointer will be updated to
  281. * the next byte after the invalid sequence and the function will
  282. * return an error code.
  283. *
  284. * Depending on the specified flags, the function will also fail in
  285. * case the decoded code point does not belong to a valid range.
  286. *
  287. * @note For speed-relevant code a carefully implemented use of
  288. * GET_UTF8() may be preferred.
  289. *
  290. * @param codep pointer used to return the parsed code in case of success.
  291. * The value in *codep is set even in case the range check fails.
  292. * @param bufp pointer to the address of the first byte of the sequence
  293. * to decode, updated by the function to point to the
  294. * byte next after the decoded sequence
  295. * @param buf_end pointer to the end of the buffer, points to the next
  296. * byte past the last in the buffer. This is used to
  297. * avoid buffer overreads (in case of an unfinished
  298. * UTF-8 sequence towards the end of the buffer).
  299. * @param flags a collection of AV_UTF8_FLAG_* flags
  300. * @return >= 0 in case a sequence was successfully read, a negative
  301. * value in case of invalid sequence
  302. */
  303. int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
  304. unsigned int flags);
  305. /**
  306. * @}
  307. */
  308. #endif /* AVUTIL_AVSTRING_H */