You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

241 lines
8.4KB

  1. /*
  2. * Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/avstring.h"
  21. #include "libavutil/common.h"
  22. #include "libavutil/parseutils.h"
  23. #include "htmlsubtitles.h"
  24. #include <ctype.h>
  /*
   * Parse an HTML colour specification and pack it into a single integer.
   *
   * The specification starts at str and ends at the first double quote,
   * space, '>' or end of string. Parsing itself is delegated to
   * av_parse_color(), which receives log_ctx.
   *
   * Returns -1 if av_parse_color() reports an error; otherwise returns
   * rgba[0] in bits 0-7, rgba[1] in bits 8-15 and rgba[2] in bits 16-23
   * (rgba[3] is ignored). Assuming av_parse_color() stores the components
   * in R, G, B, A order, that is the 0xBBGGRR layout -- confirm against
   * the av_parse_color() documentation.
   */
  static int html_color_parse(void *log_ctx, const char *str)
  {
      uint8_t rgba[4];
      const size_t spec_len = strcspn(str, "\" >");
      const int ret = av_parse_color(rgba, str, spec_len, log_ctx);

      if (ret < 0)
          return -1;

      return (rgba[2] << 16) | (rgba[1] << 8) | rgba[0];
  }
  /*
   * Slots for the <font> attributes tracked per stack level.
   * PARAM_SIZE, PARAM_COLOR and PARAM_FACE are used as indices into
   * SrtStack.param[]; PARAM_NUMBER is the number of such slots.
   */
  enum {
      PARAM_UNKNOWN = -1, /* sentinel; does not index any slot */
      PARAM_SIZE    =  0, /* size=  attribute */
      PARAM_COLOR   =  1, /* color= attribute */
      PARAM_FACE    =  2, /* face=  attribute */
      PARAM_NUMBER  =  3  /* count of real slots */
  };
  39. typedef struct SrtStack {
  40. char tag[128];
  41. char param[PARAM_NUMBER][128];
  42. } SrtStack;
  43. static void rstrip_spaces_buf(AVBPrint *buf)
  44. {
  45. if (av_bprint_is_complete(buf))
  46. while (buf->len > 0 && buf->str[buf->len - 1] == ' ')
  47. buf->str[--buf->len] = 0;
  48. }
  /*
   * Fast check for an ASS alignment override at the start of a string.
   * Functionally equivalent to this sscanf call:
   *
   * sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0
   *
   * Returns 1 if in begins with "{\an", exactly one decimal digit and "}",
   * 0 otherwise. The string is never read past its NUL terminator: each
   * test below only runs once the previous characters are known to be
   * non-NUL.
   */
  static int scanbraces(const char *in)
  {
      if (strncmp(in, "{\\an", 4) != 0)
          return 0;

      /* Plain char may be negative (e.g. UTF-8 bytes in subtitle text);
       * passing such a value to isdigit() is undefined behaviour, so go
       * through unsigned char first. */
      if (!isdigit((unsigned char)in[4]))
          return 0;

      if (in[5] != '}')
          return 0;

      return 1;
  }
  /*
   * Fast code for scanning the rest of a tag. Functionally equivalent to
   * this sscanf-based test (with *lenp preset to 0 by the caller):
   *
   * sscanf(in, "%127[^<>]>%n", buffer, lenp) >= 1 && *lenp > 0
   *
   * in     points just after the opening '<' (and after the '/' of a
   *        closing tag).
   * buffer receives the tag body, NUL-terminated, on success; it must hold
   *        at least 128 bytes. On failure its contents are unspecified.
   * lenp   receives, on success, the number of characters consumed from
   *        in, including the terminating '>'. Untouched on failure.
   *
   * Returns 1 when 1 to 127 characters other than '<' and '>' are followed
   * by '>', and 0 otherwise (end of string, a nested '<', an empty body or
   * a body that is too long).
   */
  static int scantag(const char *in, char *buffer, int *lenp)
  {
      int len;

      for (len = 0; len < 128; len++) {
          const char c = in[len];

          if (c == '\0' || c == '<')
              return 0;

          if (c == '>') {
              /* "%[" must match at least one character, so "<>" and "</>"
               * are not tags. Accepting them would hand the caller an
               * empty tag name. */
              if (len == 0)
                  return 0;
              buffer[len] = '\0';
              *lenp = len + 1;
              return 1;
          }

          buffer[len] = c;
      }

      /* 128 characters without a closing '>': too long for buffer. */
      return 0;
  }
  93. int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in)
  94. {
  95. char *param, buffer[128], tmp[128];
  96. int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0;
  97. SrtStack stack[16];
  98. int closing_brace_missing = 0;
  99. stack[0].tag[0] = 0;
  100. strcpy(stack[0].param[PARAM_SIZE], "{\\fs}");
  101. strcpy(stack[0].param[PARAM_COLOR], "{\\c}");
  102. strcpy(stack[0].param[PARAM_FACE], "{\\fn}");
  103. for (; !end && *in; in++) {
  104. switch (*in) {
  105. case '\r':
  106. break;
  107. case '\n':
  108. if (line_start) {
  109. end = 1;
  110. break;
  111. }
  112. rstrip_spaces_buf(dst);
  113. av_bprintf(dst, "\\N");
  114. line_start = 1;
  115. break;
  116. case ' ':
  117. if (!line_start)
  118. av_bprint_chars(dst, *in, 1);
  119. break;
  120. case '{': /* skip all {\xxx} substrings except for {\an%d}
  121. and all microdvd like styles such as {Y:xxx} */
  122. an += scanbraces(in);
  123. if (!closing_brace_missing) {
  124. if ( (an != 1 && in[1] == '\\')
  125. || (in[1] && strchr("CcFfoPSsYy", in[1]) && in[2] == ':')) {
  126. char *bracep = strchr(in+2, '}');
  127. if (bracep) {
  128. in = bracep;
  129. break;
  130. } else
  131. closing_brace_missing = 1;
  132. }
  133. }
  134. av_bprint_chars(dst, *in, 1);
  135. break;
  136. case '<':
  137. tag_close = in[1] == '/';
  138. len = 0;
  139. if (scantag(in+tag_close+1, buffer, &len) && len > 0) {
  140. const char *tagname = buffer;
  141. while (*tagname == ' ')
  142. tagname++;
  143. if ((param = strchr(tagname, ' ')))
  144. *param++ = 0;
  145. if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack) && *tagname) ||
  146. ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, tagname))) {
  147. int i, j, unknown = 0;
  148. in += len + tag_close;
  149. if (!tag_close)
  150. memset(stack+sptr, 0, sizeof(*stack));
  151. if (!strcmp(tagname, "font")) {
  152. if (tag_close) {
  153. for (i=PARAM_NUMBER-1; i>=0; i--)
  154. if (stack[sptr-1].param[i][0])
  155. for (j=sptr-2; j>=0; j--)
  156. if (stack[j].param[i][0]) {
  157. av_bprintf(dst, "%s", stack[j].param[i]);
  158. break;
  159. }
  160. } else {
  161. while (param) {
  162. if (!strncmp(param, "size=", 5)) {
  163. unsigned font_size;
  164. param += 5 + (param[5] == '"');
  165. if (sscanf(param, "%u", &font_size) == 1) {
  166. snprintf(stack[sptr].param[PARAM_SIZE],
  167. sizeof(stack[0].param[PARAM_SIZE]),
  168. "{\\fs%u}", font_size);
  169. }
  170. } else if (!strncmp(param, "color=", 6)) {
  171. param += 6 + (param[6] == '"');
  172. snprintf(stack[sptr].param[PARAM_COLOR],
  173. sizeof(stack[0].param[PARAM_COLOR]),
  174. "{\\c&H%X&}",
  175. html_color_parse(log_ctx, param));
  176. } else if (!strncmp(param, "face=", 5)) {
  177. param += 5 + (param[5] == '"');
  178. len = strcspn(param,
  179. param[-1] == '"' ? "\"" :" ");
  180. av_strlcpy(tmp, param,
  181. FFMIN(sizeof(tmp), len+1));
  182. param += len;
  183. snprintf(stack[sptr].param[PARAM_FACE],
  184. sizeof(stack[0].param[PARAM_FACE]),
  185. "{\\fn%s}", tmp);
  186. }
  187. if ((param = strchr(param, ' ')))
  188. param++;
  189. }
  190. for (i=0; i<PARAM_NUMBER; i++)
  191. if (stack[sptr].param[i][0])
  192. av_bprintf(dst, "%s", stack[sptr].param[i]);
  193. }
  194. } else if (tagname[0] && !tagname[1] && strspn(tagname, "bisu") == 1) {
  195. av_bprintf(dst, "{\\%c%d}", tagname[0], !tag_close);
  196. } else {
  197. unknown = 1;
  198. snprintf(tmp, sizeof(tmp), "</%s>", tagname);
  199. }
  200. if (tag_close) {
  201. sptr--;
  202. } else if (unknown && !strstr(in, tmp)) {
  203. in -= len + tag_close;
  204. av_bprint_chars(dst, *in, 1);
  205. } else
  206. av_strlcpy(stack[sptr++].tag, tagname,
  207. sizeof(stack[0].tag));
  208. break;
  209. }
  210. }
  211. default:
  212. av_bprint_chars(dst, *in, 1);
  213. break;
  214. }
  215. if (*in != ' ' && *in != '\r' && *in != '\n')
  216. line_start = 0;
  217. }
  218. if (!av_bprint_is_complete(dst))
  219. return AVERROR(ENOMEM);
  220. while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2))
  221. dst->len -= 2;
  222. dst->str[dst->len] = 0;
  223. rstrip_spaces_buf(dst);
  224. return 0;
  225. }