You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

506 lines
15KB

  1. /*
  2. * 3GPP TS 26.245 Timed Text decoder
  3. * Copyright (c) 2012 Philip Langdale <philipl@overt.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "ass.h"
  23. #include "libavutil/avstring.h"
  24. #include "libavutil/common.h"
  25. #include "libavutil/bprint.h"
  26. #include "libavutil/intreadwrite.h"
  27. #include "libavutil/mem.h"
  /* Bits tested against the face-style-flags byte of a style record. */
  #define STYLE_FLAG_BOLD         (1 << 0)
  #define STYLE_FLAG_ITALIC       (1 << 1)
  #define STYLE_FLAG_UNDERLINE    (1 << 2)

  /* Number of extradata bytes up to and including the font-table entry count. */
  #define BOX_SIZE_INITIAL        40

  /* Bits of MovTextContext.box_flags: which modifier boxes the packet held. */
  #define STYL_BOX                (1 << 0)
  #define HLIT_BOX                (1 << 1)
  #define HCLR_BOX                (1 << 2)

  /*
   * Values stored in MovTextDefault.alignment and handed to
   * ff_ass_subtitle_header().
   * NOTE(review): these look like ASS numpad-style alignment codes -- confirm.
   */
  #define BOTTOM_LEFT             1
  #define BOTTOM_CENTER           2
  #define BOTTOM_RIGHT            3
  #define MIDDLE_LEFT             4
  #define MIDDLE_CENTER           5
  #define MIDDLE_RIGHT            6
  #define TOP_LEFT                7
  #define TOP_CENTER              8
  #define TOP_RIGHT               9
  /* Default text style read from the codec extradata by mov_text_tx3g(). */
  typedef struct {
      char *font;       /* name of the default font; points at a font-table entry */
      int   fontsize;   /* default font size byte */
      int   color;      /* primary colour, first three bytes of the colour field */
      int   back_color; /* background colour, first three bytes of the field */
      int   bold;       /* non-zero when STYLE_FLAG_BOLD is set */
      int   italic;     /* non-zero when STYLE_FLAG_ITALIC is set */
      int   underline;  /* non-zero when STYLE_FLAG_UNDERLINE is set */
      int   alignment;  /* one of the BOTTOM_/MIDDLE_/TOP_ alignment constants */
  } MovTextDefault;
  /* One font-table entry: a numeric font identifier and its name. */
  typedef struct {
      uint16_t  fontID; /* identifier that style records refer to */
      char     *font;   /* heap-allocated, NUL-terminated font name */
  } FontRecord;
  /* One style record taken from a 'styl' box. */
  typedef struct {
      uint16_t style_start;  /* text position at which the style begins */
      uint16_t style_end;    /* text position at which the style is reset */
      uint8_t  style_flag;   /* combination of the STYLE_FLAG_* bits */
      uint8_t  fontsize;     /* font size applied from style_start on */
      uint16_t style_fontID; /* font identifier looked up in the font table */
  } StyleBox;
  /* Highlighted text range taken from an 'hlit' box. */
  typedef struct {
      uint16_t hlit_start; /* text position at which highlighting begins */
      uint16_t hlit_end;   /* text position at which highlighting ends */
  } HighlightBox;
  /* Highlight colour taken from an 'hclr' box, stored as the four raw bytes. */
  typedef struct {
      uint8_t hlit_color[4];
  } HilightcolorBox;
  72. typedef struct {
  73. StyleBox **s;
  74. StyleBox *s_temp;
  75. HighlightBox h;
  76. HilightcolorBox c;
  77. FontRecord **ftab;
  78. FontRecord *ftab_temp;
  79. MovTextDefault d;
  80. uint8_t box_flags;
  81. uint16_t style_entries, ftab_entries;
  82. uint64_t tracksize;
  83. int size_var;
  84. int count_s, count_f;
  85. } MovTextContext;
  86. typedef struct {
  87. uint32_t type;
  88. size_t base_size;
  89. int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
  90. } Box;
  91. static void mov_text_cleanup(MovTextContext *m)
  92. {
  93. int i;
  94. if (m->box_flags & STYL_BOX) {
  95. for(i = 0; i < m->count_s; i++) {
  96. av_freep(&m->s[i]);
  97. }
  98. av_freep(&m->s);
  99. }
  100. }
  101. static void mov_text_cleanup_ftab(MovTextContext *m)
  102. {
  103. int i;
  104. if (m->ftab) {
  105. for(i = 0; i < m->count_f; i++) {
  106. av_freep(&m->ftab[i]->font);
  107. av_freep(&m->ftab[i]);
  108. }
  109. }
  110. av_freep(&m->ftab);
  111. }
  112. static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m)
  113. {
  114. char *tx3g_ptr = avctx->extradata;
  115. int i, box_size, font_length;
  116. int8_t v_align, h_align;
  117. int style_fontID;
  118. StyleBox s_default;
  119. m->count_f = 0;
  120. m->ftab_entries = 0;
  121. box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */
  122. if (avctx->extradata_size < box_size)
  123. return -1;
  124. // Display Flags
  125. tx3g_ptr += 4;
  126. // Alignment
  127. h_align = *tx3g_ptr++;
  128. v_align = *tx3g_ptr++;
  129. if (h_align == 0) {
  130. if (v_align == 0)
  131. m->d.alignment = TOP_LEFT;
  132. if (v_align == 1)
  133. m->d.alignment = MIDDLE_LEFT;
  134. if (v_align == -1)
  135. m->d.alignment = BOTTOM_LEFT;
  136. }
  137. if (h_align == 1) {
  138. if (v_align == 0)
  139. m->d.alignment = TOP_CENTER;
  140. if (v_align == 1)
  141. m->d.alignment = MIDDLE_CENTER;
  142. if (v_align == -1)
  143. m->d.alignment = BOTTOM_CENTER;
  144. }
  145. if (h_align == -1) {
  146. if (v_align == 0)
  147. m->d.alignment = TOP_RIGHT;
  148. if (v_align == 1)
  149. m->d.alignment = MIDDLE_RIGHT;
  150. if (v_align == -1)
  151. m->d.alignment = BOTTOM_RIGHT;
  152. }
  153. // Background Color
  154. m->d.back_color = AV_RB24(tx3g_ptr);
  155. tx3g_ptr += 4;
  156. // BoxRecord
  157. tx3g_ptr += 8;
  158. // StyleRecord
  159. tx3g_ptr += 4;
  160. // fontID
  161. style_fontID = AV_RB16(tx3g_ptr);
  162. tx3g_ptr += 2;
  163. // face-style-flags
  164. s_default.style_flag = *tx3g_ptr++;
  165. m->d.bold = s_default.style_flag & STYLE_FLAG_BOLD;
  166. m->d.italic = s_default.style_flag & STYLE_FLAG_ITALIC;
  167. m->d.underline = s_default.style_flag & STYLE_FLAG_UNDERLINE;
  168. // fontsize
  169. m->d.fontsize = *tx3g_ptr++;
  170. // Primary color
  171. m->d.color = AV_RB24(tx3g_ptr);
  172. tx3g_ptr += 4;
  173. // FontRecord
  174. // FontRecord Size
  175. tx3g_ptr += 4;
  176. // ftab
  177. tx3g_ptr += 4;
  178. m->ftab_entries = AV_RB16(tx3g_ptr);
  179. tx3g_ptr += 2;
  180. for (i = 0; i < m->ftab_entries; i++) {
  181. box_size += 3;
  182. if (avctx->extradata_size < box_size) {
  183. mov_text_cleanup_ftab(m);
  184. m->ftab_entries = 0;
  185. return -1;
  186. }
  187. m->ftab_temp = av_malloc(sizeof(*m->ftab_temp));
  188. if (!m->ftab_temp) {
  189. mov_text_cleanup_ftab(m);
  190. return AVERROR(ENOMEM);
  191. }
  192. m->ftab_temp->fontID = AV_RB16(tx3g_ptr);
  193. tx3g_ptr += 2;
  194. font_length = *tx3g_ptr++;
  195. box_size = box_size + font_length;
  196. if (avctx->extradata_size < box_size) {
  197. mov_text_cleanup_ftab(m);
  198. m->ftab_entries = 0;
  199. return -1;
  200. }
  201. m->ftab_temp->font = av_malloc(font_length + 1);
  202. if (!m->ftab_temp->font) {
  203. mov_text_cleanup_ftab(m);
  204. return AVERROR(ENOMEM);
  205. }
  206. memcpy(m->ftab_temp->font, tx3g_ptr, font_length);
  207. m->ftab_temp->font[font_length] = '\0';
  208. av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp);
  209. if (!m->ftab) {
  210. mov_text_cleanup_ftab(m);
  211. return AVERROR(ENOMEM);
  212. }
  213. tx3g_ptr = tx3g_ptr + font_length;
  214. }
  215. for (i = 0; i < m->ftab_entries; i++) {
  216. if (style_fontID == m->ftab[i]->fontID)
  217. m->d.font = m->ftab[i]->font;
  218. }
  219. return 0;
  220. }
  221. static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  222. {
  223. m->box_flags |= HLIT_BOX;
  224. m->h.hlit_start = AV_RB16(tsmb);
  225. tsmb += 2;
  226. m->h.hlit_end = AV_RB16(tsmb);
  227. tsmb += 2;
  228. return 0;
  229. }
  230. static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  231. {
  232. m->box_flags |= HCLR_BOX;
  233. memcpy(m->c.hlit_color, tsmb, 4);
  234. tsmb += 4;
  235. return 0;
  236. }
  237. static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  238. {
  239. int i;
  240. m->style_entries = AV_RB16(tsmb);
  241. tsmb += 2;
  242. // A single style record is of length 12 bytes.
  243. if (m->tracksize + m->size_var + 2 + m->style_entries * 12 > avpkt->size)
  244. return -1;
  245. m->box_flags |= STYL_BOX;
  246. for(i = 0; i < m->style_entries; i++) {
  247. m->s_temp = av_malloc(sizeof(*m->s_temp));
  248. if (!m->s_temp) {
  249. mov_text_cleanup(m);
  250. return AVERROR(ENOMEM);
  251. }
  252. m->s_temp->style_start = AV_RB16(tsmb);
  253. tsmb += 2;
  254. m->s_temp->style_end = AV_RB16(tsmb);
  255. tsmb += 2;
  256. m->s_temp->style_fontID = AV_RB16(tsmb);
  257. tsmb += 2;
  258. m->s_temp->style_flag = AV_RB8(tsmb);
  259. tsmb++;
  260. m->s_temp->fontsize = AV_RB8(tsmb);
  261. av_dynarray_add(&m->s, &m->count_s, m->s_temp);
  262. if(!m->s) {
  263. mov_text_cleanup(m);
  264. return AVERROR(ENOMEM);
  265. }
  266. tsmb++;
  267. // text-color-rgba
  268. tsmb += 4;
  269. }
  270. return 0;
  271. }
  272. static const Box box_types[] = {
  273. { MKBETAG('s','t','y','l'), 2, decode_styl },
  274. { MKBETAG('h','l','i','t'), 4, decode_hlit },
  275. { MKBETAG('h','c','l','r'), 4, decode_hclr }
  276. };
  277. const static size_t box_count = FF_ARRAY_ELEMS(box_types);
  278. static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
  279. MovTextContext *m)
  280. {
  281. int i = 0;
  282. int j = 0;
  283. int text_pos = 0;
  284. while (text < text_end) {
  285. if (m->box_flags & STYL_BOX) {
  286. for (i = 0; i < m->style_entries; i++) {
  287. if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
  288. av_bprintf(buf, "{\\r}");
  289. }
  290. }
  291. for (i = 0; i < m->style_entries; i++) {
  292. if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
  293. if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
  294. av_bprintf(buf, "{\\b1}");
  295. if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
  296. av_bprintf(buf, "{\\i1}");
  297. if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
  298. av_bprintf(buf, "{\\u1}");
  299. av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
  300. for (j = 0; j < m->ftab_entries; j++) {
  301. if (m->s[i]->style_fontID == m->ftab[j]->fontID)
  302. av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
  303. }
  304. }
  305. }
  306. }
  307. if (m->box_flags & HLIT_BOX) {
  308. if (text_pos == m->h.hlit_start) {
  309. /* If hclr box is present, set the secondary color to the color
  310. * specified. Otherwise, set primary color to white and secondary
  311. * color to black. These colors will come from TextSampleModifier
  312. * boxes in future and inverse video technique for highlight will
  313. * be implemented.
  314. */
  315. if (m->box_flags & HCLR_BOX) {
  316. av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
  317. m->c.hlit_color[1], m->c.hlit_color[0]);
  318. } else {
  319. av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
  320. }
  321. }
  322. if (text_pos == m->h.hlit_end) {
  323. if (m->box_flags & HCLR_BOX) {
  324. av_bprintf(buf, "{\\2c&H000000&}");
  325. } else {
  326. av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
  327. }
  328. }
  329. }
  330. switch (*text) {
  331. case '\r':
  332. break;
  333. case '\n':
  334. av_bprintf(buf, "\\N");
  335. break;
  336. default:
  337. av_bprint_chars(buf, *text, 1);
  338. break;
  339. }
  340. text++;
  341. text_pos++;
  342. }
  343. return 0;
  344. }
  345. static int mov_text_init(AVCodecContext *avctx) {
  346. /*
  347. * TODO: Handle the default text style.
  348. * NB: Most players ignore styles completely, with the result that
  349. * it's very common to find files where the default style is broken
  350. * and respecting it results in a worse experience than ignoring it.
  351. */
  352. int ret;
  353. MovTextContext *m = avctx->priv_data;
  354. ret = mov_text_tx3g(avctx, m);
  355. if (ret == 0) {
  356. return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color,
  357. m->d.back_color, m->d.bold, m->d.italic,
  358. m->d.underline, m->d.alignment);
  359. } else
  360. return ff_ass_subtitle_header_default(avctx);
  361. }
  362. static int mov_text_decode_frame(AVCodecContext *avctx,
  363. void *data, int *got_sub_ptr, AVPacket *avpkt)
  364. {
  365. AVSubtitle *sub = data;
  366. MovTextContext *m = avctx->priv_data;
  367. int ret, ts_start, ts_end;
  368. AVBPrint buf;
  369. char *ptr = avpkt->data;
  370. char *end;
  371. int text_length, tsmb_type, ret_tsmb;
  372. uint64_t tsmb_size;
  373. const uint8_t *tsmb;
  374. if (!ptr || avpkt->size < 2)
  375. return AVERROR_INVALIDDATA;
  376. /*
  377. * A packet of size two with value zero is an empty subtitle
  378. * used to mark the end of the previous non-empty subtitle.
  379. * We can just drop them here as we have duration information
  380. * already. If the value is non-zero, then it's technically a
  381. * bad packet.
  382. */
  383. if (avpkt->size == 2)
  384. return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
  385. /*
  386. * The first two bytes of the packet are the length of the text string
  387. * In complex cases, there are style descriptors appended to the string
  388. * so we can't just assume the packet size is the string size.
  389. */
  390. text_length = AV_RB16(ptr);
  391. end = ptr + FFMIN(2 + text_length, avpkt->size);
  392. ptr += 2;
  393. ts_start = av_rescale_q(avpkt->pts,
  394. avctx->time_base,
  395. (AVRational){1,100});
  396. ts_end = av_rescale_q(avpkt->pts + avpkt->duration,
  397. avctx->time_base,
  398. (AVRational){1,100});
  399. tsmb_size = 0;
  400. m->tracksize = 2 + text_length;
  401. m->style_entries = 0;
  402. m->box_flags = 0;
  403. m->count_s = 0;
  404. // Note that the spec recommends lines be no longer than 2048 characters.
  405. av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
  406. if (text_length + 2 != avpkt->size) {
  407. while (m->tracksize + 8 <= avpkt->size) {
  408. // A box is a minimum of 8 bytes.
  409. tsmb = ptr + m->tracksize - 2;
  410. tsmb_size = AV_RB32(tsmb);
  411. tsmb += 4;
  412. tsmb_type = AV_RB32(tsmb);
  413. tsmb += 4;
  414. if (tsmb_size == 1) {
  415. if (m->tracksize + 16 > avpkt->size)
  416. break;
  417. tsmb_size = AV_RB64(tsmb);
  418. tsmb += 8;
  419. m->size_var = 16;
  420. } else
  421. m->size_var = 8;
  422. //size_var is equal to 8 or 16 depending on the size of box
  423. if (m->tracksize + tsmb_size > avpkt->size)
  424. break;
  425. for (size_t i = 0; i < box_count; i++) {
  426. if (tsmb_type == box_types[i].type) {
  427. if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
  428. break;
  429. ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
  430. if (ret_tsmb == -1)
  431. break;
  432. }
  433. }
  434. m->tracksize = m->tracksize + tsmb_size;
  435. }
  436. text_to_ass(&buf, ptr, end, m);
  437. mov_text_cleanup(m);
  438. } else
  439. text_to_ass(&buf, ptr, end, m);
  440. ret = ff_ass_add_rect_bprint(sub, &buf, ts_start, ts_end - ts_start);
  441. av_bprint_finalize(&buf, NULL);
  442. if (ret < 0)
  443. return ret;
  444. *got_sub_ptr = sub->num_rects > 0;
  445. return avpkt->size;
  446. }
  447. static int mov_text_decode_close(AVCodecContext *avctx)
  448. {
  449. MovTextContext *m = avctx->priv_data;
  450. mov_text_cleanup_ftab(m);
  451. return 0;
  452. }
  453. AVCodec ff_movtext_decoder = {
  454. .name = "mov_text",
  455. .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
  456. .type = AVMEDIA_TYPE_SUBTITLE,
  457. .id = AV_CODEC_ID_MOV_TEXT,
  458. .priv_data_size = sizeof(MovTextContext),
  459. .init = mov_text_init,
  460. .decode = mov_text_decode_frame,
  461. .close = mov_text_decode_close,
  462. };