You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

191 lines
5.8KB

  1. /*
  2. * Copyright (c) 2012 Clément Bœsch
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * SAMI subtitle decoder
  23. * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
  24. */
  25. #include "ass.h"
  26. #include "libavutil/avstring.h"
  27. #include "libavutil/bprint.h"
  28. #include "htmlsubtitles.h"
  29. typedef struct {
  30. AVBPrint source;
  31. AVBPrint content;
  32. AVBPrint encoded_source;
  33. AVBPrint encoded_content;
  34. AVBPrint full;
  35. int readorder;
  36. } SAMIContext;
  37. static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
  38. {
  39. SAMIContext *sami = avctx->priv_data;
  40. int ret = 0;
  41. char *tag = NULL;
  42. char *dupsrc = av_strdup(src);
  43. char *p = dupsrc;
  44. AVBPrint *dst_content = &sami->encoded_content;
  45. AVBPrint *dst_source = &sami->encoded_source;
  46. if (!dupsrc)
  47. return AVERROR(ENOMEM);
  48. av_bprint_clear(&sami->encoded_content);
  49. av_bprint_clear(&sami->content);
  50. av_bprint_clear(&sami->encoded_source);
  51. for (;;) {
  52. char *saveptr = NULL;
  53. int prev_chr_is_space = 0;
  54. AVBPrint *dst = &sami->content;
  55. /* parse & extract paragraph tag */
  56. p = av_stristr(p, "<P");
  57. if (!p)
  58. break;
  59. if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
  60. p++;
  61. continue;
  62. }
  63. if (dst->len) // add a separator with the previous paragraph if there was one
  64. av_bprintf(dst, "\\N");
  65. tag = av_strtok(p, ">", &saveptr);
  66. if (!tag || !saveptr)
  67. break;
  68. p = saveptr;
  69. /* check if the current paragraph is the "source" (speaker name) */
  70. if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
  71. dst = &sami->source;
  72. av_bprint_clear(dst);
  73. }
  74. /* if empty event -> skip subtitle */
  75. while (av_isspace(*p))
  76. p++;
  77. if (!strncmp(p, "&nbsp;", 6)) {
  78. ret = -1;
  79. goto end;
  80. }
  81. /* extract the text, stripping most of the tags */
  82. while (*p) {
  83. if (*p == '<') {
  84. if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
  85. break;
  86. }
  87. if (!av_strncasecmp(p, "<BR", 3)) {
  88. av_bprintf(dst, "\\N");
  89. p++;
  90. while (*p && *p != '>')
  91. p++;
  92. if (!*p)
  93. break;
  94. if (*p == '>')
  95. p++;
  96. continue;
  97. }
  98. if (!av_isspace(*p))
  99. av_bprint_chars(dst, *p, 1);
  100. else if (!prev_chr_is_space)
  101. av_bprint_chars(dst, ' ', 1);
  102. prev_chr_is_space = av_isspace(*p);
  103. p++;
  104. }
  105. }
  106. av_bprint_clear(&sami->full);
  107. if (sami->source.len) {
  108. ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
  109. if (ret < 0)
  110. goto end;
  111. av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
  112. }
  113. ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
  114. if (ret < 0)
  115. goto end;
  116. av_bprintf(&sami->full, "%s", sami->encoded_content.str);
  117. end:
  118. av_free(dupsrc);
  119. return ret;
  120. }
  121. static int sami_decode_frame(AVCodecContext *avctx,
  122. void *data, int *got_sub_ptr, AVPacket *avpkt)
  123. {
  124. AVSubtitle *sub = data;
  125. const char *ptr = avpkt->data;
  126. SAMIContext *sami = avctx->priv_data;
  127. if (ptr && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, ptr)) {
  128. // TODO: pass escaped sami->encoded_source.str as source
  129. int ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
  130. if (ret < 0)
  131. return ret;
  132. }
  133. *got_sub_ptr = sub->num_rects > 0;
  134. return avpkt->size;
  135. }
  136. static av_cold int sami_init(AVCodecContext *avctx)
  137. {
  138. SAMIContext *sami = avctx->priv_data;
  139. av_bprint_init(&sami->source, 0, 2048);
  140. av_bprint_init(&sami->content, 0, 2048);
  141. av_bprint_init(&sami->encoded_source, 0, 2048);
  142. av_bprint_init(&sami->encoded_content, 0, 2048);
  143. av_bprint_init(&sami->full, 0, 2048);
  144. return ff_ass_subtitle_header_default(avctx);
  145. }
  146. static av_cold int sami_close(AVCodecContext *avctx)
  147. {
  148. SAMIContext *sami = avctx->priv_data;
  149. av_bprint_finalize(&sami->source, NULL);
  150. av_bprint_finalize(&sami->content, NULL);
  151. av_bprint_finalize(&sami->encoded_source, NULL);
  152. av_bprint_finalize(&sami->encoded_content, NULL);
  153. av_bprint_finalize(&sami->full, NULL);
  154. return 0;
  155. }
  156. static void sami_flush(AVCodecContext *avctx)
  157. {
  158. SAMIContext *sami = avctx->priv_data;
  159. if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
  160. sami->readorder = 0;
  161. }
  162. AVCodec ff_sami_decoder = {
  163. .name = "sami",
  164. .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
  165. .type = AVMEDIA_TYPE_SUBTITLE,
  166. .id = AV_CODEC_ID_SAMI,
  167. .priv_data_size = sizeof(SAMIContext),
  168. .init = sami_init,
  169. .close = sami_close,
  170. .decode = sami_decode_frame,
  171. .flush = sami_flush,
  172. };