You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

188 lines
5.7KB

  1. /*
  2. * Copyright (c) 2012 Clément Bœsch
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * SAMI subtitle decoder
  23. * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
  24. */
  25. #include "ass.h"
  26. #include "libavutil/avstring.h"
  27. #include "libavutil/bprint.h"
  28. #include "htmlsubtitles.h"
  29. typedef struct {
  30. AVBPrint source;
  31. AVBPrint content;
  32. AVBPrint encoded_source;
  33. AVBPrint encoded_content;
  34. AVBPrint full;
  35. int readorder;
  36. } SAMIContext;
  37. static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
  38. {
  39. SAMIContext *sami = avctx->priv_data;
  40. int ret = 0;
  41. char *tag = NULL;
  42. char *dupsrc = av_strdup(src);
  43. char *p = dupsrc;
  44. AVBPrint *dst_content = &sami->encoded_content;
  45. AVBPrint *dst_source = &sami->encoded_source;
  46. av_bprint_clear(&sami->encoded_content);
  47. av_bprint_clear(&sami->content);
  48. av_bprint_clear(&sami->encoded_source);
  49. for (;;) {
  50. char *saveptr = NULL;
  51. int prev_chr_is_space = 0;
  52. AVBPrint *dst = &sami->content;
  53. /* parse & extract paragraph tag */
  54. p = av_stristr(p, "<P");
  55. if (!p)
  56. break;
  57. if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
  58. p++;
  59. continue;
  60. }
  61. if (dst->len) // add a separator with the previous paragraph if there was one
  62. av_bprintf(dst, "\\N");
  63. tag = av_strtok(p, ">", &saveptr);
  64. if (!tag || !saveptr)
  65. break;
  66. p = saveptr;
  67. /* check if the current paragraph is the "source" (speaker name) */
  68. if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
  69. dst = &sami->source;
  70. av_bprint_clear(dst);
  71. }
  72. /* if empty event -> skip subtitle */
  73. while (av_isspace(*p))
  74. p++;
  75. if (!strncmp(p, "&nbsp;", 6)) {
  76. ret = -1;
  77. goto end;
  78. }
  79. /* extract the text, stripping most of the tags */
  80. while (*p) {
  81. if (*p == '<') {
  82. if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
  83. break;
  84. }
  85. if (!av_strncasecmp(p, "<BR", 3)) {
  86. av_bprintf(dst, "\\N");
  87. p++;
  88. while (*p && *p != '>')
  89. p++;
  90. if (!*p)
  91. break;
  92. if (*p == '>')
  93. p++;
  94. continue;
  95. }
  96. if (!av_isspace(*p))
  97. av_bprint_chars(dst, *p, 1);
  98. else if (!prev_chr_is_space)
  99. av_bprint_chars(dst, ' ', 1);
  100. prev_chr_is_space = av_isspace(*p);
  101. p++;
  102. }
  103. }
  104. av_bprint_clear(&sami->full);
  105. if (sami->source.len) {
  106. ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
  107. if (ret < 0)
  108. goto end;
  109. av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
  110. }
  111. ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
  112. if (ret < 0)
  113. goto end;
  114. av_bprintf(&sami->full, "%s", sami->encoded_content.str);
  115. end:
  116. av_free(dupsrc);
  117. return ret;
  118. }
  119. static int sami_decode_frame(AVCodecContext *avctx,
  120. void *data, int *got_sub_ptr, AVPacket *avpkt)
  121. {
  122. AVSubtitle *sub = data;
  123. const char *ptr = avpkt->data;
  124. SAMIContext *sami = avctx->priv_data;
  125. if (ptr && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, ptr)) {
  126. // TODO: pass escaped sami->encoded_source.str as source
  127. int ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
  128. if (ret < 0)
  129. return ret;
  130. }
  131. *got_sub_ptr = sub->num_rects > 0;
  132. return avpkt->size;
  133. }
  134. static av_cold int sami_init(AVCodecContext *avctx)
  135. {
  136. SAMIContext *sami = avctx->priv_data;
  137. av_bprint_init(&sami->source, 0, 2048);
  138. av_bprint_init(&sami->content, 0, 2048);
  139. av_bprint_init(&sami->encoded_source, 0, 2048);
  140. av_bprint_init(&sami->encoded_content, 0, 2048);
  141. av_bprint_init(&sami->full, 0, 2048);
  142. return ff_ass_subtitle_header_default(avctx);
  143. }
  144. static av_cold int sami_close(AVCodecContext *avctx)
  145. {
  146. SAMIContext *sami = avctx->priv_data;
  147. av_bprint_finalize(&sami->source, NULL);
  148. av_bprint_finalize(&sami->content, NULL);
  149. av_bprint_finalize(&sami->encoded_source, NULL);
  150. av_bprint_finalize(&sami->encoded_content, NULL);
  151. av_bprint_finalize(&sami->full, NULL);
  152. return 0;
  153. }
  154. static void sami_flush(AVCodecContext *avctx)
  155. {
  156. SAMIContext *sami = avctx->priv_data;
  157. if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
  158. sami->readorder = 0;
  159. }
  160. AVCodec ff_sami_decoder = {
  161. .name = "sami",
  162. .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
  163. .type = AVMEDIA_TYPE_SUBTITLE,
  164. .id = AV_CODEC_ID_SAMI,
  165. .priv_data_size = sizeof(SAMIContext),
  166. .init = sami_init,
  167. .close = sami_close,
  168. .decode = sami_decode_frame,
  169. .flush = sami_flush,
  170. };