You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

318 lines
9.7KB

  1. /*
  2. * RTP parser for VP9 payload format (draft version 0) - experimental
  3. * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. */
  22. #include "libavcodec/bytestream.h"
  23. #include "rtpdec_formats.h"
  24. #define RTP_VP9_DESC_REQUIRED_SIZE 1
  25. struct PayloadContext {
  26. AVIOContext *buf;
  27. uint32_t timestamp;
  28. };
  29. static av_cold PayloadContext *vp9_new_context(void)
  30. {
  31. return av_mallocz(sizeof(PayloadContext));
  32. }
  33. static void vp9_free_dyn_buffer(AVIOContext **dyn_buf)
  34. {
  35. uint8_t *ptr_dyn_buffer;
  36. avio_close_dyn_buf(*dyn_buf, &ptr_dyn_buffer);
  37. av_free(ptr_dyn_buffer);
  38. *dyn_buf = NULL;
  39. }
  40. static av_cold void vp9_free_context(PayloadContext *data)
  41. {
  42. av_free(data);
  43. }
  44. static av_cold int vp9_init(AVFormatContext *ctx, int st_index,
  45. PayloadContext *data)
  46. {
  47. av_dlog(ctx, "vp9_init() for stream %d\n", st_index);
  48. av_log(ctx, AV_LOG_WARNING,
  49. "RTP/VP9 support is still experimental\n");
  50. if (st_index < 0)
  51. return 0;
  52. ctx->streams[st_index]->need_parsing = AVSTREAM_PARSE_FULL;
  53. return 0;
  54. }
  55. static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
  56. AVStream *st, AVPacket *pkt, uint32_t *timestamp,
  57. const uint8_t *buf, int len, uint16_t seq,
  58. int flags)
  59. {
  60. int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data, has_su_data;
  61. av_unused int pic_id = 0, non_key_frame = 0;
  62. av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
  63. int ref_fields = 0, has_ref_field_ext_pic_id = 0;
  64. int first_fragment, last_fragment;
  65. int res = 0;
  66. /* drop data of previous packets in case of non-continuous (lossy) packet stream */
  67. if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) {
  68. vp9_free_dyn_buffer(&rtp_vp9_ctx->buf);
  69. }
  70. /* sanity check for size of input packet: 1 byte payload at least */
  71. if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) {
  72. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len);
  73. return AVERROR_INVALIDDATA;
  74. }
  75. /*
  76. decode the required VP9 payload descriptor according to section 4.2 of the spec.:
  77. 0 1 2 3 4 5 6 7
  78. +-+-+-+-+-+-+-+-+
  79. |I|L|F|B|E|V|U|-| (REQUIRED)
  80. +-+-+-+-+-+-+-+-+
  81. I: PictureID present
  82. L: Layer indices present
  83. F: Reference indices present
  84. B: Start of VP9 frame
  85. E: End of picture
  86. V: Scalability Structure (SS) present
  87. U: Scalability Structure Update (SU) present
  88. */
  89. has_pic_id = buf[0] & 0x80;
  90. has_layer_idc = buf[0] & 0x40;
  91. has_ref_idc = buf[0] & 0x20;
  92. first_fragment = buf[0] & 0x10;
  93. last_fragment = buf[0] & 0x08;
  94. has_ss_data = buf[0] & 0x04;
  95. has_su_data = buf[0] & 0x02;
  96. /* sanity check for markers: B should always be equal to the RTP M marker */
  97. if (last_fragment >> 2 != flags & RTP_FLAG_MARKER) {
  98. av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker\n");
  99. return AVERROR_INVALIDDATA;
  100. }
  101. /* pass the extensions field */
  102. buf += RTP_VP9_DESC_REQUIRED_SIZE;
  103. len -= RTP_VP9_DESC_REQUIRED_SIZE;
  104. /*
  105. decode the 1-byte/2-byte picture ID:
  106. 0 1 2 3 4 5 6 7
  107. +-+-+-+-+-+-+-+-+
  108. I: |M|PICTURE ID | (RECOMMENDED)
  109. +-+-+-+-+-+-+-+-+
  110. M: | EXTENDED PID | (RECOMMENDED)
  111. +-+-+-+-+-+-+-+-+
  112. M: The most significant bit of the first octet is an extension flag.
  113. PictureID: 8 or 16 bits including the M bit.
  114. */
  115. if (has_pic_id) {
  116. if (len < 1) {
  117. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  118. return AVERROR_INVALIDDATA;
  119. }
  120. /* check for 1-byte or 2-byte picture index */
  121. if (buf[0] & 0x80) {
  122. if (len < 2) {
  123. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  124. return AVERROR_INVALIDDATA;
  125. }
  126. pic_id = AV_RB16(buf) & 0x7fff;
  127. buf += 2;
  128. len -= 2;
  129. } else {
  130. pic_id = buf[0] & 0x7f;
  131. buf++;
  132. len--;
  133. }
  134. }
  135. /*
  136. decode layer indices
  137. 0 1 2 3 4 5 6 7
  138. +-+-+-+-+-+-+-+-+
  139. L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
  140. +-+-+-+-+-+-+-+-+
  141. T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
  142. If "F" is set in the initial octet, R is 2 bits representing the number
  143. of reference fields this frame refers to.
  144. */
  145. if (has_layer_idc) {
  146. if (len < 1) {
  147. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet");
  148. return AVERROR_INVALIDDATA;
  149. }
  150. layer_temporal = buf[0] & 0xC0;
  151. layer_spatial = buf[0] & 0x30;
  152. layer_quality = buf[0] & 0x0C;
  153. if (has_ref_idc) {
  154. ref_fields = buf[0] & 0x03;
  155. if (ref_fields)
  156. non_key_frame = 1;
  157. }
  158. buf++;
  159. len--;
  160. }
  161. /*
  162. decode the reference fields
  163. 0 1 2 3 4 5 6 7
  164. +-+-+-+-+-+-+-+-+ -\
  165. F: | PID |X| RS| RQ| (OPTIONAL) .
  166. +-+-+-+-+-+-+-+-+ . - R times
  167. X: | EXTENDED PID | (OPTIONAL) .
  168. +-+-+-+-+-+-+-+-+ -/
  169. PID: The relative Picture ID referred to by this frame.
  170. RS and RQ: The spatial and quality layer IDs.
  171. X: 1 if this layer index has an extended relative Picture ID.
  172. */
  173. if (has_ref_idc) {
  174. while (ref_fields) {
  175. if (len < 1) {
  176. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  177. return AVERROR_INVALIDDATA;
  178. }
  179. has_ref_field_ext_pic_id = buf[0] & 0x10;
  180. /* pass ref. field */
  181. if (has_ref_field_ext_pic_id) {
  182. if (len < 2) {
  183. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  184. return AVERROR_INVALIDDATA;
  185. }
  186. /* ignore ref. data */
  187. buf += 2;
  188. len -= 2;
  189. } else {
  190. /* ignore ref. data */
  191. buf++;
  192. len--;
  193. }
  194. ref_fields--;
  195. }
  196. }
  197. /*
  198. decode the scalability structure (SS)
  199. 0 1 2 3 4 5 6 7
  200. +-+-+-+-+-+-+-+-+
  201. V: | PATTERN LENGTH|
  202. +-+-+-+-+-+-+-+-+ -\
  203. | T | S | Q | R | (OPTIONAL) .
  204. +-+-+-+-+-+-+-+-+ -\ .
  205. | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times
  206. +-+-+-+-+-+-+-+-+ . - R times .
  207. X: | EXTENDED PID | (OPTIONAL) . .
  208. +-+-+-+-+-+-+-+-+ -/ -/
  209. PID: The relative Picture ID referred to by this frame.
  210. RS and RQ: The spatial and quality layer IDs.
  211. X: 1 if this layer index has an extended relative Picture ID.
  212. */
  213. if (has_ss_data) {
  214. avpriv_report_missing_feature(ctx, "VP9 scalability structure data\n");
  215. return AVERROR_PATCHWELCOME;
  216. }
  217. /*
  218. decode the scalability update structure (SU)
  219. spec. is tbd
  220. */
  221. if (has_su_data) {
  222. avpriv_report_missing_feature(ctx, "VP9 scalability update structure data\n");
  223. return AVERROR_PATCHWELCOME;
  224. }
  225. /*
  226. decode the VP9 payload header
  227. spec. is tbd
  228. */
  229. //XXX: implement when specified
  230. /* sanity check: 1 byte payload as minimum */
  231. if (len < 1) {
  232. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  233. return AVERROR_INVALIDDATA;
  234. }
  235. /* start frame buffering with new dynamic buffer */
  236. if (!rtp_vp9_ctx->buf) {
  237. /* sanity check: a new frame should have started */
  238. if (first_fragment) {
  239. res = avio_open_dyn_buf(&rtp_vp9_ctx->buf);
  240. if (res < 0)
  241. return res;
  242. /* update the timestamp in the frame packet with the one from the RTP packet */
  243. rtp_vp9_ctx->timestamp = *timestamp;
  244. } else {
  245. /* frame not started yet, need more packets */
  246. return AVERROR(EAGAIN);
  247. }
  248. }
  249. /* write the fragment to the dyn. buffer */
  250. avio_write(rtp_vp9_ctx->buf, buf, len);
  251. /* do we need more fragments? */
  252. if (!last_fragment)
  253. return AVERROR(EAGAIN);
  254. /* close frame buffering and create resulting A/V packet */
  255. res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index);
  256. if (res < 0)
  257. return res;
  258. return 0;
  259. }
  260. RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
  261. .enc_name = "VP9",
  262. .codec_type = AVMEDIA_TYPE_VIDEO,
  263. .codec_id = AV_CODEC_ID_VP9,
  264. .init = vp9_init,
  265. .alloc = vp9_new_context,
  266. .free = vp9_free_context,
  267. .parse_packet = vp9_handle_packet
  268. };