You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

295 lines
9.4KB

  1. /*
  2. * RTP parser for VP9 payload format (draft version 0) - experimental
  3. * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/intreadwrite.h"
  22. #include "avio_internal.h"
  23. #include "rtpdec_formats.h"
  24. #define RTP_VP9_DESC_REQUIRED_SIZE 1
  25. struct PayloadContext {
  26. AVIOContext *buf;
  27. uint32_t timestamp;
  28. };
  29. static av_cold int vp9_init(AVFormatContext *ctx, int st_index,
  30. PayloadContext *data)
  31. {
  32. av_log(ctx, AV_LOG_WARNING,
  33. "RTP/VP9 support is still experimental\n");
  34. return 0;
  35. }
  36. static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
  37. AVStream *st, AVPacket *pkt, uint32_t *timestamp,
  38. const uint8_t *buf, int len, uint16_t seq,
  39. int flags)
  40. {
  41. int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data, has_su_data;
  42. av_unused int pic_id = 0, non_key_frame = 0;
  43. av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
  44. int ref_fields = 0, has_ref_field_ext_pic_id = 0;
  45. int first_fragment, last_fragment;
  46. int rtp_m;
  47. int res = 0;
  48. /* drop data of previous packets in case of non-continuous (lossy) packet stream */
  49. if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp)
  50. ffio_free_dyn_buf(&rtp_vp9_ctx->buf);
  51. /* sanity check for size of input packet: 1 byte payload at least */
  52. if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) {
  53. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len);
  54. return AVERROR_INVALIDDATA;
  55. }
  56. /*
  57. * decode the required VP9 payload descriptor according to section 4.2 of the spec.:
  58. *
  59. * 0 1 2 3 4 5 6 7
  60. * +-+-+-+-+-+-+-+-+
  61. * |I|L|F|B|E|V|U|-| (REQUIRED)
  62. * +-+-+-+-+-+-+-+-+
  63. *
  64. * I: PictureID present
  65. * L: Layer indices present
  66. * F: Reference indices present
  67. * B: Start of VP9 frame
  68. * E: End of picture
  69. * V: Scalability Structure (SS) present
  70. * U: Scalability Structure Update (SU) present
  71. */
  72. has_pic_id = !!(buf[0] & 0x80);
  73. has_layer_idc = !!(buf[0] & 0x40);
  74. has_ref_idc = !!(buf[0] & 0x20);
  75. first_fragment = !!(buf[0] & 0x10);
  76. last_fragment = !!(buf[0] & 0x08);
  77. has_ss_data = !!(buf[0] & 0x04);
  78. has_su_data = !!(buf[0] & 0x02);
  79. rtp_m = !!(flags & RTP_FLAG_MARKER);
  80. /* sanity check for markers: B should always be equal to the RTP M marker */
  81. if (last_fragment != rtp_m) {
  82. av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m);
  83. return AVERROR_INVALIDDATA;
  84. }
  85. /* pass the extensions field */
  86. buf += RTP_VP9_DESC_REQUIRED_SIZE;
  87. len -= RTP_VP9_DESC_REQUIRED_SIZE;
  88. /*
  89. * decode the 1-byte/2-byte picture ID:
  90. *
  91. * 0 1 2 3 4 5 6 7
  92. * +-+-+-+-+-+-+-+-+
  93. * I: |M|PICTURE ID | (RECOMMENDED)
  94. * +-+-+-+-+-+-+-+-+
  95. * M: | EXTENDED PID | (RECOMMENDED)
  96. * +-+-+-+-+-+-+-+-+
  97. *
  98. * M: The most significant bit of the first octet is an extension flag.
  99. * PictureID: 8 or 16 bits including the M bit.
  100. */
  101. if (has_pic_id) {
  102. if (len < 1) {
  103. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  104. return AVERROR_INVALIDDATA;
  105. }
  106. /* check for 1-byte or 2-byte picture index */
  107. if (buf[0] & 0x80) {
  108. if (len < 2) {
  109. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  110. return AVERROR_INVALIDDATA;
  111. }
  112. pic_id = AV_RB16(buf) & 0x7fff;
  113. buf += 2;
  114. len -= 2;
  115. } else {
  116. pic_id = buf[0] & 0x7f;
  117. buf++;
  118. len--;
  119. }
  120. }
  121. /*
  122. * decode layer indices
  123. *
  124. * 0 1 2 3 4 5 6 7
  125. * +-+-+-+-+-+-+-+-+
  126. * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
  127. * +-+-+-+-+-+-+-+-+
  128. *
  129. * T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
  130. * If "F" is set in the initial octet, R is 2 bits representing the number
  131. * of reference fields this frame refers to.
  132. */
  133. if (has_layer_idc) {
  134. if (len < 1) {
  135. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  136. return AVERROR_INVALIDDATA;
  137. }
  138. layer_temporal = buf[0] & 0xC0;
  139. layer_spatial = buf[0] & 0x30;
  140. layer_quality = buf[0] & 0x0C;
  141. if (has_ref_idc) {
  142. ref_fields = buf[0] & 0x03;
  143. if (ref_fields)
  144. non_key_frame = 1;
  145. }
  146. buf++;
  147. len--;
  148. }
  149. /*
  150. * decode the reference fields
  151. *
  152. * 0 1 2 3 4 5 6 7
  153. * +-+-+-+-+-+-+-+-+ -\
  154. * F: | PID |X| RS| RQ| (OPTIONAL) .
  155. * +-+-+-+-+-+-+-+-+ . - R times
  156. * X: | EXTENDED PID | (OPTIONAL) .
  157. * +-+-+-+-+-+-+-+-+ -/
  158. *
  159. * PID: The relative Picture ID referred to by this frame.
  160. * RS and RQ: The spatial and quality layer IDs.
  161. * X: 1 if this layer index has an extended relative Picture ID.
  162. */
  163. if (has_ref_idc) {
  164. while (ref_fields) {
  165. if (len < 1) {
  166. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  167. return AVERROR_INVALIDDATA;
  168. }
  169. has_ref_field_ext_pic_id = buf[0] & 0x10;
  170. /* pass ref. field */
  171. if (has_ref_field_ext_pic_id) {
  172. if (len < 2) {
  173. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  174. return AVERROR_INVALIDDATA;
  175. }
  176. /* ignore ref. data */
  177. buf += 2;
  178. len -= 2;
  179. } else {
  180. /* ignore ref. data */
  181. buf++;
  182. len--;
  183. }
  184. ref_fields--;
  185. }
  186. }
  187. /*
  188. * decode the scalability structure (SS)
  189. *
  190. * 0 1 2 3 4 5 6 7
  191. * +-+-+-+-+-+-+-+-+
  192. * V: | PATTERN LENGTH|
  193. * +-+-+-+-+-+-+-+-+ -\
  194. * | T | S | Q | R | (OPTIONAL) .
  195. * +-+-+-+-+-+-+-+-+ -\ .
  196. * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times
  197. * +-+-+-+-+-+-+-+-+ . - R times .
  198. * X: | EXTENDED PID | (OPTIONAL) . .
  199. * +-+-+-+-+-+-+-+-+ -/ -/
  200. *
  201. * PID: The relative Picture ID referred to by this frame.
  202. * RS and RQ: The spatial and quality layer IDs.
  203. * X: 1 if this layer index has an extended relative Picture ID.
  204. */
  205. if (has_ss_data) {
  206. avpriv_report_missing_feature(ctx, "VP9 scalability structure data");
  207. return AVERROR(ENOSYS);
  208. }
  209. /*
  210. * decode the scalability update structure (SU)
  211. *
  212. * spec. is tbd
  213. */
  214. if (has_su_data) {
  215. avpriv_report_missing_feature(ctx, "VP9 scalability update structure data");
  216. return AVERROR(ENOSYS);
  217. }
  218. /*
  219. * decode the VP9 payload header
  220. *
  221. * spec. is tbd
  222. */
  223. //XXX: implement when specified
  224. /* sanity check: 1 byte payload as minimum */
  225. if (len < 1) {
  226. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  227. return AVERROR_INVALIDDATA;
  228. }
  229. /* start frame buffering with new dynamic buffer */
  230. if (!rtp_vp9_ctx->buf) {
  231. /* sanity check: a new frame should have started */
  232. if (first_fragment) {
  233. res = avio_open_dyn_buf(&rtp_vp9_ctx->buf);
  234. if (res < 0)
  235. return res;
  236. /* update the timestamp in the frame packet with the one from the RTP packet */
  237. rtp_vp9_ctx->timestamp = *timestamp;
  238. } else {
  239. /* frame not started yet, need more packets */
  240. return AVERROR(EAGAIN);
  241. }
  242. }
  243. /* write the fragment to the dyn. buffer */
  244. avio_write(rtp_vp9_ctx->buf, buf, len);
  245. /* do we need more fragments? */
  246. if (!last_fragment)
  247. return AVERROR(EAGAIN);
  248. /* close frame buffering and create resulting A/V packet */
  249. res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index);
  250. if (res < 0)
  251. return res;
  252. return 0;
  253. }
  254. RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
  255. .enc_name = "VP9",
  256. .codec_type = AVMEDIA_TYPE_VIDEO,
  257. .codec_id = AV_CODEC_ID_VP9,
  258. .priv_data_size = sizeof(PayloadContext),
  259. .init = vp9_init,
  260. .parse_packet = vp9_handle_packet
  261. };