You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

342 lines
11KB

  1. /*
  2. * RTP parser for VP9 payload format (draft version 02) - experimental
  3. * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/intreadwrite.h"
  22. #include "avio_internal.h"
  23. #include "rtpdec_formats.h"
  24. #define RTP_VP9_DESC_REQUIRED_SIZE 1
  25. struct PayloadContext {
  26. AVIOContext *buf;
  27. uint32_t timestamp;
  28. };
  29. static av_cold int vp9_init(AVFormatContext *ctx, int st_index,
  30. PayloadContext *data)
  31. {
  32. av_log(ctx, AV_LOG_WARNING,
  33. "RTP/VP9 support is still experimental\n");
  34. return 0;
  35. }
  36. static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
  37. AVStream *st, AVPacket *pkt, uint32_t *timestamp,
  38. const uint8_t *buf, int len, uint16_t seq,
  39. int flags)
  40. {
  41. int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
  42. av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
  43. av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
  44. int ref_fields = 0, has_ref_field_ext_pic_id = 0;
  45. int first_fragment, last_fragment;
  46. int rtp_m;
  47. int res = 0;
  48. /* drop data of previous packets in case of non-continuous (lossy) packet stream */
  49. if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp)
  50. ffio_free_dyn_buf(&rtp_vp9_ctx->buf);
  51. /* sanity check for size of input packet: 1 byte payload at least */
  52. if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) {
  53. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len);
  54. return AVERROR_INVALIDDATA;
  55. }
  56. /*
  57. * decode the required VP9 payload descriptor according to section 4.2 of the spec.:
  58. *
  59. * 0 1 2 3 4 5 6 7
  60. * +-+-+-+-+-+-+-+-+
  61. * |I|P|L|F|B|E|V|-| (REQUIRED)
  62. * +-+-+-+-+-+-+-+-+
  63. *
  64. * I: PictureID present
  65. * P: Inter-picture predicted layer frame
  66. * L: Layer indices present
  67. * F: Flexible mode
  68. * B: Start of VP9 frame
  69. * E: End of picture
  70. * V: Scalability Structure (SS) present
  71. */
  72. has_pic_id = !!(buf[0] & 0x80);
  73. inter_picture_layer_frame = !!(buf[0] & 0x40);
  74. has_layer_idc = !!(buf[0] & 0x20);
  75. has_ref_idc = !!(buf[0] & 0x10);
  76. first_fragment = !!(buf[0] & 0x08);
  77. last_fragment = !!(buf[0] & 0x04);
  78. has_ss_data = !!(buf[0] & 0x02);
  79. rtp_m = !!(flags & RTP_FLAG_MARKER);
  80. /* sanity check for markers: B should always be equal to the RTP M marker */
  81. if (last_fragment != rtp_m) {
  82. av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m);
  83. return AVERROR_INVALIDDATA;
  84. }
  85. /* pass the extensions field */
  86. buf += RTP_VP9_DESC_REQUIRED_SIZE;
  87. len -= RTP_VP9_DESC_REQUIRED_SIZE;
  88. /*
  89. * decode the 1-byte/2-byte picture ID:
  90. *
  91. * 0 1 2 3 4 5 6 7
  92. * +-+-+-+-+-+-+-+-+
  93. * I: |M|PICTURE ID | (RECOMMENDED)
  94. * +-+-+-+-+-+-+-+-+
  95. * M: | EXTENDED PID | (RECOMMENDED)
  96. * +-+-+-+-+-+-+-+-+
  97. *
  98. * M: The most significant bit of the first octet is an extension flag.
  99. * PictureID: 8 or 16 bits including the M bit.
  100. */
  101. if (has_pic_id) {
  102. /* check for 1-byte or 2-byte picture index */
  103. if (buf[0] & 0x80) {
  104. if (len < 2) {
  105. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  106. return AVERROR_INVALIDDATA;
  107. }
  108. pic_id = AV_RB16(buf) & 0x7fff;
  109. buf += 2;
  110. len -= 2;
  111. } else {
  112. pic_id = buf[0] & 0x7f;
  113. buf++;
  114. len--;
  115. }
  116. }
  117. /*
  118. * decode layer indices
  119. *
  120. * 0 1 2 3 4 5 6 7
  121. * +-+-+-+-+-+-+-+-+
  122. * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
  123. * +-+-+-+-+-+-+-+-+
  124. *
  125. * T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
  126. * If "F" is set in the initial octet, R is 2 bits representing the number
  127. * of reference fields this frame refers to.
  128. */
  129. if (has_layer_idc) {
  130. if (len < 1) {
  131. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  132. return AVERROR_INVALIDDATA;
  133. }
  134. layer_temporal = buf[0] & 0xC0;
  135. layer_spatial = buf[0] & 0x30;
  136. layer_quality = buf[0] & 0x0C;
  137. if (has_ref_idc) {
  138. ref_fields = buf[0] & 0x03;
  139. if (ref_fields)
  140. non_key_frame = 1;
  141. }
  142. buf++;
  143. len--;
  144. }
  145. /*
  146. * decode the reference fields
  147. *
  148. * 0 1 2 3 4 5 6 7
  149. * +-+-+-+-+-+-+-+-+ -\
  150. * F: | PID |X| RS| RQ| (OPTIONAL) .
  151. * +-+-+-+-+-+-+-+-+ . - R times
  152. * X: | EXTENDED PID | (OPTIONAL) .
  153. * +-+-+-+-+-+-+-+-+ -/
  154. *
  155. * PID: The relative Picture ID referred to by this frame.
  156. * RS and RQ: The spatial and quality layer IDs.
  157. * X: 1 if this layer index has an extended relative Picture ID.
  158. */
  159. if (has_ref_idc) {
  160. while (ref_fields) {
  161. if (len < 1) {
  162. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  163. return AVERROR_INVALIDDATA;
  164. }
  165. has_ref_field_ext_pic_id = buf[0] & 0x10;
  166. /* pass ref. field */
  167. if (has_ref_field_ext_pic_id) {
  168. if (len < 2) {
  169. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  170. return AVERROR_INVALIDDATA;
  171. }
  172. /* ignore ref. data */
  173. buf += 2;
  174. len -= 2;
  175. } else {
  176. /* ignore ref. data */
  177. buf++;
  178. len--;
  179. }
  180. ref_fields--;
  181. }
  182. }
  183. /*
  184. * decode the scalability structure (SS)
  185. *
  186. * 0 1 2 3 4 5 6 7
  187. * +-+-+-+-+-+-+-+-+
  188. * V: | PATTERN LENGTH|
  189. * +-+-+-+-+-+-+-+-+ -\
  190. * | T | S | Q | R | (OPTIONAL) .
  191. * +-+-+-+-+-+-+-+-+ -\ .
  192. * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times
  193. * +-+-+-+-+-+-+-+-+ . - R times .
  194. * X: | EXTENDED PID | (OPTIONAL) . .
  195. * +-+-+-+-+-+-+-+-+ -/ -/
  196. *
  197. * PID: The relative Picture ID referred to by this frame.
  198. * RS and RQ: The spatial and quality layer IDs.
  199. * X: 1 if this layer index has an extended relative Picture ID.
  200. */
  201. if (has_ss_data) {
  202. int n_s, y, g, i;
  203. if (len < 1) {
  204. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  205. return AVERROR_INVALIDDATA;
  206. }
  207. n_s = buf[0] >> 5;
  208. y = !!(buf[0] & 0x10);
  209. g = !!(buf[0] & 0x08);
  210. buf++;
  211. len--;
  212. if (n_s > 0) {
  213. avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers");
  214. return AVERROR_PATCHWELCOME;
  215. }
  216. if (y) {
  217. if (len < 4 * (n_s + 1)) {
  218. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  219. return AVERROR_INVALIDDATA;
  220. }
  221. for (i = 0; i < n_s + 1; i++) {
  222. av_unused int w, h;
  223. w = AV_RB16(buf);
  224. h = AV_RB16(buf + 2);
  225. buf += 4;
  226. len -= 4;
  227. }
  228. }
  229. if (g) {
  230. int n_g;
  231. if (len < 1) {
  232. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  233. return AVERROR_INVALIDDATA;
  234. }
  235. n_g = buf[0];
  236. buf++;
  237. len--;
  238. for (i = 0; i < n_g; i++) {
  239. av_unused int t, u, r, j;
  240. if (len < 1) {
  241. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  242. return AVERROR_INVALIDDATA;
  243. }
  244. t = buf[0] >> 5;
  245. u = !!(buf[0] & 0x10);
  246. r = (buf[0] >> 2) & 0x03;
  247. buf++;
  248. len--;
  249. if (len < r) {
  250. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  251. return AVERROR_INVALIDDATA;
  252. }
  253. for (j = 0; j < r; j++) {
  254. av_unused int p_diff = buf[0];
  255. buf++;
  256. len--;
  257. }
  258. }
  259. }
  260. }
  261. /*
  262. * decode the VP9 payload header
  263. *
  264. * spec. is tbd
  265. */
  266. //XXX: implement when specified
  267. /* sanity check: 1 byte payload as minimum */
  268. if (len < 1) {
  269. av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
  270. return AVERROR_INVALIDDATA;
  271. }
  272. /* start frame buffering with new dynamic buffer */
  273. if (!rtp_vp9_ctx->buf) {
  274. /* sanity check: a new frame should have started */
  275. if (first_fragment) {
  276. res = avio_open_dyn_buf(&rtp_vp9_ctx->buf);
  277. if (res < 0)
  278. return res;
  279. /* update the timestamp in the frame packet with the one from the RTP packet */
  280. rtp_vp9_ctx->timestamp = *timestamp;
  281. } else {
  282. /* frame not started yet, need more packets */
  283. return AVERROR(EAGAIN);
  284. }
  285. }
  286. /* write the fragment to the dyn. buffer */
  287. avio_write(rtp_vp9_ctx->buf, buf, len);
  288. /* do we need more fragments? */
  289. if (!last_fragment)
  290. return AVERROR(EAGAIN);
  291. /* close frame buffering and create resulting A/V packet */
  292. res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index);
  293. if (res < 0)
  294. return res;
  295. return 0;
  296. }
  297. static void vp9_close_context(PayloadContext *vp9)
  298. {
  299. ffio_free_dyn_buf(&vp9->buf);
  300. }
  301. RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
  302. .enc_name = "VP9",
  303. .codec_type = AVMEDIA_TYPE_VIDEO,
  304. .codec_id = AV_CODEC_ID_VP9,
  305. .priv_data_size = sizeof(PayloadContext),
  306. .init = vp9_init,
  307. .close = vp9_close_context,
  308. .parse_packet = vp9_handle_packet
  309. };