You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

283 lines
14KB

  1. static inline void FUNC(mc_dir_part)(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
  2. uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  3. int src_x_offset, int src_y_offset,
  4. qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
  5. MpegEncContext * const s = &h->s;
  6. const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
  7. int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
  8. const int luma_xy= (mx&3) + ((my&3)<<2);
  9. uint8_t * src_y = pic->data[0] + ((mx>>2)<<PIXEL_SHIFT) + (my>>2)*h->mb_linesize;
  10. uint8_t * src_cb, * src_cr;
  11. int extra_width= h->emu_edge_width;
  12. int extra_height= h->emu_edge_height;
  13. int emu=0;
  14. const int full_mx= mx>>2;
  15. const int full_my= my>>2;
  16. const int pic_width = 16*s->mb_width;
  17. const int pic_height = 16*s->mb_height >> MB_FIELD;
  18. if(mx&7) extra_width -= 3;
  19. if(my&7) extra_height -= 3;
  20. if( full_mx < 0-extra_width
  21. || full_my < 0-extra_height
  22. || full_mx + 16/*FIXME*/ > pic_width + extra_width
  23. || full_my + 16/*FIXME*/ > pic_height + extra_height){
  24. s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<PIXEL_SHIFT) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
  25. src_y= s->edge_emu_buffer + (2<<PIXEL_SHIFT) + 2*h->mb_linesize;
  26. emu=1;
  27. }
  28. qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
  29. if(!square){
  30. qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
  31. }
  32. if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
  33. if(MB_FIELD){
  34. // chroma offset when predicting from a field of opposite parity
  35. my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
  36. emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
  37. }
  38. src_cb= pic->data[1] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize;
  39. src_cr= pic->data[2] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize;
  40. if(emu){
  41. s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
  42. src_cb= s->edge_emu_buffer;
  43. }
  44. chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
  45. if(emu){
  46. s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
  47. src_cr= s->edge_emu_buffer;
  48. }
  49. chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
  50. }
  51. static inline void FUNC(mc_part_std)(H264Context *h, int n, int square, int chroma_height, int delta,
  52. uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  53. int x_offset, int y_offset,
  54. qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
  55. qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
  56. int list0, int list1){
  57. MpegEncContext * const s = &h->s;
  58. qpel_mc_func *qpix_op= qpix_put;
  59. h264_chroma_mc_func chroma_op= chroma_put;
  60. dest_y += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h-> mb_linesize;
  61. dest_cb += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize;
  62. dest_cr += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize;
  63. x_offset += 8*s->mb_x;
  64. y_offset += 8*(s->mb_y >> MB_FIELD);
  65. if(list0){
  66. Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
  67. FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 0,
  68. dest_y, dest_cb, dest_cr, x_offset, y_offset,
  69. qpix_op, chroma_op);
  70. qpix_op= qpix_avg;
  71. chroma_op= chroma_avg;
  72. }
  73. if(list1){
  74. Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
  75. FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 1,
  76. dest_y, dest_cb, dest_cr, x_offset, y_offset,
  77. qpix_op, chroma_op);
  78. }
  79. }
  80. static inline void FUNC(mc_part_weighted)(H264Context *h, int n, int square, int chroma_height, int delta,
  81. uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  82. int x_offset, int y_offset,
  83. qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
  84. h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
  85. h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
  86. int list0, int list1){
  87. MpegEncContext * const s = &h->s;
  88. dest_y += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h-> mb_linesize;
  89. dest_cb += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize;
  90. dest_cr += ( x_offset<<PIXEL_SHIFT) + y_offset*h->mb_uvlinesize;
  91. x_offset += 8*s->mb_x;
  92. y_offset += 8*(s->mb_y >> MB_FIELD);
  93. if(list0 && list1){
  94. /* don't optimize for luma-only case, since B-frames usually
  95. * use implicit weights => chroma too. */
  96. uint8_t *tmp_cb = s->obmc_scratchpad;
  97. uint8_t *tmp_cr = s->obmc_scratchpad + (8<<PIXEL_SHIFT);
  98. uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
  99. int refn0 = h->ref_cache[0][ scan8[n] ];
  100. int refn1 = h->ref_cache[1][ scan8[n] ];
  101. FUNC(mc_dir_part)(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
  102. dest_y, dest_cb, dest_cr,
  103. x_offset, y_offset, qpix_put, chroma_put);
  104. FUNC(mc_dir_part)(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
  105. tmp_y, tmp_cb, tmp_cr,
  106. x_offset, y_offset, qpix_put, chroma_put);
  107. if(h->use_weight == 2){
  108. int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
  109. int weight1 = 64 - weight0;
  110. luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
  111. chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
  112. chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
  113. }else{
  114. luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
  115. h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
  116. h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
  117. chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
  118. h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
  119. h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
  120. chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
  121. h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
  122. h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
  123. }
  124. }else{
  125. int list = list1 ? 1 : 0;
  126. int refn = h->ref_cache[list][ scan8[n] ];
  127. Picture *ref= &h->ref_list[list][refn];
  128. FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, list,
  129. dest_y, dest_cb, dest_cr, x_offset, y_offset,
  130. qpix_put, chroma_put);
  131. luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
  132. h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
  133. if(h->use_weight_chroma){
  134. chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
  135. h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
  136. chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
  137. h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
  138. }
  139. }
  140. }
  141. static inline void FUNC(mc_part)(H264Context *h, int n, int square, int chroma_height, int delta,
  142. uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  143. int x_offset, int y_offset,
  144. qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
  145. qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
  146. h264_weight_func *weight_op, h264_biweight_func *weight_avg,
  147. int list0, int list1){
  148. if((h->use_weight==2 && list0 && list1
  149. && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
  150. || h->use_weight==1)
  151. FUNC(mc_part_weighted)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
  152. x_offset, y_offset, qpix_put, chroma_put,
  153. weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
  154. else
  155. FUNC(mc_part_std)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
  156. x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
  157. }
  158. static inline void FUNC(prefetch_motion)(H264Context *h, int list){
  159. /* fetch pixels for estimated mv 4 macroblocks ahead
  160. * optimized for 64byte cache lines */
  161. MpegEncContext * const s = &h->s;
  162. const int refn = h->ref_cache[list][scan8[0]];
  163. if(refn >= 0){
  164. const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
  165. const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
  166. uint8_t **src= h->ref_list[list][refn].data;
  167. int off= ((mx+64)<<PIXEL_SHIFT) + (my + (s->mb_x&3)*4)*h->mb_linesize;
  168. s->dsp.prefetch(src[0]+off, s->linesize, 4);
  169. off= (((mx>>1)+64)<<PIXEL_SHIFT) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
  170. s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
  171. }
  172. }
  173. static void FUNC(hl_motion)(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
  174. qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
  175. qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
  176. h264_weight_func *weight_op, h264_biweight_func *weight_avg){
  177. MpegEncContext * const s = &h->s;
  178. const int mb_xy= h->mb_xy;
  179. const int mb_type= s->current_picture.mb_type[mb_xy];
  180. assert(IS_INTER(mb_type));
  181. if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME)
  182. await_references(h);
  183. FUNC(prefetch_motion)(h, 0);
  184. if(IS_16X16(mb_type)){
  185. FUNC(mc_part)(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
  186. qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
  187. weight_op, weight_avg,
  188. IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
  189. }else if(IS_16X8(mb_type)){
  190. FUNC(mc_part)(h, 0, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 0,
  191. qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
  192. &weight_op[1], &weight_avg[1],
  193. IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
  194. FUNC(mc_part)(h, 8, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 4,
  195. qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
  196. &weight_op[1], &weight_avg[1],
  197. IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
  198. }else if(IS_8X16(mb_type)){
  199. FUNC(mc_part)(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
  200. qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
  201. &weight_op[2], &weight_avg[2],
  202. IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
  203. FUNC(mc_part)(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
  204. qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
  205. &weight_op[2], &weight_avg[2],
  206. IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
  207. }else{
  208. int i;
  209. assert(IS_8X8(mb_type));
  210. for(i=0; i<4; i++){
  211. const int sub_mb_type= h->sub_mb_type[i];
  212. const int n= 4*i;
  213. int x_offset= (i&1)<<2;
  214. int y_offset= (i&2)<<1;
  215. if(IS_SUB_8X8(sub_mb_type)){
  216. FUNC(mc_part)(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
  217. qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
  218. &weight_op[3], &weight_avg[3],
  219. IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
  220. }else if(IS_SUB_8X4(sub_mb_type)){
  221. FUNC(mc_part)(h, n , 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset,
  222. qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
  223. &weight_op[4], &weight_avg[4],
  224. IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
  225. FUNC(mc_part)(h, n+2, 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
  226. qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
  227. &weight_op[4], &weight_avg[4],
  228. IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
  229. }else if(IS_SUB_4X8(sub_mb_type)){
  230. FUNC(mc_part)(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
  231. qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
  232. &weight_op[5], &weight_avg[5],
  233. IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
  234. FUNC(mc_part)(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
  235. qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
  236. &weight_op[5], &weight_avg[5],
  237. IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
  238. }else{
  239. int j;
  240. assert(IS_SUB_4X4(sub_mb_type));
  241. for(j=0; j<4; j++){
  242. int sub_x_offset= x_offset + 2*(j&1);
  243. int sub_y_offset= y_offset + (j&2);
  244. FUNC(mc_part)(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
  245. qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
  246. &weight_op[6], &weight_avg[6],
  247. IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
  248. }
  249. }
  250. }
  251. }
  252. FUNC(prefetch_motion)(h, 1);
  253. }