You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

738 lines
26KB

  1. /*
  2. * Motion estimation
  3. * Copyright (c) 2002 Michael Niedermayer
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. *
  19. */
  20. // Hopefully gcc removes the unused variables (gcc 3.2.2 seems to do so).
  21. // Note: the `unu` declaration in the macro exists only to silence unused-variable warnings.
  //
  // LOAD_COMMON(x, y) declares the locals shared by the search routines in this file.
  // It expects `s`, `ref_picture` and `size` to be visible in the enclosing scope and provides:
  //   score_map            - s->me.score_map
  //   stride, uvstride     - s->linesize, s->uvlinesize
  //   time_pp, time_pb     - s->pp_time, s->pb_time
  //   src_y, src_u, src_v  - pointers into s->new_picture at (x, y); chroma uses (x>>1, y>>1)
  //   ref_y, ref_u, ref_v  - the same offsets into ref_picture
  //   ref2_y               - the luma offset into s->next_picture
  //   hpel_put, hpel_avg, chroma_hpel_put, qpel_put, qpel_avg
  //                        - pointers into the s->dsp function tables, indexed by `size`
  // The put tables are picked according to s->no_rounding. The expansion ends with an
  // if/else statement, so it is not a pure declaration list.
  22. #define LOAD_COMMON(x, y)\
  23. uint32_t * const score_map= s->me.score_map;\
  24. const int stride= s->linesize;\
  25. const int uvstride= s->uvlinesize;\
  26. const int time_pp= s->pp_time;\
  27. const int time_pb= s->pb_time;\
  28. uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\
  29. uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
  30. uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
  31. uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\
  32. uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
  33. uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
  34. uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\
  35. op_pixels_func (*hpel_put)[4];\
  36. op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
  37. op_pixels_func (*chroma_hpel_put)[4];\
  38. qpel_mc_func (*qpel_put)[16];\
  39. qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
  40. const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\
  41. + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\
  42. if(s->no_rounding /*FIXME b_type*/){\
  43. hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
  44. chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
  45. qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\
  46. }else{\
  47. hpel_put=& s->dsp.put_pixels_tab[size];\
  48. chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\
  49. qpel_put= &s->dsp.put_qpel_pixels_tab[size];\
  50. }
  51. #ifdef CMP_HPEL
  // CHECK_HALF_MV(dx, dy, x, y): evaluate one half-pel candidate.
  // (x, y) is the full-pel position and (dx, dy) the half-pel offset, giving the
  // half-pel coordinate (hx, hy) = (2*x+dx, 2*y+dy). The score is obtained through
  // CMP_HPEL (defined outside this section), then the vector cost
  // (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]) * penalty_factor is added.
  // COPY3_IF_LT (defined elsewhere) presumably stores d, hx, hy into dmin, bx, by
  // when d < dmin -- confirm against its definition.
  // Requires d, dmin, bx, by, size, mv_penalty, pred_x, pred_y and penalty_factor in scope.
  52. #define CHECK_HALF_MV(dx, dy, x, y)\
  53. {\
  54. const int hx= 2*(x)+(dx);\
  55. const int hy= 2*(y)+(dy);\
  56. CMP_HPEL(d, dx, dy, x, y, size);\
  57. d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
  58. COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
  59. }
  60. #if 0
  // Disabled variant of hpel_motion_search (enclosed in the preceding `#if 0`, so it is
  // never compiled). It tests all eight half-pel positions around (mx, my) instead of
  // choosing a subset from neighbouring scores.
  // NOTE(review): CHECK_HALF_MV uses mv_penalty and penalty_factor, which are neither
  // parameters nor locals of this variant; `ptr` is never used; the rounding-table
  // selection repeats what LOAD_COMMON already does; and the assert below is the
  // negation of the range check asserted by the enabled variant.
  61. static int RENAME(hpel_motion_search)(MpegEncContext * s,
  62. int *mx_ptr, int *my_ptr, int dmin,
  63. int xmin, int ymin, int xmax, int ymax,
  64. int pred_x, int pred_y, Picture *ref_picture,
  65. int n, int size)
  66. {
  67. UINT8 *ptr;
  68. const int xx = 16 * s->mb_x + 8*(n&1);
  69. const int yy = 16 * s->mb_y + 8*(n>>1);
  70. const int mx = *mx_ptr;
  71. const int my = *my_ptr;
  72. LOAD_COMMON(xx, yy);
  73. // INIT;
  74. //FIXME factorize
  75. me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
  76. if(s->no_rounding /*FIXME b_type*/){
  77. hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
  78. chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
  79. }else{
  80. hpel_put=& s->dsp.put_pixels_tab[size];
  81. chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
  82. }
  83. cmp= s->dsp.me_cmp[size];
  84. chroma_cmp= s->dsp.me_cmp[size+1];
  85. cmp_sub= s->dsp.me_sub_cmp[size];
  86. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  87. if(s->me.skip){ //FIXME somehow move up (benchmark)
  88. *mx_ptr = 0;
  89. *my_ptr = 0;
  90. return dmin;
  91. }
  92. if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
  93. CMP_HPEL(dmin, 0, 0, mx, my, size);
  94. if(mx || my)
  95. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  96. }
  97. if (mx > xmin && mx < xmax &&
  98. my > ymin && my < ymax) {
  99. int bx=2*mx, by=2*my;
  100. int d= dmin;
  101. CHECK_HALF_MV(1, 1, mx-1, my-1)
  102. CHECK_HALF_MV(0, 1, mx , my-1)
  103. CHECK_HALF_MV(1, 1, mx , my-1)
  104. CHECK_HALF_MV(1, 0, mx-1, my )
  105. CHECK_HALF_MV(1, 0, mx , my )
  106. CHECK_HALF_MV(1, 1, mx-1, my )
  107. CHECK_HALF_MV(0, 1, mx , my )
  108. CHECK_HALF_MV(1, 1, mx , my )
  109. assert(bx < xmin*2 || bx > xmax*2 || by < ymin*2 || by > ymax*2);
  110. *mx_ptr = bx;
  111. *my_ptr = by;
  112. }else{
  113. *mx_ptr =2*mx;
  114. *my_ptr =2*my;
  115. }
  116. return dmin;
  117. }
  118. #else
  // Half-pel refinement of the full-pel vector stored in (*mx_ptr, *my_ptr).
  //
  //   s                 encoder context (reads me.*, dsp.*, avctx, mb_x, mb_y)
  //   mx_ptr, my_ptr    in: full-pel vector; out: vector in half-pel units
  //   dmin              score of the incoming vector
  //   xmin..ymax        full-pel search range
  //   pred_x, pred_y    predictor subtracted before indexing mv_penalty
  //   ref_picture       reference picture (consumed by LOAD_COMMON)
  //   n                 selects the 8-pixel x/y offset inside the macroblock (n&1, n>>1)
  //   size              index into the dsp comparison / pixel function tables
  //   mv_penalty        vector cost table
  //
  // Returns the best score found. When s->me.skip is set the vector is forced to (0,0)
  // and dmin is returned unchanged.
  119. static int RENAME(hpel_motion_search)(MpegEncContext * s,
  120. int *mx_ptr, int *my_ptr, int dmin,
  121. int xmin, int ymin, int xmax, int ymax,
  122. int pred_x, int pred_y, Picture *ref_picture,
  123. int n, int size, uint16_t * const mv_penalty)
  124. {
  125. const int xx = 16 * s->mb_x + 8*(n&1);
  126. const int yy = 16 * s->mb_y + 8*(n>>1);
  127. const int mx = *mx_ptr;
  128. const int my = *my_ptr;
  129. const int penalty_factor= s->me.sub_penalty_factor;
  130. me_cmp_func cmp_sub, chroma_cmp_sub;
  131. LOAD_COMMON(xx, yy);
  132. //FIXME factorize
  133. cmp_sub= s->dsp.me_sub_cmp[size];
  134. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  135. if(s->me.skip){ //FIXME move out of hpel?
  136. *mx_ptr = 0;
  137. *my_ptr = 0;
  138. return dmin;
  139. }
      // The full-pel and sub-pel comparison settings differ, so the centre score is
      // recomputed through CMP_HPEL to make it comparable with the half-pel candidates.
  140. if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
  141. CMP_HPEL(dmin, 0, 0, mx, my, size);
  142. if(mx || my)
  143. dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
  144. }
      // Only refine when the vector lies strictly inside the range, so every
      // neighbouring position is itself within [xmin,xmax] x [ymin,ymax].
  145. if (mx > xmin && mx < xmax &&
  146. my > ymin && my < ymax) {
  147. int bx=2*mx, by=2*my;
  148. int d= dmin;
  149. const int index= (my<<ME_MAP_SHIFT) + mx;
      // t/l/r/b: stored scores of the top/left/right/bottom full-pel neighbours read
      // from score_map, plus the vector cost of that neighbour.
  150. const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  151. + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*penalty_factor;
  152. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
  153. + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor;
  154. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
  155. + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor;
  156. const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
  157. + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*penalty_factor;
      // The neighbour scores decide which subset of the eight half-pel positions is
      // actually evaluated: the side with the lower neighbour score is preferred.
  158. if(t<=b){
  159. CHECK_HALF_MV(0, 1, mx ,my-1)
  160. if(l<=r){
  161. CHECK_HALF_MV(1, 1, mx-1, my-1)
  162. if(t+r<=b+l){
  163. CHECK_HALF_MV(1, 1, mx , my-1)
  164. }else{
  165. CHECK_HALF_MV(1, 1, mx-1, my )
  166. }
  167. CHECK_HALF_MV(1, 0, mx-1, my )
  168. }else{
  169. CHECK_HALF_MV(1, 1, mx , my-1)
  170. if(t+l<=b+r){
  171. CHECK_HALF_MV(1, 1, mx-1, my-1)
  172. }else{
  173. CHECK_HALF_MV(1, 1, mx , my )
  174. }
  175. CHECK_HALF_MV(1, 0, mx , my )
  176. }
  177. }else{
  178. if(l<=r){
  179. if(t+l<=b+r){
  180. CHECK_HALF_MV(1, 1, mx-1, my-1)
  181. }else{
  182. CHECK_HALF_MV(1, 1, mx , my )
  183. }
  184. CHECK_HALF_MV(1, 0, mx-1, my)
  185. CHECK_HALF_MV(1, 1, mx-1, my)
  186. }else{
  187. if(t+r<=b+l){
  188. CHECK_HALF_MV(1, 1, mx , my-1)
  189. }else{
  190. CHECK_HALF_MV(1, 1, mx-1, my)
  191. }
  192. CHECK_HALF_MV(1, 0, mx , my)
  193. CHECK_HALF_MV(1, 1, mx , my)
  194. }
  195. CHECK_HALF_MV(0, 1, mx , my)
  196. }
  197. assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
  198. *mx_ptr = bx;
  199. *my_ptr = by;
  200. }else{
      // On the range border no refinement is attempted; just convert to half-pel units.
  201. *mx_ptr =2*mx;
  202. *my_ptr =2*my;
  203. }
  204. return dmin;
  205. }
  206. #endif
  207. #endif /* CMP_HPEL */
  208. #ifdef CMP_QPEL
  // CHECK_QUARTER_MV(dx, dy, x, y): evaluate one quarter-pel candidate.
  // (x, y) is the full-pel position and (dx, dy) the quarter-pel offset, giving the
  // quarter-pel coordinate (hx, hy) = (4*x+dx, 4*y+dy). The score comes from CMP_QPEL
  // (defined outside this section), then the vector cost
  // (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]) * penalty_factor is added.
  // COPY3_IF_LT (defined elsewhere) presumably stores d, hx, hy into dmin, bx, by
  // when d < dmin -- confirm against its definition.
  // Requires d, dmin, bx, by, size, mv_penalty, pred_x, pred_y and penalty_factor in scope.
  209. #define CHECK_QUARTER_MV(dx, dy, x, y)\
  210. {\
  211. const int hx= 4*(x)+(dx);\
  212. const int hy= 4*(y)+(dy);\
  213. CMP_QPEL(d, dx, dy, x, y, size);\
  214. d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
  215. COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
  216. }
  // Quarter-pel refinement of the full-pel vector stored in (*mx_ptr, *my_ptr).
  //
  //   s                 encoder context (reads me.*, dsp.*, avctx, mb_x, mb_y)
  //   mx_ptr, my_ptr    in: full-pel vector; out: vector in quarter-pel units
  //   dmin              score of the incoming vector
  //   xmin..ymax        full-pel search range
  //   pred_x, pred_y    predictor subtracted before indexing mv_penalty
  //   ref_picture       reference picture (consumed by LOAD_COMMON)
  //   n                 selects the 8-pixel x/y offset inside the macroblock (n&1, n>>1)
  //   size              index into the dsp comparison / pixel function tables
  //   mv_penalty        vector cost table
  //
  // Strategy of the enabled (`#if 1`) path: estimate a score for every quarter-pel
  // offset (nx, ny) in [-3,3]x[-3,3] except (0,0) from the already known full-pel
  // scores, keep the eight lowest estimates, and evaluate only those eight positions
  // for real with CHECK_QUARTER_MV.
  //
  // Returns the best score found. When s->me.skip is set the vector is forced to (0,0)
  // and dmin is returned unchanged.
  217. static int RENAME(qpel_motion_search)(MpegEncContext * s,
  218. int *mx_ptr, int *my_ptr, int dmin,
  219. int xmin, int ymin, int xmax, int ymax,
  220. int pred_x, int pred_y, Picture *ref_picture,
  221. int n, int size, uint16_t * const mv_penalty)
  222. {
  223. const int xx = 16 * s->mb_x + 8*(n&1);
  224. const int yy = 16 * s->mb_y + 8*(n>>1);
  225. const int mx = *mx_ptr;
  226. const int my = *my_ptr;
  227. const int penalty_factor= s->me.sub_penalty_factor;
  228. const int map_generation= s->me.map_generation;
  229. uint32_t *map= s->me.map;
  230. me_cmp_func cmp, chroma_cmp;
  231. me_cmp_func cmp_sub, chroma_cmp_sub;
  232. LOAD_COMMON(xx, yy);
  233. cmp= s->dsp.me_cmp[size];
  234. chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME
  235. //FIXME factorize
  236. cmp_sub= s->dsp.me_sub_cmp[size];
  237. chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
  238. if(s->me.skip){ //FIXME somehow move up (benchmark)
  239. *mx_ptr = 0;
  240. *my_ptr = 0;
  241. return dmin;
  242. }
      // The full-pel and sub-pel comparison settings differ, so the centre score is
      // recomputed through CMP_QPEL to make it comparable with the quarter-pel candidates.
  243. if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
  244. CMP_QPEL(dmin, 0, 0, mx, my, size);
  245. if(mx || my)
  246. dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
  247. }
  248. if (mx > xmin && mx < xmax &&
  249. my > ymin && my < ymax) {
  250. int bx=4*mx, by=4*my;
  251. int d= dmin;
  252. int i, nx, ny;
  253. const int index= (my<<ME_MAP_SHIFT) + mx;
      // Stored scores of the top, left, right, bottom neighbours and of the centre.
  254. const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  255. const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
  256. const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
  257. const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
  258. const int c= score_map[(index )&(ME_MAP_SIZE-1)];
      // best[] holds the eight lowest estimated scores in ascending order and best_pos[]
      // the matching quarter-pel coordinates. memset fills every byte with 64, so each
      // entry starts as a large sentinel (0x40404040 for a 32-bit int).
      // NOTE(review): best_pos[] is only written when an estimate is below the sentinel;
      // if fewer than eight estimates qualify, the evaluation loop further down would
      // read uninitialised entries -- confirm this cannot happen for realistic scores.
  259. int best[8];
  260. int best_pos[8][2];
  261. memset(best, 64, sizeof(int)*8);
  262. #if 1
  263. if(s->avctx->dia_size>=2){
      // Diagonal neighbour scores are taken straight from score_map in this branch.
  264. const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  265. const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  266. const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  267. const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
  268. for(ny= -3; ny <= 3; ny++){
  269. for(nx= -3; nx <= 3; nx++){
      // t2/c2/b2: quadratic in nx through the three samples of the top/centre/bottom row,
      // scaled by 32; `score` applies the same quadratic along ny, hence the 1024 scale.
  270. const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
  271. const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
  272. const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
  273. int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2;
  274. int i;
      // Skip the centre itself (the only offset in range with both low bit pairs zero).
  275. if((nx&3)==0 && (ny&3)==0) continue;
  276. score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  277. // if(nx&1) score-=1024*s->me.penalty_factor;
  278. // if(ny&1) score-=1024*s->me.penalty_factor;
      // Sorted insertion: shift the tail down by one slot and place the new entry.
  279. for(i=0; i<8; i++){
  280. if(score < best[i]){
  281. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  282. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  283. best[i]= score;
  284. best_pos[i][0]= nx + 4*mx;
  285. best_pos[i][1]= ny + 4*my;
  286. break;
  287. }
  288. }
  289. }
  290. }
  291. }else{
      // Fit score(nx,ny) = ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c
      // to the centre, its four direct neighbours and the top-left neighbour.
  292. int tl;
  293. const int cx = 4*(r - l);
  294. const int cx2= r + l - 2*c;
  295. const int cy = 4*(b - t);
  296. const int cy2= b + t - 2*c;
  297. int cxy;
      // The trailing `&& 0` disables the map lookup, so tl is always recomputed via CMP.
  298. if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
  299. tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
  300. }else{
  301. CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different
  302. }
  303. cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
      // Sanity checks: the fitted surface reproduces r, l, b, t and tl (all scaled by 32).
  304. assert(16*cx2 + 4*cx + 32*c == 32*r);
  305. assert(16*cx2 - 4*cx + 32*c == 32*l);
  306. assert(16*cy2 + 4*cy + 32*c == 32*b);
  307. assert(16*cy2 - 4*cy + 32*c == 32*t);
  308. assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
  309. for(ny= -3; ny <= 3; ny++){
  310. for(nx= -3; nx <= 3; nx++){
  311. int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
  312. int i;
  313. if((nx&3)==0 && (ny&3)==0) continue;
  314. score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
  315. // if(nx&1) score-=32*s->me.penalty_factor;
  316. // if(ny&1) score-=32*s->me.penalty_factor;
  317. for(i=0; i<8; i++){
  318. if(score < best[i]){
  319. memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
  320. memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
  321. best[i]= score;
  322. best_pos[i][0]= nx + 4*mx;
  323. best_pos[i][1]= ny + 4*my;
  324. break;
  325. }
  326. }
  327. }
  328. }
  329. }
      // Evaluate the eight most promising positions for real; each quarter-pel coordinate
      // is split into its fractional part (&3) and its full-pel part (>>2).
  330. for(i=0; i<8; i++){
  331. nx= best_pos[i][0];
  332. ny= best_pos[i][1];
  333. CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
  334. }
      // Disabled statistics dump of how far the chosen vector moved from the centre.
  335. #if 0
  336. nx= FFMAX(4*mx - bx, bx - 4*mx);
  337. ny= FFMAX(4*my - by, by - 4*my);
  338. static int stats[4][4];
  339. stats[nx][ny]++;
  340. if(256*256*256*64 % (stats[0][0]+1) ==0){
  341. for(i=0; i<16; i++){
  342. if((i&3)==0) printf("\n");
  343. printf("%6d ", stats[0][i]);
  344. }
  345. printf("\n");
  346. }
  347. #endif
  348. #else
      // Disabled alternative: test the eight half-pel positions first, then the eight
      // quarter-pel neighbours of the best one.
  349. CHECK_QUARTER_MV(2, 2, mx-1, my-1)
  350. CHECK_QUARTER_MV(0, 2, mx , my-1)
  351. CHECK_QUARTER_MV(2, 2, mx , my-1)
  352. CHECK_QUARTER_MV(2, 0, mx , my )
  353. CHECK_QUARTER_MV(2, 2, mx , my )
  354. CHECK_QUARTER_MV(0, 2, mx , my )
  355. CHECK_QUARTER_MV(2, 2, mx-1, my )
  356. CHECK_QUARTER_MV(2, 0, mx-1, my )
  357. nx= bx;
  358. ny= by;
  359. for(i=0; i<8; i++){
  360. int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
  361. int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
  362. CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
  363. }
  364. #endif
  365. #if 0
  366. //outer ring
  367. CHECK_QUARTER_MV(1, 3, mx-1, my-1)
  368. CHECK_QUARTER_MV(1, 2, mx-1, my-1)
  369. CHECK_QUARTER_MV(1, 1, mx-1, my-1)
  370. CHECK_QUARTER_MV(2, 1, mx-1, my-1)
  371. CHECK_QUARTER_MV(3, 1, mx-1, my-1)
  372. CHECK_QUARTER_MV(0, 1, mx , my-1)
  373. CHECK_QUARTER_MV(1, 1, mx , my-1)
  374. CHECK_QUARTER_MV(2, 1, mx , my-1)
  375. CHECK_QUARTER_MV(3, 1, mx , my-1)
  376. CHECK_QUARTER_MV(3, 2, mx , my-1)
  377. CHECK_QUARTER_MV(3, 3, mx , my-1)
  378. CHECK_QUARTER_MV(3, 0, mx , my )
  379. CHECK_QUARTER_MV(3, 1, mx , my )
  380. CHECK_QUARTER_MV(3, 2, mx , my )
  381. CHECK_QUARTER_MV(3, 3, mx , my )
  382. CHECK_QUARTER_MV(2, 3, mx , my )
  383. CHECK_QUARTER_MV(1, 3, mx , my )
  384. CHECK_QUARTER_MV(0, 3, mx , my )
  385. CHECK_QUARTER_MV(3, 3, mx-1, my )
  386. CHECK_QUARTER_MV(2, 3, mx-1, my )
  387. CHECK_QUARTER_MV(1, 3, mx-1, my )
  388. CHECK_QUARTER_MV(1, 2, mx-1, my )
  389. CHECK_QUARTER_MV(1, 1, mx-1, my )
  390. CHECK_QUARTER_MV(1, 0, mx-1, my )
  391. #endif
  392. assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
  393. *mx_ptr = bx;
  394. *my_ptr = by;
  395. }else{
      // On the range border no refinement is attempted; just convert to quarter-pel units.
  396. *mx_ptr =4*mx;
  397. *my_ptr =4*my;
  398. }
  399. return dmin;
  400. }
  401. #endif /* CMP_QPEL */
  // CHECK_MV(x, y): evaluate the full-pel candidate (x, y) unless it is already cached.
  // `key` combines the position with map_generation and `index` is the position folded
  // into the map size; map[index] == key means this position was already evaluated with
  // the current generation value, in which case nothing happens. Otherwise the score is
  // computed through CMP (defined outside this section), stored in score_map *before*
  // the vector cost is added, and the total is compared against dmin. COPY3_IF_LT
  // (defined elsewhere) presumably updates dmin, best[0], best[1] when d < dmin.
  // Requires d, dmin, best, map, score_map, map_generation, size, shift, mv_penalty,
  // pred_x, pred_y and penalty_factor in scope. Arguments are expanded several times.
  402. #define CHECK_MV(x,y)\
  403. {\
  404. const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
  405. const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
  406. if(map[index]!=key){\
  407. CMP(d, x, y, size);\
  408. map[index]= key;\
  409. score_map[index]= d;\
  410. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
  411. COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
  412. }\
  413. }
  // CHECK_MV_DIR(x, y, new_dir): same caching and scoring as CHECK_MV, but on an
  // improvement it additionally records new_dir in next_dir so the caller knows which
  // direction the search just moved in. The update of best/dmin is written out
  // explicitly here instead of going through COPY3_IF_LT.
  // Requires d, dmin, best, next_dir, map, score_map, map_generation, size, shift,
  // mv_penalty, pred_x, pred_y and penalty_factor in scope.
  414. #define CHECK_MV_DIR(x,y,new_dir)\
  415. {\
  416. const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
  417. const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
  418. if(map[index]!=key){\
  419. CMP(d, x, y, size);\
  420. map[index]= key;\
  421. score_map[index]= d;\
  422. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
  423. if(d<dmin){\
  424. best[0]=x;\
  425. best[1]=y;\
  426. dmin=d;\
  427. next_dir= new_dir;\
  428. }\
  429. }\
  430. }
  // check(x, y, S, v): debugging aid that prints a diagnostic whenever (x, y) lies
  // outside the search range scaled by S, i.e. outside [xmin<<S, xmax<<S] x
  // [ymin<<S, ymax<<S]. `v` is stringified and appended to the message as a call-site
  // tag. Requires s, xmin, xmax, ymin and ymax in scope; every use in this file is
  // currently commented out.
  // The final line deliberately has NO trailing backslash: a continuation there would
  // splice whatever line follows the macro (here the first line of the next function
  // definition) into the macro body.
  431. #define check(x,y,S,v)\
  432. if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
  433. if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
  434. if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
  435. if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);
  // Iterative search over the four direct full-pel neighbours of the current best.
  //
  //   best            in/out: best[0], best[1] = current best position (full-pel)
  //   dmin            score of the incoming best position
  //   xmin..ymax      search range; a neighbour is only tried if it stays inside
  //   shift           left shift applied to a position before indexing mv_penalty
  //   map, map_generation, size, mv_penalty, pred_x, pred_y, penalty_factor
  //                   consumed by CHECK_MV_DIR
  //
  // Each round tries left (0), up (1), right (2) and down (3). The direction opposite
  // to the previous move is skipped, since that is the position just left behind.
  // The loop ends when a round brings no improvement. Returns the best score; `best`
  // holds the matching position.
  436. static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin,
  437. Picture *ref_picture,
  438. int const pred_x, int const pred_y, int const penalty_factor,
  439. int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
  440. uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
  441. )
  442. {
  443. me_cmp_func cmp, chroma_cmp;
  444. int next_dir=-1;
  445. LOAD_COMMON(s->mb_x*16, s->mb_y*16);
  446. cmp= s->dsp.me_cmp[size];
  447. chroma_cmp= s->dsp.me_cmp[size+1];
  448. for(;;){
  449. int d;
      // dir: direction taken in the previous round (-1 on the first round).
  450. const int dir= next_dir;
  451. const int x= best[0];
  452. const int y= best[1];
  453. next_dir=-1;
  454. //printf("%d", dir);
  455. if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
  456. if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
  457. if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
  458. if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
      // No neighbour improved on dmin: the search has converged.
  459. if(next_dir==-1){
  460. return dmin;
  461. }
  462. }
  463. }
  // Diamond search with a growing radius, up to s->avctx->dia_size.
  //
  //   best            in/out: best[0], best[1] = current best position (full-pel)
  //   dmin            score of the incoming best position
  //   xmin..ymax      search range, used to clip each diamond edge
  //   shift           left shift applied to a position before indexing mv_penalty
  //   map, map_generation, size, mv_penalty, pred_x, pred_y, penalty_factor
  //                   consumed by CHECK_MV
  //
  // For each radius the four edges of the diamond around the current best are walked;
  // start/end limit every edge to the part inside the range. As soon as a radius yields
  // a better position the radius is reset, so the search restarts at radius 1 around
  // the new best. Returns the best score; `best` holds the matching position.
  464. static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin,
  465. Picture *ref_picture,
  466. int const pred_x, int const pred_y, int const penalty_factor,
  467. int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
  468. uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
  469. )
  470. {
  471. me_cmp_func cmp, chroma_cmp;
  472. int dia_size=1;
  473. LOAD_COMMON(s->mb_x*16, s->mb_y*16);
  474. cmp= s->dsp.me_cmp[size];
  475. chroma_cmp= s->dsp.me_cmp[size+1];
  476. for(dia_size=1; dia_size<=s->avctx->dia_size; dia_size++){
  477. int dir, start, end;
  478. const int x= best[0];
  479. const int y= best[1];
      // Edge from (x, y+dia_size) towards (x+dia_size, y).
  480. start= FFMAX(0, y + dia_size - ymax);
  481. end = FFMIN(dia_size, xmax - x);
  482. for(dir= start; dir<end; dir++){
  483. int d;
  484. //check(x + dir,y + dia_size - dir,0, a0)
  485. CHECK_MV(x + dir , y + dia_size - dir);
  486. }
      // Edge from (x+dia_size, y) towards (x, y-dia_size).
  487. start= FFMAX(0, x + dia_size - xmax);
  488. end = FFMIN(dia_size, y - ymin);
  489. for(dir= start; dir<end; dir++){
  490. int d;
  491. //check(x + dia_size - dir, y - dir,0, a1)
  492. CHECK_MV(x + dia_size - dir, y - dir );
  493. }
      // Edge from (x, y-dia_size) towards (x-dia_size, y).
  494. start= FFMAX(0, -y + dia_size + ymin );
  495. end = FFMIN(dia_size, x - xmin);
  496. for(dir= start; dir<end; dir++){
  497. int d;
  498. //check(x - dir,y - dia_size + dir,0, a2)
  499. CHECK_MV(x - dir , y - dia_size + dir);
  500. }
      // Edge from (x-dia_size, y) towards (x, y+dia_size).
  501. start= FFMAX(0, -x + dia_size + xmin );
  502. end = FFMIN(dia_size, ymax - y);
  503. for(dir= start; dir<end; dir++){
  504. int d;
  505. //check(x - dia_size + dir, y + dir,0, a3)
  506. CHECK_MV(x - dia_size + dir, y + dir );
  507. }
      // The best position moved: reset to 0 so the loop increment restarts at radius 1.
  508. if(x!=best[0] || y!=best[1])
  509. dia_size=0;
  510. }
  511. return dmin;
  512. }
  // Predictor-based full-pel motion search for a whole macroblock (size index 0).
  //
  //   s                 encoder context
  //   block             not referenced by the code visible in this function
  //   mx_ptr, my_ptr    out: best full-pel vector
  //   P                 predictor table; accessed through the P_LEFT, P_TOP, P_TOPRIGHT,
  //                     P_MEDIAN, P_LAST, P_LAST_RIGHT and P_LAST_BOTTOM macros, which
  //                     are defined outside this section (presumably rows of P)
  //   pred_x, pred_y    predictor subtracted before indexing mv_penalty
  //   xmin..ymax        full-pel search range
  //   ref_picture       reference picture (consumed by LOAD_COMMON)
  //   mv_penalty        vector cost table
  //
  // The zero vector is scored first, then a set of predictors (shifted down by `shift`
  // to full-pel units) chosen by how good the current score already is, and finally one
  // of the two diamond searches refines the result. Returns the best score found.
  // Side effect: sets s->me.skip = 1 on the early exit below.
  513. static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
  514. int *mx_ptr, int *my_ptr,
  515. int P[10][2], int pred_x, int pred_y,
  516. int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
  517. {
  518. int best[2]={0, 0};
  519. int d, dmin;
  520. const int shift= 1+s->quarter_sample;
  521. uint32_t *map= s->me.map;
  522. int map_generation;
  523. const int penalty_factor= s->me.penalty_factor;
  524. const int size=0;
  525. me_cmp_func cmp, chroma_cmp;
  526. LOAD_COMMON(s->mb_x*16, s->mb_y*16);
  527. cmp= s->dsp.me_cmp[size];
  528. chroma_cmp= s->dsp.me_cmp[size+1];
  529. map_generation= update_map_generation(s);
      // Score the zero vector and enter it into the cache (index 0, key = generation).
  530. CMP(dmin, 0, 0, size);
  531. map[0]= map_generation;
  532. score_map[0]= dmin;
  533. /* first line */
  534. if ((s->mb_y == 0 || s->first_slice_line)) {
  535. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  536. CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
  537. }else{
      // Early exit: the zero vector is already cheap, the left/top/top-right predictors
      // are all zero and no diamond refinement is requested.
  538. if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
  539. |P_TOP[0] |P_TOP[1]
  540. |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0 && s->avctx->dia_size==0){
  541. *mx_ptr= 0;
  542. *my_ptr= 0;
  543. s->me.skip=1;
  544. return dmin;
  545. }
  546. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  547. if(dmin>256*2){
  548. CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift)
  549. CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
  550. CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
  551. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  552. }
  553. }
      // Still a poor match: also try the P_LAST_RIGHT and P_LAST_BOTTOM predictors.
  554. if(dmin>256*4){
  555. CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
  556. CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
  557. }
  558. #if 0 // disabled: this only slows things down
  559. if(dmin>512*3){
  560. int step;
  561. dmin= score_map[0];
  562. best[0]= best[1]=0;
  563. for(step=128; step>0; step>>=1){
  564. const int step2= step;
  565. int y;
  566. for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
  567. int x;
  568. if(y<ymin || y>ymax) continue;
  569. for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
  570. if(x<xmin || x>xmax) continue;
  571. if(x==best[0] && y==best[1]) continue;
  572. CHECK_MV(x,y)
  573. }
  574. }
  575. }
  576. }
  577. #endif
  578. //check(best[0],best[1],0, b0)
      // Refinement: small diamond for dia_size < 2, variable-size diamond otherwise.
  579. if(s->avctx->dia_size<2)
  580. dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
  581. pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
  582. shift, map, map_generation, size, mv_penalty);
  583. else
  584. dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
  585. pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
  586. shift, map, map_generation, size, mv_penalty);
  587. //check(best[0],best[1],0, b1)
  588. *mx_ptr= best[0];
  589. *my_ptr= best[1];
  590. // printf("%d %d %d \n", best[0], best[1], dmin);
  591. return dmin;
  592. }
  593. #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */
  // Predictor-based full-pel motion search for one 8x8 block (size index 1).
  //
  //   s                 encoder context
  //   block             block number inside the macroblock; block&1 and block>>1 select
  //                     the 8-pixel column/row, and block<2 gates the first-line case
  //   mx_ptr, my_ptr    out: best full-pel vector
  //   P                 predictor table; accessed through the P_LEFT, P_TOP, P_TOPRIGHT,
  //                     P_MEDIAN, P_LAST, P_MV1, P_LAST_RIGHT and P_LAST_BOTTOM macros,
  //                     which are defined outside this section (presumably rows of P)
  //   pred_x, pred_y    predictor subtracted before indexing mv_penalty
  //   xmin..ymax        full-pel search range
  //   ref_picture       reference picture (consumed by LOAD_COMMON)
  //   mv_penalty        vector cost table
  //
  // Unlike epzs_motion_search, the zero vector is not scored up front: dmin starts at a
  // large constant and only the predictors are tried before the diamond refinement.
  // Returns the best score found.
  594. static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
  595. int *mx_ptr, int *my_ptr,
  596. int P[10][2], int pred_x, int pred_y,
  597. int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
  598. {
  599. int best[2]={0, 0};
  600. int d, dmin;
  601. const int shift= 1+s->quarter_sample;
  602. uint32_t *map= s->me.map;
  603. int map_generation;
  604. const int penalty_factor= s->me.penalty_factor;
  605. const int size=1;
  606. me_cmp_func cmp, chroma_cmp;
  607. LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8);
  608. cmp= s->dsp.me_cmp[size];
  609. chroma_cmp= s->dsp.me_cmp[size+1];
  610. map_generation= update_map_generation(s);
      // Start from a large sentinel so that the first evaluated predictor is accepted.
  611. dmin = 1000000;
  612. //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
  613. /* first line */
  614. if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
  615. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  616. CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
  617. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  618. }else{
  619. CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
  620. //FIXME try some early stop
  621. if(dmin>64*2){
  622. CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
  623. CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
  624. CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
  625. CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
  626. CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
  627. }
  628. }
      // Still a poor match: also try the P_LAST_RIGHT and P_LAST_BOTTOM predictors.
  629. if(dmin>64*4){
  630. CHECK_MV(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
  631. CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
  632. }
      // Refinement: small diamond for dia_size < 2, variable-size diamond otherwise.
  633. if(s->avctx->dia_size<2)
  634. dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
  635. pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
  636. shift, map, map_generation, size, mv_penalty);
  637. else
  638. dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
  639. pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
  640. shift, map, map_generation, size, mv_penalty);
  641. *mx_ptr= best[0];
  642. *my_ptr= best[1];
  643. // printf("%d %d %d \n", best[0], best[1], dmin);
  644. return dmin;
  645. }
  646. #endif /* !CMP_DIRECT */