You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1073 lines
29KB

  1. /*
  2. * Motion estimation
  3. * Copyright (c) 2000,2001 Gerard Lantau.
  4. *
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19. *
  20. * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
  21. */
  22. #include <stdlib.h>
  23. #include <stdio.h>
  24. #include "avcodec.h"
  25. #include "dsputil.h"
  26. #include "mpegvideo.h"
/* Absolute value of a; note that the argument is evaluated more than once. */
#define ABS(a) ((a)>0 ? (a) : -(a))
/* Larger of a and b; the selected argument is evaluated twice. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* Constant added to the intra deviation measure; in this file it is only
   referenced from a disabled (#if 0) code path of estimate_motion(). */
#define INTER_BIAS 257
/* Forward declaration: the definition appears further down in this file,
   after the integer-pel search routines. */
static void halfpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int xmin, int ymin, int xmax, int ymax,
                                  int pred_x, int pred_y);
/* Define CONFIG_TEST_MV_ENCODE to test motion vector encoding: it replaces
   estimate_motion() with a version that sends random vectors. */
//#define CONFIG_TEST_MV_ENCODE
  36. static int pix_sum(UINT8 * pix, int line_size)
  37. {
  38. int s, i, j;
  39. s = 0;
  40. for (i = 0; i < 16; i++) {
  41. for (j = 0; j < 16; j += 8) {
  42. s += pix[0];
  43. s += pix[1];
  44. s += pix[2];
  45. s += pix[3];
  46. s += pix[4];
  47. s += pix[5];
  48. s += pix[6];
  49. s += pix[7];
  50. pix += 8;
  51. }
  52. pix += line_size - 16;
  53. }
  54. return s;
  55. }
  56. static int pix_dev(UINT8 * pix, int line_size, int mean)
  57. {
  58. int s, i, j;
  59. s = 0;
  60. for (i = 0; i < 16; i++) {
  61. for (j = 0; j < 16; j += 8) {
  62. s += ABS(pix[0]-mean);
  63. s += ABS(pix[1]-mean);
  64. s += ABS(pix[2]-mean);
  65. s += ABS(pix[3]-mean);
  66. s += ABS(pix[4]-mean);
  67. s += ABS(pix[5]-mean);
  68. s += ABS(pix[6]-mean);
  69. s += ABS(pix[7]-mean);
  70. pix += 8;
  71. }
  72. pix += line_size - 16;
  73. }
  74. return s;
  75. }
  76. static int pix_norm1(UINT8 * pix, int line_size)
  77. {
  78. int s, i, j;
  79. UINT32 *sq = squareTbl + 256;
  80. s = 0;
  81. for (i = 0; i < 16; i++) {
  82. for (j = 0; j < 16; j += 8) {
  83. s += sq[pix[0]];
  84. s += sq[pix[1]];
  85. s += sq[pix[2]];
  86. s += sq[pix[3]];
  87. s += sq[pix[4]];
  88. s += sq[pix[5]];
  89. s += sq[pix[6]];
  90. s += sq[pix[7]];
  91. pix += 8;
  92. }
  93. pix += line_size - 16;
  94. }
  95. return s;
  96. }
  97. static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
  98. {
  99. int s, i, j;
  100. UINT32 *sq = squareTbl + 256;
  101. s = 0;
  102. for (i = 0; i < 16; i++) {
  103. for (j = 0; j < 16; j += 8) {
  104. s += sq[pix1[0] - pix2[0]];
  105. s += sq[pix1[1] - pix2[1]];
  106. s += sq[pix1[2] - pix2[2]];
  107. s += sq[pix1[3] - pix2[3]];
  108. s += sq[pix1[4] - pix2[4]];
  109. s += sq[pix1[5] - pix2[5]];
  110. s += sq[pix1[6] - pix2[6]];
  111. s += sq[pix1[7] - pix2[7]];
  112. pix1 += 8;
  113. pix2 += 8;
  114. }
  115. pix1 += line_size - 16;
  116. pix2 += line_size - 16;
  117. }
  118. return s;
  119. }
  120. static void no_motion_search(MpegEncContext * s,
  121. int *mx_ptr, int *my_ptr)
  122. {
  123. *mx_ptr = 16 * s->mb_x;
  124. *my_ptr = 16 * s->mb_y;
  125. }
  126. static int full_motion_search(MpegEncContext * s,
  127. int *mx_ptr, int *my_ptr, int range,
  128. int xmin, int ymin, int xmax, int ymax)
  129. {
  130. int x1, y1, x2, y2, xx, yy, x, y;
  131. int mx, my, dmin, d;
  132. UINT8 *pix;
  133. xx = 16 * s->mb_x;
  134. yy = 16 * s->mb_y;
  135. x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
  136. if (x1 < xmin)
  137. x1 = xmin;
  138. x2 = xx + range - 1;
  139. if (x2 > xmax)
  140. x2 = xmax;
  141. y1 = yy - range + 1;
  142. if (y1 < ymin)
  143. y1 = ymin;
  144. y2 = yy + range - 1;
  145. if (y2 > ymax)
  146. y2 = ymax;
  147. pix = s->new_picture[0] + (yy * s->linesize) + xx;
  148. dmin = 0x7fffffff;
  149. mx = 0;
  150. my = 0;
  151. for (y = y1; y <= y2; y++) {
  152. for (x = x1; x <= x2; x++) {
  153. d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x,
  154. s->linesize);
  155. if (d < dmin ||
  156. (d == dmin &&
  157. (abs(x - xx) + abs(y - yy)) <
  158. (abs(mx - xx) + abs(my - yy)))) {
  159. dmin = d;
  160. mx = x;
  161. my = y;
  162. }
  163. }
  164. }
  165. *mx_ptr = mx;
  166. *my_ptr = my;
  167. #if 0
  168. if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
  169. *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
  170. fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
  171. }
  172. #endif
  173. return dmin;
  174. }
  175. static int log_motion_search(MpegEncContext * s,
  176. int *mx_ptr, int *my_ptr, int range,
  177. int xmin, int ymin, int xmax, int ymax)
  178. {
  179. int x1, y1, x2, y2, xx, yy, x, y;
  180. int mx, my, dmin, d;
  181. UINT8 *pix;
  182. xx = s->mb_x << 4;
  183. yy = s->mb_y << 4;
  184. /* Left limit */
  185. x1 = xx - range;
  186. if (x1 < xmin)
  187. x1 = xmin;
  188. /* Right limit */
  189. x2 = xx + range;
  190. if (x2 > xmax)
  191. x2 = xmax;
  192. /* Upper limit */
  193. y1 = yy - range;
  194. if (y1 < ymin)
  195. y1 = ymin;
  196. /* Lower limit */
  197. y2 = yy + range;
  198. if (y2 > ymax)
  199. y2 = ymax;
  200. pix = s->new_picture[0] + (yy * s->linesize) + xx;
  201. dmin = 0x7fffffff;
  202. mx = 0;
  203. my = 0;
  204. do {
  205. for (y = y1; y <= y2; y += range) {
  206. for (x = x1; x <= x2; x += range) {
  207. d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
  208. if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
  209. dmin = d;
  210. mx = x;
  211. my = y;
  212. }
  213. }
  214. }
  215. range = range >> 1;
  216. x1 = mx - range;
  217. if (x1 < xmin)
  218. x1 = xmin;
  219. x2 = mx + range;
  220. if (x2 > xmax)
  221. x2 = xmax;
  222. y1 = my - range;
  223. if (y1 < ymin)
  224. y1 = ymin;
  225. y2 = my + range;
  226. if (y2 > ymax)
  227. y2 = ymax;
  228. } while (range >= 1);
  229. #ifdef DEBUG
  230. fprintf(stderr, "log - MX: %d\tMY: %d\n", mx, my);
  231. #endif
  232. *mx_ptr = mx;
  233. *my_ptr = my;
  234. return dmin;
  235. }
  236. static int phods_motion_search(MpegEncContext * s,
  237. int *mx_ptr, int *my_ptr, int range,
  238. int xmin, int ymin, int xmax, int ymax)
  239. {
  240. int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
  241. int mx, my, dminx, dminy;
  242. UINT8 *pix;
  243. xx = s->mb_x << 4;
  244. yy = s->mb_y << 4;
  245. /* Left limit */
  246. x1 = xx - range;
  247. if (x1 < xmin)
  248. x1 = xmin;
  249. /* Right limit */
  250. x2 = xx + range;
  251. if (x2 > xmax)
  252. x2 = xmax;
  253. /* Upper limit */
  254. y1 = yy - range;
  255. if (y1 < ymin)
  256. y1 = ymin;
  257. /* Lower limit */
  258. y2 = yy + range;
  259. if (y2 > ymax)
  260. y2 = ymax;
  261. pix = s->new_picture[0] + (yy * s->linesize) + xx;
  262. mx = 0;
  263. my = 0;
  264. x = xx;
  265. y = yy;
  266. do {
  267. dminx = 0x7fffffff;
  268. dminy = 0x7fffffff;
  269. lastx = x;
  270. for (x = x1; x <= x2; x += range) {
  271. d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
  272. if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
  273. dminx = d;
  274. mx = x;
  275. }
  276. }
  277. x = lastx;
  278. for (y = y1; y <= y2; y += range) {
  279. d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize);
  280. if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
  281. dminy = d;
  282. my = y;
  283. }
  284. }
  285. range = range >> 1;
  286. x = mx;
  287. y = my;
  288. x1 = mx - range;
  289. if (x1 < xmin)
  290. x1 = xmin;
  291. x2 = mx + range;
  292. if (x2 > xmax)
  293. x2 = xmax;
  294. y1 = my - range;
  295. if (y1 < ymin)
  296. y1 = ymin;
  297. y2 = my + range;
  298. if (y2 > ymax)
  299. y2 = ymax;
  300. } while (range >= 1);
  301. #ifdef DEBUG
  302. fprintf(stderr, "phods - MX: %d\tMY: %d\n", mx, my);
  303. #endif
  304. /* half pixel search */
  305. *mx_ptr = mx;
  306. *my_ptr = my;
  307. return dminy;
  308. }
  309. #define Z_THRESHOLD 256
  310. #define CHECK_MV(x,y)\
  311. {\
  312. d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
  313. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
  314. if(d<dmin){\
  315. best[0]=x;\
  316. best[1]=y;\
  317. dmin=d;\
  318. }\
  319. }
  320. #define CHECK_MV_DIR(x,y,new_dir)\
  321. {\
  322. d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
  323. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
  324. if(d<dmin){\
  325. best[0]=x;\
  326. best[1]=y;\
  327. dmin=d;\
  328. next_dir= new_dir;\
  329. }\
  330. }
  331. #define CHECK_MV4(x,y)\
  332. {\
  333. d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
  334. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
  335. if(d<dmin){\
  336. best[0]=x;\
  337. best[1]=y;\
  338. dmin=d;\
  339. }\
  340. }
  341. #define CHECK_MV4_DIR(x,y,new_dir)\
  342. {\
  343. d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
  344. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
  345. if(d<dmin){\
  346. best[0]=x;\
  347. best[1]=y;\
  348. dmin=d;\
  349. next_dir= new_dir;\
  350. }\
  351. }
  352. #define check(x,y,S,v)\
  353. if( (x)<(xmin<<(S)) ) printf("%d %d %d %d xmin" #v, (x), (y), s->mb_x, s->mb_y);\
  354. if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\
  355. if( (y)<(ymin<<(S)) ) printf("%d %d %d %d ymin" #v, (x), (y), s->mb_x, s->mb_y);\
  356. if( (y)>(ymax<<(S)) ) printf("%d %d %d %d ymax" #v, (x), (y), s->mb_x, s->mb_y);\
  357. static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
  358. UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
  359. int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
  360. int xmin, int ymin, int xmax, int ymax, int shift)
  361. {
  362. int next_dir=-1;
  363. for(;;){
  364. int d;
  365. const int dir= next_dir;
  366. const int x= best[0];
  367. const int y= best[1];
  368. next_dir=-1;
  369. //printf("%d", dir);
  370. if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
  371. if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
  372. if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
  373. if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
  374. if(next_dir==-1){
  375. return dmin;
  376. }
  377. }
  378. /* for(;;){
  379. int d;
  380. const int x= best[0];
  381. const int y= best[1];
  382. const int last_min=dmin;
  383. if(x>xmin) CHECK_MV(x-1, y )
  384. if(y>xmin) CHECK_MV(x , y-1)
  385. if(x<xmax) CHECK_MV(x+1, y )
  386. if(y<xmax) CHECK_MV(x , y+1)
  387. if(x>xmin && y>ymin) CHECK_MV(x-1, y-1)
  388. if(x>xmin && y<ymax) CHECK_MV(x-1, y+1)
  389. if(x<xmax && y>ymin) CHECK_MV(x+1, y-1)
  390. if(x<xmax && y<ymax) CHECK_MV(x+1, y+1)
  391. if(x-1>xmin) CHECK_MV(x-2, y )
  392. if(y-1>xmin) CHECK_MV(x , y-2)
  393. if(x+1<xmax) CHECK_MV(x+2, y )
  394. if(y+1<xmax) CHECK_MV(x , y+2)
  395. if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2)
  396. if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2)
  397. if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2)
  398. if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2)
  399. if(dmin==last_min) return dmin;
  400. }
  401. */
  402. }
  403. static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin,
  404. UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
  405. int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
  406. int xmin, int ymin, int xmax, int ymax, int shift)
  407. {
  408. int next_dir=-1;
  409. for(;;){
  410. int d;
  411. const int dir= next_dir;
  412. const int x= best[0];
  413. const int y= best[1];
  414. next_dir=-1;
  415. //printf("%d", dir);
  416. if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y , 0)
  417. if(dir!=3 && y>ymin) CHECK_MV4_DIR(x , y-1, 1)
  418. if(dir!=0 && x<xmax) CHECK_MV4_DIR(x+1, y , 2)
  419. if(dir!=1 && y<ymax) CHECK_MV4_DIR(x , y+1, 3)
  420. if(next_dir==-1){
  421. return dmin;
  422. }
  423. }
  424. }
  425. static inline int snake_search(MpegEncContext * s, int *best, int dmin,
  426. UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
  427. int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
  428. int xmin, int ymin, int xmax, int ymax, int shift)
  429. {
  430. int dir=0;
  431. int c=1;
  432. static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1};
  433. static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1};
  434. int fails=0;
  435. int last_d[2]={dmin, dmin};
  436. /*static int good=0;
  437. static int bad=0;
  438. static int point=0;
  439. point++;
  440. if(256*256*256*64%point==0)
  441. {
  442. printf("%d %d %d\n", good, bad, point);
  443. }*/
  444. for(;;){
  445. int x= best[0];
  446. int y= best[1];
  447. int d;
  448. x+=x_dir[dir];
  449. y+=y_dir[dir];
  450. if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
  451. d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
  452. d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
  453. }else{
  454. d = dmin + 10000; //FIXME smarter boundary handling
  455. }
  456. if(d<dmin){
  457. best[0]=x;
  458. best[1]=y;
  459. dmin=d;
  460. if(last_d[1] - last_d[0] > last_d[0] - d) c= -c;
  461. dir+=c;
  462. fails=0;
  463. //good++;
  464. last_d[1]=last_d[0];
  465. last_d[0]=d;
  466. }else{
  467. //bad++;
  468. if(fails){
  469. if(fails>=3) return dmin;
  470. }else{
  471. c= -c;
  472. }
  473. dir+=c*2;
  474. fails++;
  475. }
  476. dir&=7;
  477. }
  478. }
  479. static int epzs_motion_search(MpegEncContext * s,
  480. int *mx_ptr, int *my_ptr,
  481. int P[5][2], int pred_x, int pred_y,
  482. int xmin, int ymin, int xmax, int ymax)
  483. {
  484. int best[2]={0, 0};
  485. int d, dmin;
  486. UINT8 *new_pic, *old_pic;
  487. const int pic_stride= s->linesize;
  488. const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16;
  489. UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
  490. int quant= s->qscale; // qscale of the prev frame
  491. const int shift= 1+s->quarter_sample;
  492. new_pic = s->new_picture[0] + pic_xy;
  493. old_pic = s->last_picture[0] + pic_xy;
  494. dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
  495. if(dmin<Z_THRESHOLD){
  496. *mx_ptr= 0;
  497. *my_ptr= 0;
  498. //printf("Z");
  499. return dmin;
  500. }
  501. /* first line */
  502. if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
  503. CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
  504. }else{
  505. CHECK_MV(P[4][0]>>shift, P[4][1]>>shift)
  506. if(dmin<Z_THRESHOLD){
  507. *mx_ptr= P[4][0]>>shift;
  508. *my_ptr= P[4][1]>>shift;
  509. //printf("M\n");
  510. return dmin;
  511. }
  512. CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
  513. CHECK_MV(P[2][0]>>shift, P[2][1]>>shift)
  514. CHECK_MV(P[3][0]>>shift, P[3][1]>>shift)
  515. }
  516. CHECK_MV(P[0][0]>>shift, P[0][1]>>shift)
  517. //check(best[0],best[1],0, b0)
  518. if(s->full_search==ME_EPZS)
  519. dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
  520. pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
  521. else
  522. dmin= snake_search(s, best, dmin, new_pic, old_pic, pic_stride,
  523. pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
  524. //check(best[0],best[1],0, b1)
  525. *mx_ptr= best[0];
  526. *my_ptr= best[1];
  527. // printf("%d %d %d \n", best[0], best[1], dmin);
  528. return dmin;
  529. }
  530. static int epzs_motion_search4(MpegEncContext * s, int block,
  531. int *mx_ptr, int *my_ptr,
  532. int P[6][2], int pred_x, int pred_y,
  533. int xmin, int ymin, int xmax, int ymax)
  534. {
  535. int best[2]={0, 0};
  536. int d, dmin;
  537. UINT8 *new_pic, *old_pic;
  538. const int pic_stride= s->linesize;
  539. const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8;
  540. UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
  541. int quant= s->qscale; // qscale of the prev frame
  542. const int shift= 1+s->quarter_sample;
  543. new_pic = s->new_picture[0] + pic_xy;
  544. old_pic = s->last_picture[0] + pic_xy;
  545. dmin = pix_abs8x8(new_pic, old_pic, pic_stride);
  546. /* first line */
  547. if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
  548. CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
  549. }else{
  550. CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift)
  551. if(dmin<Z_THRESHOLD){
  552. *mx_ptr= P[4][0]>>shift;
  553. *my_ptr= P[4][1]>>shift;
  554. //printf("M\n");
  555. return dmin;
  556. }
  557. CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
  558. CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift)
  559. CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift)
  560. }
  561. CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift)
  562. CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift)
  563. //check(best[0],best[1],0, b0)
  564. dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride,
  565. pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
  566. //check(best[0],best[1],0, b1)
  567. *mx_ptr= best[0];
  568. *my_ptr= best[1];
  569. // printf("%d %d %d \n", best[0], best[1], dmin);
  570. return dmin;
  571. }
  572. #define CHECK_HALF_MV(suffix, x, y) \
  573. d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
  574. d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
  575. if(d<dminh){\
  576. dminh= d;\
  577. mx= mx1 + x;\
  578. my= my1 + y;\
  579. }
  580. #define CHECK_HALF_MV4(suffix, x, y) \
  581. d= pix_abs8x8_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
  582. d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
  583. if(d<dminh){\
  584. dminh= d;\
  585. mx= mx1 + x;\
  586. my= my1 + y;\
  587. }
  588. /* The idea would be to make half pel ME after Inter/Intra decision to
  589. save time. */
  590. static inline void halfpel_motion_search(MpegEncContext * s,
  591. int *mx_ptr, int *my_ptr, int dmin,
  592. int xmin, int ymin, int xmax, int ymax,
  593. int pred_x, int pred_y)
  594. {
  595. UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
  596. const int quant= s->qscale;
  597. int pen_x, pen_y;
  598. int mx, my, mx1, my1, d, xx, yy, dminh;
  599. UINT8 *pix, *ptr;
  600. mx = *mx_ptr;
  601. my = *my_ptr;
  602. ptr = s->last_picture[0] + (my * s->linesize) + mx;
  603. xx = 16 * s->mb_x;
  604. yy = 16 * s->mb_y;
  605. pix = s->new_picture[0] + (yy * s->linesize) + xx;
  606. dminh = dmin;
  607. if (mx > xmin && mx < xmax &&
  608. my > ymin && my < ymax) {
  609. mx= mx1= 2*(mx - xx);
  610. my= my1= 2*(my - yy);
  611. if(dmin < Z_THRESHOLD && mx==0 && my==0){
  612. *mx_ptr = 0;
  613. *my_ptr = 0;
  614. return;
  615. }
  616. pen_x= pred_x + mx;
  617. pen_y= pred_y + my;
  618. ptr-= s->linesize;
  619. CHECK_HALF_MV(xy2, -1, -1)
  620. CHECK_HALF_MV(y2 , 0, -1)
  621. CHECK_HALF_MV(xy2, +1, -1)
  622. ptr+= s->linesize;
  623. CHECK_HALF_MV(x2 , -1, 0)
  624. CHECK_HALF_MV(x2 , +1, 0)
  625. CHECK_HALF_MV(xy2, -1, +1)
  626. CHECK_HALF_MV(y2 , 0, +1)
  627. CHECK_HALF_MV(xy2, +1, +1)
  628. }else{
  629. mx= 2*(mx - xx);
  630. my= 2*(my - yy);
  631. }
  632. *mx_ptr = mx;
  633. *my_ptr = my;
  634. }
  635. static inline void halfpel_motion_search4(MpegEncContext * s,
  636. int *mx_ptr, int *my_ptr, int dmin,
  637. int xmin, int ymin, int xmax, int ymax,
  638. int pred_x, int pred_y, int block_x, int block_y)
  639. {
  640. UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
  641. const int quant= s->qscale;
  642. int pen_x, pen_y;
  643. int mx, my, mx1, my1, d, xx, yy, dminh;
  644. UINT8 *pix, *ptr;
  645. xx = 8 * block_x;
  646. yy = 8 * block_y;
  647. pix = s->new_picture[0] + (yy * s->linesize) + xx;
  648. mx = *mx_ptr;
  649. my = *my_ptr;
  650. ptr = s->last_picture[0] + ((yy+my) * s->linesize) + xx + mx;
  651. dminh = dmin;
  652. if (mx > xmin && mx < xmax &&
  653. my > ymin && my < ymax) {
  654. mx= mx1= 2*mx;
  655. my= my1= 2*my;
  656. if(dmin < Z_THRESHOLD && mx==0 && my==0){
  657. *mx_ptr = 0;
  658. *my_ptr = 0;
  659. return;
  660. }
  661. pen_x= pred_x + mx;
  662. pen_y= pred_y + my;
  663. ptr-= s->linesize;
  664. CHECK_HALF_MV4(xy2, -1, -1)
  665. CHECK_HALF_MV4(y2 , 0, -1)
  666. CHECK_HALF_MV4(xy2, +1, -1)
  667. ptr+= s->linesize;
  668. CHECK_HALF_MV4(x2 , -1, 0)
  669. CHECK_HALF_MV4(x2 , +1, 0)
  670. CHECK_HALF_MV4(xy2, -1, +1)
  671. CHECK_HALF_MV4(y2 , 0, +1)
  672. CHECK_HALF_MV4(xy2, +1, +1)
  673. }else{
  674. mx*=2;
  675. my*=2;
  676. }
  677. *mx_ptr = mx;
  678. *my_ptr = my;
  679. }
  680. static inline void set_mv_tables(MpegEncContext * s, int mx, int my)
  681. {
  682. const int xy= s->mb_x + s->mb_y*s->mb_width;
  683. s->mv_table[0][xy] = mx;
  684. s->mv_table[1][xy] = my;
  685. /* has allready been set to the 4 MV if 4MV is done */
  686. if(!(s->flags&CODEC_FLAG_4MV)){
  687. int mot_xy= s->block_index[0];
  688. s->motion_val[mot_xy ][0]= mx;
  689. s->motion_val[mot_xy ][1]= my;
  690. s->motion_val[mot_xy+1][0]= mx;
  691. s->motion_val[mot_xy+1][1]= my;
  692. mot_xy += s->block_wrap[0];
  693. s->motion_val[mot_xy ][0]= mx;
  694. s->motion_val[mot_xy ][1]= my;
  695. s->motion_val[mot_xy+1][0]= mx;
  696. s->motion_val[mot_xy+1][1]= my;
  697. }
  698. }
  699. #ifndef CONFIG_TEST_MV_ENCODE
  700. void estimate_motion(MpegEncContext * s,
  701. int mb_x, int mb_y)
  702. {
  703. UINT8 *pix, *ppix;
  704. int sum, varc, vard, mx, my, range, dmin, xx, yy;
  705. int xmin, ymin, xmax, ymax;
  706. int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
  707. int pred_x=0, pred_y=0;
  708. int P[6][2];
  709. const int shift= 1+s->quarter_sample;
  710. int mb_type=0;
  711. range = 8 * (1 << (s->f_code - 1));
  712. /* XXX: temporary kludge to avoid overflow for msmpeg4 */
  713. if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
  714. range = range * 2;
  715. if (s->unrestricted_mv) {
  716. xmin = -16;
  717. ymin = -16;
  718. if (s->h263_plus)
  719. range *= 2;
  720. if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
  721. xmax = s->mb_width*16;
  722. ymax = s->mb_height*16;
  723. }else {
  724. /* XXX: dunno if this is correct but ffmpeg4 decoder wont like it otherwise
  725. (cuz the drawn edge isnt large enough))*/
  726. xmax = s->width;
  727. ymax = s->height;
  728. }
  729. } else {
  730. xmin = 0;
  731. ymin = 0;
  732. xmax = s->mb_width*16 - 16;
  733. ymax = s->mb_height*16 - 16;
  734. }
  735. switch(s->full_search) {
  736. case ME_ZERO:
  737. default:
  738. no_motion_search(s, &mx, &my);
  739. dmin = 0;
  740. break;
  741. case ME_FULL:
  742. dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax);
  743. break;
  744. case ME_LOG:
  745. dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
  746. break;
  747. case ME_PHODS:
  748. dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
  749. break;
  750. case ME_X1:
  751. case ME_EPZS:
  752. {
  753. const int mot_stride = s->block_wrap[0];
  754. const int mot_xy = s->block_index[0];
  755. rel_xmin= xmin - mb_x*16;
  756. rel_xmax= xmax - mb_x*16;
  757. rel_ymin= ymin - mb_y*16;
  758. rel_ymax= ymax - mb_y*16;
  759. P[0][0] = s->motion_val[mot_xy ][0];
  760. P[0][1] = s->motion_val[mot_xy ][1];
  761. P[1][0] = s->motion_val[mot_xy - 1][0];
  762. P[1][1] = s->motion_val[mot_xy - 1][1];
  763. if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift);
  764. /* special case for first line */
  765. if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
  766. P[4][0] = P[1][0];
  767. P[4][1] = P[1][1];
  768. } else {
  769. P[2][0] = s->motion_val[mot_xy - mot_stride ][0];
  770. P[2][1] = s->motion_val[mot_xy - mot_stride ][1];
  771. P[3][0] = s->motion_val[mot_xy - mot_stride + 2 ][0];
  772. P[3][1] = s->motion_val[mot_xy - mot_stride + 2 ][1];
  773. if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift);
  774. if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift);
  775. if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift);
  776. P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
  777. P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
  778. }
  779. if(s->out_format == FMT_H263){
  780. pred_x = P[4][0];
  781. pred_y = P[4][1];
  782. }else { /* mpeg1 at least */
  783. pred_x= P[1][0];
  784. pred_y= P[1][1];
  785. }
  786. }
  787. dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax);
  788. mx+= mb_x*16;
  789. my+= mb_y*16;
  790. break;
  791. }
  792. if(s->flags&CODEC_FLAG_4MV){
  793. int block;
  794. mb_type|= MB_TYPE_INTER4V;
  795. for(block=0; block<4; block++){
  796. int mx4, my4;
  797. int pred_x4, pred_y4;
  798. int dmin4;
  799. static const int off[4]= {2, 1, 1, -1};
  800. const int mot_stride = s->block_wrap[0];
  801. const int mot_xy = s->block_index[block];
  802. const int block_x= mb_x*2 + (block&1);
  803. const int block_y= mb_y*2 + (block>>1);
  804. const int rel_xmin4= xmin - block_x*8;
  805. const int rel_xmax4= xmax - block_x*8 + 8;
  806. const int rel_ymin4= ymin - block_y*8;
  807. const int rel_ymax4= ymax - block_y*8 + 8;
  808. P[0][0] = s->motion_val[mot_xy ][0];
  809. P[0][1] = s->motion_val[mot_xy ][1];
  810. P[1][0] = s->motion_val[mot_xy - 1][0];
  811. P[1][1] = s->motion_val[mot_xy - 1][1];
  812. if(P[1][0] > (rel_xmax4<<shift)) P[1][0]= (rel_xmax4<<shift);
  813. /* special case for first line */
  814. if ((mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
  815. P[4][0] = P[1][0];
  816. P[4][1] = P[1][1];
  817. } else {
  818. P[2][0] = s->motion_val[mot_xy - mot_stride ][0];
  819. P[2][1] = s->motion_val[mot_xy - mot_stride ][1];
  820. P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
  821. P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
  822. if(P[2][1] > (rel_ymax4<<shift)) P[2][1]= (rel_ymax4<<shift);
  823. if(P[3][0] < (rel_xmin4<<shift)) P[3][0]= (rel_xmin4<<shift);
  824. if(P[3][0] > (rel_xmax4<<shift)) P[3][0]= (rel_xmax4<<shift);
  825. if(P[3][1] > (rel_ymax4<<shift)) P[3][1]= (rel_ymax4<<shift);
  826. P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
  827. P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
  828. }
  829. if(s->out_format == FMT_H263){
  830. pred_x4 = P[4][0];
  831. pred_y4 = P[4][1];
  832. }else { /* mpeg1 at least */
  833. pred_x4= P[1][0];
  834. pred_y4= P[1][1];
  835. }
  836. P[5][0]= mx - mb_x*16;
  837. P[5][1]= my - mb_y*16;
  838. dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4);
  839. halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
  840. pred_x4, pred_y4, block_x, block_y);
  841. s->motion_val[ s->block_index[block] ][0]= mx4;
  842. s->motion_val[ s->block_index[block] ][1]= my4;
  843. }
  844. }
  845. /* intra / predictive decision */
  846. xx = mb_x * 16;
  847. yy = mb_y * 16;
  848. pix = s->new_picture[0] + (yy * s->linesize) + xx;
  849. /* At this point (mx,my) are full-pell and the absolute displacement */
  850. ppix = s->last_picture[0] + (my * s->linesize) + mx;
  851. sum = pix_sum(pix, s->linesize);
  852. #if 0
  853. varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS;
  854. vard = pix_abs16x16(pix, ppix, s->linesize);
  855. #else
  856. sum= (sum+8)>>4;
  857. varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8);
  858. vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
  859. #endif
  860. s->mb_var[s->mb_width * mb_y + mb_x] = varc;
  861. s->avg_mb_var+= varc;
  862. s->mc_mb_var += vard;
  863. #if 0
  864. printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
  865. varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
  866. #endif
  867. if(s->flags&CODEC_FLAG_HQ){
  868. if (vard*2 + 200 > varc)
  869. mb_type|= MB_TYPE_INTRA;
  870. if (varc*2 + 200 > vard){
  871. mb_type|= MB_TYPE_INTER;
  872. halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
  873. }
  874. }else{
  875. if (vard <= 64 || vard < varc) {
  876. mb_type|= MB_TYPE_INTER;
  877. if (s->full_search != ME_ZERO) {
  878. halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
  879. } else {
  880. mx -= 16 * mb_x;
  881. my -= 16 * mb_y;
  882. }
  883. }else{
  884. mb_type|= MB_TYPE_INTRA;
  885. mx = 0;//mx*2 - 32 * mb_x;
  886. my = 0;//my*2 - 32 * mb_y;
  887. }
  888. }
  889. s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
  890. set_mv_tables(s, mx, my);
  891. }
  892. #else
  893. /* test version which generates valid random vectors */
  894. int estimate_motion(MpegEncContext * s,
  895. int mb_x, int mb_y,
  896. int *mx_ptr, int *my_ptr)
  897. {
  898. int xx, yy, x1, y1, x2, y2, range;
  899. if ((random() % 10) >= 5) {
  900. range = 8 * (1 << (s->f_code - 1));
  901. if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
  902. range = range * 2;
  903. xx = 16 * s->mb_x;
  904. yy = 16 * s->mb_y;
  905. x1 = xx - range;
  906. if (x1 < 0)
  907. x1 = 0;
  908. x2 = xx + range - 1;
  909. if (x2 > (s->width - 16))
  910. x2 = s->width - 16;
  911. y1 = yy - range;
  912. if (y1 < 0)
  913. y1 = 0;
  914. y2 = yy + range - 1;
  915. if (y2 > (s->height - 16))
  916. y2 = s->height - 16;
  917. *mx_ptr = (random() % (2 * (x2 - x1 + 1))) + 2 * (x1 - xx);
  918. *my_ptr = (random() % (2 * (y2 - y1 + 1))) + 2 * (y1 - yy);
  919. return 0;
  920. } else {
  921. *mx_ptr = 0;
  922. *my_ptr = 0;
  923. return 1;
  924. }
  925. }
  926. #endif