You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1132 lines
33KB

  1. /*
  2. * RoQ Video Encoder.
  3. *
  4. * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
  5. * Copyright (C) 2004-2007 Eric Lasota
  6. * Based on RoQ specs (C) 2001 Tim Ferguson
  7. *
  8. * This file is part of Libav.
  9. *
  10. * Libav is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * Libav is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with Libav; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file
  26. * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
  27. * Switchblade3 Libav glue by Eric Lasota.
  28. */
  29. /*
  30. * COSTS:
  31. * Level 1:
  32. * SKIP - 2 bits
  33. * MOTION - 2 + 8 bits
  34. * CODEBOOK - 2 + 8 bits
  35. * SUBDIVIDE - 2 + combined subcel cost
  36. *
  37. * Level 2:
  38. * SKIP - 2 bits
  39. * MOTION - 2 + 8 bits
  40. * CODEBOOK - 2 + 8 bits
  41. * SUBDIVIDE - 2 + 4*8 bits
  42. *
  43. * Maximum cost: 138 bits per cel
  44. *
  45. * Proper evaluation requires LCD fraction comparison, which requires
  46. * Squared Error (SE) loss * savings increase
  47. *
  48. * Maximum savings increase: 136 bits
  49. * Maximum SE loss without overflow: 31580641
  50. * Components in 8x8 supercel: 192
  51. * Maximum SE precision per component: 164482
  52. * >65025, so no truncation is needed (phew)
  53. */
  54. #include <string.h>
  55. #include "libavutil/attributes.h"
  56. #include "roqvideo.h"
  57. #include "bytestream.h"
  58. #include "elbg.h"
  59. #include "internal.h"
  60. #include "mathops.h"
  61. #define CHROMA_BIAS 1
  62. /**
  63. * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a
  64. * Quake 3 bug.
  65. */
  66. #define MAX_CBS_4x4 255
  67. #define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.
  68. /* The cast is useful when multiplying it by INT_MAX */
  69. #define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
  70. /* Macroblock support functions */
  71. static void unpack_roq_cell(roq_cell *cell, uint8_t u[4*3])
  72. {
  73. memcpy(u , cell->y, 4);
  74. memset(u+4, cell->u, 4);
  75. memset(u+8, cell->v, 4);
  76. }
  77. static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3])
  78. {
  79. int i,cp;
  80. static const int offsets[4] = {0, 2, 8, 10};
  81. for (cp=0; cp<3; cp++)
  82. for (i=0; i<4; i++) {
  83. u[4*4*cp + offsets[i] ] = cb2[qcell->idx[i]*2*2*3 + 4*cp ];
  84. u[4*4*cp + offsets[i]+1] = cb2[qcell->idx[i]*2*2*3 + 4*cp+1];
  85. u[4*4*cp + offsets[i]+4] = cb2[qcell->idx[i]*2*2*3 + 4*cp+2];
  86. u[4*4*cp + offsets[i]+5] = cb2[qcell->idx[i]*2*2*3 + 4*cp+3];
  87. }
  88. }
  89. static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64])
  90. {
  91. int x,y,cp;
  92. for(cp=0; cp<3; cp++)
  93. for(y=0; y<8; y++)
  94. for(x=0; x<8; x++)
  95. *u++ = base[(y/2)*4 + (x/2) + 16*cp];
  96. }
  97. static inline int square(int x)
  98. {
  99. return x*x;
  100. }
  101. static inline int eval_sse(const uint8_t *a, const uint8_t *b, int count)
  102. {
  103. int diff=0;
  104. while(count--)
  105. diff += square(*b++ - *a++);
  106. return diff;
  107. }
  108. // FIXME Could use DSPContext.sse, but it is not so speed critical (used
  109. // just for motion estimation).
  110. static int block_sse(uint8_t * const *buf1, uint8_t * const *buf2, int x1, int y1,
  111. int x2, int y2, const int *stride1, const int *stride2, int size)
  112. {
  113. int i, k;
  114. int sse=0;
  115. for (k=0; k<3; k++) {
  116. int bias = (k ? CHROMA_BIAS : 4);
  117. for (i=0; i<size; i++)
  118. sse += bias*eval_sse(buf1[k] + (y1+i)*stride1[k] + x1,
  119. buf2[k] + (y2+i)*stride2[k] + x2, size);
  120. }
  121. return sse;
  122. }
  123. static int eval_motion_dist(RoqContext *enc, int x, int y, motion_vect vect,
  124. int size)
  125. {
  126. int mx=vect.d[0];
  127. int my=vect.d[1];
  128. if (mx < -7 || mx > 7)
  129. return INT_MAX;
  130. if (my < -7 || my > 7)
  131. return INT_MAX;
  132. mx += x;
  133. my += y;
  134. if ((unsigned) mx > enc->width-size || (unsigned) my > enc->height-size)
  135. return INT_MAX;
  136. return block_sse(enc->frame_to_enc->data, enc->last_frame->data, x, y,
  137. mx, my,
  138. enc->frame_to_enc->linesize, enc->last_frame->linesize,
  139. size);
  140. }
  141. /**
  142. * @return distortion between two macroblocks
  143. */
  144. static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size)
  145. {
  146. int cp, sdiff=0;
  147. for(cp=0;cp<3;cp++) {
  148. int bias = (cp ? CHROMA_BIAS : 4);
  149. sdiff += bias*eval_sse(a, b, size*size);
  150. a += size*size;
  151. b += size*size;
  152. }
  153. return sdiff;
  154. }
/**
 * Evaluation state for one 4x4 subcel.
 */
typedef struct
{
    int eval_dist[4];   ///< distortion of each coding mode, indexed by RoQ_ID_*
    int best_bit_use;   ///< bit cost of the chosen mode
    int best_coding;    ///< chosen RoQ_ID_* mode

    int subCels[4];     ///< 2x2 codebook indices used when subdividing (RoQ_ID_CCC)
    motion_vect motion; ///< candidate motion vector for RoQ_ID_FCC
    int cbEntry;        ///< 4x4 codebook index for RoQ_ID_SLD
} SubcelEvaluation;
/**
 * Evaluation state for one 8x8 cel.
 */
typedef struct
{
    int eval_dist[4];             ///< distortion of each coding mode, indexed by RoQ_ID_*
    int best_coding;              ///< chosen RoQ_ID_* mode

    SubcelEvaluation subCels[4];  ///< evaluations of the four 4x4 subcels

    motion_vect motion;           ///< candidate motion vector for RoQ_ID_FCC
    int cbEntry;                  ///< 4x4 codebook index for RoQ_ID_SLD

    int sourceX, sourceY;         ///< top-left pixel position of this cel
} CelEvaluation;
/**
 * Per-frame codebook state: usage counts plus unpacked (planar YUV)
 * forms of the entries for fast distortion evaluation.
 */
typedef struct
{
    int numCB4;                   ///< number of 4x4 entries in use
    int numCB2;                   ///< number of 2x2 entries in use
    int usedCB2[MAX_CBS_2x2];     ///< usage count per 2x2 entry
    int usedCB4[MAX_CBS_4x4];     ///< usage count per 4x4 entry
    uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3];          ///< 2x2 cells, planar YUV
    uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3];          ///< 4x4 cells, planar YUV
    uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3]; ///< 4x4 cells pixel-doubled to 8x8
} RoqCodebooks;
/**
 * Temporary vars
 */
typedef struct RoqTempData
{
    CelEvaluation *cel_evals;  ///< one evaluation per 8x8 cel of the frame

    int f2i4[MAX_CBS_4x4];     ///< final -> initial 4x4 codebook index map
    int i2f4[MAX_CBS_4x4];     ///< initial -> final 4x4 codebook index map
    int f2i2[MAX_CBS_2x2];     ///< final -> initial 2x2 codebook index map
    int i2f2[MAX_CBS_2x2];     ///< initial -> final 2x2 codebook index map

    int mainChunkSize;         ///< accumulated size of the VQ chunk body, in bits

    int numCB4;                ///< surviving 4x4 entries after remapping
    int numCB2;                ///< surviving 2x2 entries after remapping

    RoqCodebooks codebooks;

    int *closest_cb2;          ///< closest 2x2 codebook entry for each cluster cell
    int used_option[4];        ///< how many cels chose each RoQ_ID_* mode
} RoqTempdata;
  200. /**
  201. * Initialize cel evaluators and set their source coordinates
  202. */
  203. static int create_cel_evals(RoqContext *enc, RoqTempdata *tempData)
  204. {
  205. int n=0, x, y, i;
  206. tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation));
  207. if (!tempData->cel_evals)
  208. return AVERROR(ENOMEM);
  209. /* Map to the ROQ quadtree order */
  210. for (y=0; y<enc->height; y+=16)
  211. for (x=0; x<enc->width; x+=16)
  212. for(i=0; i<4; i++) {
  213. tempData->cel_evals[n ].sourceX = x + (i&1)*8;
  214. tempData->cel_evals[n++].sourceY = y + (i&2)*4;
  215. }
  216. return 0;
  217. }
  218. /**
  219. * Get macroblocks from parts of the image
  220. */
  221. static void get_frame_mb(const AVFrame *frame, int x, int y, uint8_t mb[], int dim)
  222. {
  223. int i, j, cp;
  224. for (cp=0; cp<3; cp++) {
  225. int stride = frame->linesize[cp];
  226. for (i=0; i<dim; i++)
  227. for (j=0; j<dim; j++)
  228. *mb++ = frame->data[cp][(y+i)*stride + x + j];
  229. }
  230. }
  231. /**
  232. * Find the codebook with the lowest distortion from an image
  233. */
  234. static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
  235. int *outIndex, int dim)
  236. {
  237. int i, lDiff = INT_MAX, pick=0;
  238. /* Diff against the others */
  239. for (i=0; i<numCB; i++) {
  240. int diff = squared_diff_macroblock(cluster, cb + i*dim*dim*3, dim);
  241. if (diff < lDiff) {
  242. lDiff = diff;
  243. pick = i;
  244. }
  245. }
  246. *outIndex = pick;
  247. return lDiff;
  248. }
/* Evaluate candidate motion vector MOTION for the current block and
 * keep it if it beats the best so far.  Relies on enc, i, j, blocksize,
 * diff, lowestdiff and bestpick being in scope at the expansion site
 * (see motion_search()). */
#define EVAL_MOTION(MOTION) \
    do { \
        diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \
            \
        if (diff < lowestdiff) { \
            lowestdiff = diff; \
            bestpick = MOTION; \
        } \
    } while(0)
/**
 * Motion search over the whole frame for blocks of the given size
 * (8 or 4; the 4x4 pass can seed from this frame's 8x8 results).
 *
 * Candidates per block: the zero vector, the enclosing 8x8 vector
 * (4x4 only), nearby vectors from the previous frame, the median of
 * already-decided neighbours, then greedy refinement over the eight
 * surrounding offsets until no candidate improves.  Winners are
 * stored in enc->this_motion4/this_motion8.
 */
static void motion_search(RoqContext *enc, int blocksize)
{
    /* Refinement step offsets (8-neighbourhood) */
    static const motion_vect offsets[8] = {
        {{ 0,-1}},
        {{ 0, 1}},
        {{-1, 0}},
        {{ 1, 0}},
        {{-1, 1}},
        {{ 1,-1}},
        {{-1,-1}},
        {{ 1, 1}},
    };

    int diff, lowestdiff, oldbest;
    int off[3];
    motion_vect bestpick = {{0,0}};
    int i, j, k, offset;

    motion_vect *last_motion;
    motion_vect *this_motion;
    motion_vect vect, vect2;

    int max = (enc->width/blocksize)*enc->height/blocksize;

    if (blocksize == 4) {
        last_motion = enc->last_motion4;
        this_motion = enc->this_motion4;
    } else {
        last_motion = enc->last_motion8;
        this_motion = enc->this_motion8;
    }

    for (i=0; i<enc->height; i+=blocksize)
        for (j=0; j<enc->width; j+=blocksize) {
            /* Baseline: the zero vector */
            lowestdiff = eval_motion_dist(enc, j, i, (motion_vect) {{0,0}},
                                          blocksize);
            bestpick.d[0] = 0;
            bestpick.d[1] = 0;

            /* 4x4 blocks can reuse the enclosing 8x8 block's vector */
            if (blocksize == 4)
                EVAL_MOTION(enc->this_motion8[(i/8)*(enc->width/8) + j/8]);

            /* Previous frame: same position and the two blocks after it */
            offset = (i/blocksize)*enc->width/blocksize + j/blocksize;
            if (offset < max && offset >= 0)
                EVAL_MOTION(last_motion[offset]);

            offset++;
            if (offset < max && offset >= 0)
                EVAL_MOTION(last_motion[offset]);

            offset = (i/blocksize + 1)*enc->width/blocksize + j/blocksize;
            if (offset < max && offset >= 0)
                EVAL_MOTION(last_motion[offset]);

            /* Already-decided neighbours this frame:
             * off[0] = left, off[1] = above, off[2] = above-right */
            off[0]= (i/blocksize)*enc->width/blocksize + j/blocksize - 1;
            off[1]= off[0] - enc->width/blocksize + 1;
            off[2]= off[1] + 1;

            if (i) {
                /* Component-wise median of the three neighbours */
                for(k=0; k<2; k++)
                    vect.d[k]= mid_pred(this_motion[off[0]].d[k],
                                        this_motion[off[1]].d[k],
                                        this_motion[off[2]].d[k]);

                EVAL_MOTION(vect);
                for(k=0; k<3; k++)
                    EVAL_MOTION(this_motion[off[k]]);
            } else if(j)
                EVAL_MOTION(this_motion[off[0]]);

            vect = bestpick;

            /* Greedy refinement: keep stepping to the best of the 8
             * neighbouring vectors until none improves */
            oldbest = -1;
            while (oldbest != lowestdiff) {
                oldbest = lowestdiff;
                for (k=0; k<8; k++) {
                    vect2 = vect;
                    vect2.d[0] += offsets[k].d[0];
                    vect2.d[1] += offsets[k].d[1];
                    EVAL_MOTION(vect2);
                }
                vect = bestpick;
            }
            offset = (i/blocksize)*enc->width/blocksize + j/blocksize;
            this_motion[offset] = bestpick;
        }
}
/**
 * Get distortion for all options available to a subcel
 *
 * Fills subcel->eval_dist[] for SKIP/MOTION/CODEBOOK/SUBDIVIDE and
 * selects the mode minimizing the lambda-weighted rate-distortion
 * cost.  (x, y) is the subcel's top-left pixel.
 */
static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
                                   int y, RoqContext *enc, RoqTempdata *tempData)
{
    uint8_t mb4[4*4*3];
    uint8_t mb2[2*2*3];
    int cluster_index;
    int i, best_dist;

    /* Bit cost per mode; matches the COSTS table at the top of the file
     * (SUBDIVIDE at level 2 is 2 + 4*8 = 34 bits). */
    static const int bitsUsed[4] = {2, 10, 10, 34};

    /* MOTION needs a previous frame to reference */
    if (enc->framesSinceKeyframe >= 1) {
        subcel->motion = enc->this_motion4[y*enc->width/16 + x/4];

        subcel->eval_dist[RoQ_ID_FCC] =
            eval_motion_dist(enc, x, y,
                             enc->this_motion4[y*enc->width/16 + x/4], 4);
    } else
        subcel->eval_dist[RoQ_ID_FCC] = INT_MAX;

    /* SKIP compares against the reconstruction two frames back
     * (current_frame holds it before the history rotates) */
    if (enc->framesSinceKeyframe >= 2)
        subcel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data,
                                                  enc->current_frame->data, x,
                                                  y, x, y,
                                                  enc->frame_to_enc->linesize,
                                                  enc->current_frame->linesize,
                                                  4);
    else
        subcel->eval_dist[RoQ_ID_MOT] = INT_MAX;

    cluster_index = y*enc->width/16 + x/4;

    /* CODEBOOK: best 4x4 codebook entry for this block */
    get_frame_mb(enc->frame_to_enc, x, y, mb4, 4);

    subcel->eval_dist[RoQ_ID_SLD] = index_mb(mb4,
                                             tempData->codebooks.unpacked_cb4,
                                             tempData->codebooks.numCB4,
                                             &subcel->cbEntry, 4);

    /* SUBDIVIDE: sum of distortions of the four 2x2 quadrants, each
     * coded with its precomputed closest 2x2 codebook entry */
    subcel->eval_dist[RoQ_ID_CCC] = 0;

    for(i=0;i<4;i++) {
        subcel->subCels[i] = tempData->closest_cb2[cluster_index*4+i];

        get_frame_mb(enc->frame_to_enc, x+2*(i&1),
                     y+(i&2), mb2, 2);

        subcel->eval_dist[RoQ_ID_CCC] +=
            squared_diff_macroblock(tempData->codebooks.unpacked_cb2 + subcel->subCels[i]*2*2*3, mb2, 2);
    }

    /* Pick the mode with the lowest lambda-weighted cost */
    best_dist = INT_MAX;

    for (i=0; i<4; i++)
        if (ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + enc->lambda*bitsUsed[i] <
            best_dist) {
            subcel->best_coding = i;
            subcel->best_bit_use = bitsUsed[i];
            best_dist = ROQ_LAMBDA_SCALE*subcel->eval_dist[i] +
                enc->lambda*bitsUsed[i];
        }
}
/**
 * Get distortion for all options available to a cel
 *
 * Evaluates the 8x8 cel in all four modes (recursively evaluating its
 * subcels for SUBDIVIDE), selects the cheapest lambda-weighted option,
 * and updates the chunk-size and codebook usage accounting.
 */
static void gather_data_for_cel(CelEvaluation *cel, RoqContext *enc,
                                RoqTempdata *tempData)
{
    uint8_t mb8[8*8*3];
    int index = cel->sourceY*enc->width/64 + cel->sourceX/8;
    int i, j, best_dist, divide_bit_use;

    /* Bit cost per mode; SUBDIVIDE (index 3) is filled in below once
     * the subcel costs are known */
    int bitsUsed[4] = {2, 10, 10, 0};

    /* MOTION needs a previous frame */
    if (enc->framesSinceKeyframe >= 1) {
        cel->motion = enc->this_motion8[index];

        cel->eval_dist[RoQ_ID_FCC] =
            eval_motion_dist(enc, cel->sourceX, cel->sourceY,
                             enc->this_motion8[index], 8);
    } else
        cel->eval_dist[RoQ_ID_FCC] = INT_MAX;

    /* SKIP compares against the reconstruction two frames back */
    if (enc->framesSinceKeyframe >= 2)
        cel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data,
                                               enc->current_frame->data,
                                               cel->sourceX, cel->sourceY,
                                               cel->sourceX, cel->sourceY,
                                               enc->frame_to_enc->linesize,
                                               enc->current_frame->linesize,8);
    else
        cel->eval_dist[RoQ_ID_MOT] = INT_MAX;

    /* CODEBOOK: match against the pixel-doubled 4x4 entries */
    get_frame_mb(enc->frame_to_enc, cel->sourceX, cel->sourceY, mb8, 8);

    cel->eval_dist[RoQ_ID_SLD] =
        index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged,
                 tempData->codebooks.numCB4, &cel->cbEntry, 8);

    /* SUBDIVIDE: evaluate the four 4x4 subcels */
    gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData);
    gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData);
    gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData);
    gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData);

    cel->eval_dist[RoQ_ID_CCC] = 0;
    divide_bit_use = 0;

    for (i=0; i<4; i++) {
        cel->eval_dist[RoQ_ID_CCC] +=
            cel->subCels[i].eval_dist[cel->subCels[i].best_coding];
        divide_bit_use += cel->subCels[i].best_bit_use;
    }

    best_dist = INT_MAX;
    bitsUsed[3] = 2 + divide_bit_use;

    /* Pick the cel mode with the lowest lambda-weighted cost */
    for (i=0; i<4; i++)
        if (ROQ_LAMBDA_SCALE*cel->eval_dist[i] + enc->lambda*bitsUsed[i] <
            best_dist) {
            cel->best_coding = i;
            best_dist = ROQ_LAMBDA_SCALE*cel->eval_dist[i] +
                enc->lambda*bitsUsed[i];
        }

    /* Accounting: chunk size in bits plus codebook usage counts */
    tempData->used_option[cel->best_coding]++;
    tempData->mainChunkSize += bitsUsed[cel->best_coding];

    if (cel->best_coding == RoQ_ID_SLD)
        tempData->codebooks.usedCB4[cel->cbEntry]++;

    if (cel->best_coding == RoQ_ID_CCC)
        for (i=0; i<4; i++) {
            if (cel->subCels[i].best_coding == RoQ_ID_SLD)
                tempData->codebooks.usedCB4[cel->subCels[i].cbEntry]++;
            else if (cel->subCels[i].best_coding == RoQ_ID_CCC)
                for (j=0; j<4; j++)
                    tempData->codebooks.usedCB2[cel->subCels[i].subCels[j]]++;
        }
}
  445. static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData)
  446. {
  447. int i, j, idx=0;
  448. /* Make remaps for the final codebook usage */
  449. for (i=0; i<MAX_CBS_4x4; i++) {
  450. if (tempData->codebooks.usedCB4[i]) {
  451. tempData->i2f4[i] = idx;
  452. tempData->f2i4[idx] = i;
  453. for (j=0; j<4; j++)
  454. tempData->codebooks.usedCB2[enc->cb4x4[i].idx[j]]++;
  455. idx++;
  456. }
  457. }
  458. tempData->numCB4 = idx;
  459. idx = 0;
  460. for (i=0; i<MAX_CBS_2x2; i++) {
  461. if (tempData->codebooks.usedCB2[i]) {
  462. tempData->i2f2[i] = idx;
  463. tempData->f2i2[idx] = i;
  464. idx++;
  465. }
  466. }
  467. tempData->numCB2 = idx;
  468. }
/**
 * Write codebook chunk
 *
 * Emits a RoQ_QUAD_CODEBOOK chunk: the remapped 2x2 cells (6 bytes
 * each: 4 luma + U + V) followed by the 4x4 cells (4 remapped 2x2
 * indices each).  Nothing is written when no 2x2 entry survived
 * remap_codebooks().
 */
static void write_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
    int i, j;
    uint8_t **outp= &enc->out_buf;

    if (tempData->numCB2) {
        bytestream_put_le16(outp, RoQ_QUAD_CODEBOOK);
        /* Payload size: 6 bytes per 2x2 cell, 4 per 4x4 cell */
        bytestream_put_le32(outp, tempData->numCB2*6 + tempData->numCB4*4);
        bytestream_put_byte(outp, tempData->numCB4);
        bytestream_put_byte(outp, tempData->numCB2);

        for (i=0; i<tempData->numCB2; i++) {
            bytestream_put_buffer(outp, enc->cb2x2[tempData->f2i2[i]].y, 4);
            bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u);
            bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v);
        }

        /* 4x4 cells reference the 2x2 table through the remapped indices */
        for (i=0; i<tempData->numCB4; i++)
            for (j=0; j<4; j++)
                bytestream_put_byte(outp, tempData->i2f2[enc->cb4x4[tempData->f2i4[i]].idx[j]]);

    }
}
  491. static inline uint8_t motion_arg(motion_vect mot)
  492. {
  493. uint8_t ax = 8 - ((uint8_t) mot.d[0]);
  494. uint8_t ay = 8 - ((uint8_t) mot.d[1]);
  495. return ((ax&15)<<4) | (ay&15);
  496. }
/**
 * Spool for typecodes and their arguments: RoQ bitstreams interleave
 * one 16-bit word of eight 2-bit typecodes with the argument bytes
 * those codes consumed (see write_typecode()).
 */
typedef struct
{
    int typeSpool;             ///< pending typecodes, packed two bits each, MSB-first
    int typeSpoolLength;       ///< number of bits currently in typeSpool (0..16)
    uint8_t argumentSpool[64]; ///< argument bytes spooled since the last flush
    uint8_t *args;             ///< write pointer into argumentSpool
    uint8_t **pout;            ///< output stream position
} CodingSpool;
  505. /* NOTE: Typecodes must be spooled AFTER arguments!! */
  506. static void write_typecode(CodingSpool *s, uint8_t type)
  507. {
  508. s->typeSpool |= (type & 3) << (14 - s->typeSpoolLength);
  509. s->typeSpoolLength += 2;
  510. if (s->typeSpoolLength == 16) {
  511. bytestream_put_le16(s->pout, s->typeSpool);
  512. bytestream_put_buffer(s->pout, s->argumentSpool,
  513. s->args - s->argumentSpool);
  514. s->typeSpoolLength = 0;
  515. s->typeSpool = 0;
  516. s->args = s->argumentSpool;
  517. }
  518. }
/**
 * Reconstruct the frame and write the RoQ_QUAD_VQ chunk.
 *
 * Walks the cel evaluations in quadtree order, emitting each cel's
 * typecode and arguments through the CodingSpool (arguments first,
 * typecodes flushed eight at a time), while applying the chosen coding
 * to current_frame so it matches the decoder's reconstruction.
 */
static void reconstruct_and_encode_image(RoqContext *enc, RoqTempdata *tempData, int w, int h, int numBlocks)
{
    int i, j, k;
    int x, y;
    int subX, subY;
    int dist=0;

    roq_qcell *qcell;
    CelEvaluation *eval;

    CodingSpool spool;

    spool.typeSpool=0;
    spool.typeSpoolLength=0;
    spool.args = spool.argumentSpool;
    spool.pout = &enc->out_buf;

    /* Account for the final, partially filled typecode word */
    if (tempData->used_option[RoQ_ID_CCC]%2)
        tempData->mainChunkSize+=8; //FIXME

    /* Write the video chunk header */
    bytestream_put_le16(&enc->out_buf, RoQ_QUAD_VQ);
    bytestream_put_le32(&enc->out_buf, tempData->mainChunkSize/8);
    bytestream_put_byte(&enc->out_buf, 0x0);
    bytestream_put_byte(&enc->out_buf, 0x0);

    for (i=0; i<numBlocks; i++) {
        eval = tempData->cel_evals + i;

        x = eval->sourceX;
        y = eval->sourceY;
        dist += eval->eval_dist[eval->best_coding];

        switch (eval->best_coding) {
        case RoQ_ID_MOT:
            /* SKIP: no arguments */
            write_typecode(&spool, RoQ_ID_MOT);
            break;

        case RoQ_ID_FCC:
            /* MOTION: one packed motion-vector byte */
            bytestream_put_byte(&spool.args, motion_arg(eval->motion));

            write_typecode(&spool, RoQ_ID_FCC);
            ff_apply_motion_8x8(enc, x, y,
                                eval->motion.d[0], eval->motion.d[1]);
            break;

        case RoQ_ID_SLD:
            /* CODEBOOK: one remapped 4x4 codebook index; the entry is
             * pixel-doubled over the 8x8 cel */
            bytestream_put_byte(&spool.args, tempData->i2f4[eval->cbEntry]);
            write_typecode(&spool, RoQ_ID_SLD);

            qcell = enc->cb4x4 + eval->cbEntry;
            ff_apply_vector_4x4(enc, x  , y  , enc->cb2x2 + qcell->idx[0]);
            ff_apply_vector_4x4(enc, x+4, y  , enc->cb2x2 + qcell->idx[1]);
            ff_apply_vector_4x4(enc, x  , y+4, enc->cb2x2 + qcell->idx[2]);
            ff_apply_vector_4x4(enc, x+4, y+4, enc->cb2x2 + qcell->idx[3]);
            break;

        case RoQ_ID_CCC:
            /* SUBDIVIDE: each 4x4 subcel carries its own coding */
            write_typecode(&spool, RoQ_ID_CCC);

            for (j=0; j<4; j++) {
                subX = x + 4*(j&1);
                subY = y + 2*(j&2);

                switch(eval->subCels[j].best_coding) {
                case RoQ_ID_MOT:
                    break;

                case RoQ_ID_FCC:
                    bytestream_put_byte(&spool.args,
                                        motion_arg(eval->subCels[j].motion));

                    ff_apply_motion_4x4(enc, subX, subY,
                                        eval->subCels[j].motion.d[0],
                                        eval->subCels[j].motion.d[1]);
                    break;

                case RoQ_ID_SLD:
                    bytestream_put_byte(&spool.args,
                                        tempData->i2f4[eval->subCels[j].cbEntry]);

                    qcell = enc->cb4x4 + eval->subCels[j].cbEntry;

                    ff_apply_vector_2x2(enc, subX    , subY    ,
                                        enc->cb2x2 + qcell->idx[0]);
                    ff_apply_vector_2x2(enc, subX + 2, subY    ,
                                        enc->cb2x2 + qcell->idx[1]);
                    ff_apply_vector_2x2(enc, subX    , subY + 2,
                                        enc->cb2x2 + qcell->idx[2]);
                    ff_apply_vector_2x2(enc, subX + 2, subY + 2,
                                        enc->cb2x2 + qcell->idx[3]);
                    break;

                case RoQ_ID_CCC:
                    /* Four raw 2x2 codebook indices */
                    for (k=0; k<4; k++) {
                        int cb_idx = eval->subCels[j].subCels[k];
                        bytestream_put_byte(&spool.args,
                                            tempData->i2f2[cb_idx]);

                        ff_apply_vector_2x2(enc, subX + 2*(k&1), subY + (k&2),
                                            enc->cb2x2 + cb_idx);
                    }
                    break;
                }
                write_typecode(&spool, eval->subCels[j].best_coding);
            }
            break;
        }
    }

    /* Flush the remainder of the argument/type spool */
    while (spool.typeSpoolLength)
        write_typecode(&spool, 0x0);

#if 0
    uint8_t *fdata[3] = {enc->frame_to_enc->data[0],
                         enc->frame_to_enc->data[1],
                         enc->frame_to_enc->data[2]};
    uint8_t *cdata[3] = {enc->current_frame->data[0],
                         enc->current_frame->data[1],
                         enc->current_frame->data[2]};
    av_log(enc->avctx, AV_LOG_ERROR, "Expected distortion: %i Actual: %i\n",
           dist,
           block_sse(fdata, cdata, 0, 0, 0, 0,
                     enc->frame_to_enc->linesize,
                     enc->current_frame->linesize,
                     enc->width)); //WARNING: Square dimensions implied...
#endif
}
  624. /**
  625. * Create a single YUV cell from a 2x2 section of the image
  626. */
  627. static inline void frame_block_to_cell(uint8_t *block, uint8_t * const *data,
  628. int top, int left, const int *stride)
  629. {
  630. int i, j, u=0, v=0;
  631. for (i=0; i<2; i++)
  632. for (j=0; j<2; j++) {
  633. int x = (top+i)*stride[0] + left + j;
  634. *block++ = data[0][x];
  635. x = (top+i)*stride[1] + left + j;
  636. u += data[1][x];
  637. v += data[2][x];
  638. }
  639. *block++ = (u+2)/4;
  640. *block++ = (v+2)/4;
  641. }
  642. /**
  643. * Create YUV clusters for the entire image
  644. */
  645. static void create_clusters(const AVFrame *frame, int w, int h, uint8_t *yuvClusters)
  646. {
  647. int i, j, k, l;
  648. for (i=0; i<h; i+=4)
  649. for (j=0; j<w; j+=4) {
  650. for (k=0; k < 2; k++)
  651. for (l=0; l < 2; l++)
  652. frame_block_to_cell(yuvClusters + (l + 2*k)*6, frame->data,
  653. i+2*k, j+2*l, frame->linesize);
  654. yuvClusters += 24;
  655. }
  656. }
/**
 * Build a codebook of cbsize entries by running ELBG clustering over
 * the input points, then convert the result back into roq_cells,
 * undoing the chroma bias applied when the points were built.
 *
 * For size == 2 the point-to-entry mapping is written into
 * tempdata->closest_cb2 for later reuse; for size == 4 a scratch
 * mapping is allocated and freed here.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int generate_codebook(RoqContext *enc, RoqTempdata *tempdata,
                             int *points, int inputCount, roq_cell *results,
                             int size, int cbsize)
{
    int i, j, k, ret = 0;
    int c_size = size*size/4;   /* number of 2x2 cells per block */
    int *buf;
    int *codebook = av_malloc(6*c_size*cbsize*sizeof(int));
    int *closest_cb;

    if (!codebook)
        return AVERROR(ENOMEM);

    if (size == 4) {
        closest_cb = av_malloc(6*c_size*inputCount*sizeof(int));
        if (!closest_cb) {
            ret = AVERROR(ENOMEM);
            goto out;
        }
    } else
        /* Keep the 2x2 mapping for gather_data_for_subcel() */
        closest_cb = tempdata->closest_cb2;

    ret = ff_init_elbg(points, 6 * c_size, inputCount, codebook,
                       cbsize, 1, closest_cb, &enc->randctx);
    if (ret < 0)
        goto out;
    ret = ff_do_elbg(points, 6 * c_size, inputCount, codebook,
                     cbsize, 1, closest_cb, &enc->randctx);
    if (ret < 0)
        goto out;

    /* Each 6-int group is one 2x2 cell: 4 luma values plus the
     * bias-scaled chroma averages */
    buf = codebook;
    for (i=0; i<cbsize; i++)
        for (k=0; k<c_size; k++) {
            for(j=0; j<4; j++)
                results->y[j] = *buf++;

            results->u = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
            results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;

            results++;
        }
out:
    if (size == 4)
        av_free(closest_cb);
    av_free(codebook);
    return ret;
}
/**
 * Build this frame's 4x4 and 2x2 codebooks from subsampled YUV
 * clusters, then precompute the unpacked and enlarged forms used by
 * the distortion evaluation.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
    int i, j, ret = 0;
    RoqCodebooks *codebooks = &tempData->codebooks;
    int max = enc->width*enc->height/16;   /* number of 4x4 blocks */
    uint8_t mb2[3*4];
    roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4);
    uint8_t *yuvClusters=av_malloc(sizeof(int)*max*6*4);
    int *points = av_malloc(max*6*4*sizeof(int));
    int bias;

    if (!results4 || !yuvClusters || !points) {
        ret = AVERROR(ENOMEM);
        goto out;
    }

    /* Subsample YUV data */
    create_clusters(enc->frame_to_enc, enc->width, enc->height, yuvClusters);

    /* Cast to integer and apply chroma bias */
    for (i=0; i<max*24; i++) {
        bias = ((i%6)<4) ? 1 : CHROMA_BIAS;   /* luma unscaled, chroma biased */
        points[i] = bias*yuvClusters[i];
    }

    /* Create 4x4 codebooks */
    if ((ret = generate_codebook(enc, tempData, points, max,
                                 results4, 4, MAX_CBS_4x4)) < 0)
        goto out;

    codebooks->numCB4 = MAX_CBS_4x4;

    tempData->closest_cb2 = av_malloc(max*4*sizeof(int));
    if (!tempData->closest_cb2) {
        ret = AVERROR(ENOMEM);
        goto out;
    }

    /* Create 2x2 codebooks */
    if ((ret = generate_codebook(enc, tempData, points, max * 4,
                                 enc->cb2x2, 2, MAX_CBS_2x2)) < 0)
        goto out;

    codebooks->numCB2 = MAX_CBS_2x2;

    /* Unpack 2x2 codebook clusters */
    for (i=0; i<codebooks->numCB2; i++)
        unpack_roq_cell(enc->cb2x2 + i, codebooks->unpacked_cb2 + i*2*2*3);

    /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
    for (i=0; i<codebooks->numCB4; i++) {
        for (j=0; j<4; j++) {
            unpack_roq_cell(&results4[4*i + j], mb2);
            index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2,
                     &enc->cb4x4[i].idx[j], 2);
        }

        unpack_roq_qcell(codebooks->unpacked_cb2, enc->cb4x4 + i,
                         codebooks->unpacked_cb4 + i*4*4*3);
        enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3,
                        codebooks->unpacked_cb4_enlarged + i*8*8*3);
    }
out:
    av_free(yuvClusters);
    av_free(points);
    av_free(results4);
    return ret;
}
/**
 * Encode one frame: build codebooks, evaluate every cel, then write
 * the codebook and VQ chunks.  If the VQ chunk would exceed Quake 3's
 * 64k limit, lambda is increased and the evaluation redone.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int roq_encode_video(RoqContext *enc)
{
    RoqTempdata *tempData = enc->tmpData;
    int i, ret;

    memset(tempData, 0, sizeof(*tempData));

    ret = create_cel_evals(enc, tempData);
    if (ret < 0)
        return ret;

    ret = generate_new_codebooks(enc, tempData);
    if (ret < 0)
        return ret;

    /* Motion estimation only makes sense with a reference frame */
    if (enc->framesSinceKeyframe >= 1) {
        motion_search(enc, 8);
        motion_search(enc, 4);
    }

 retry_encode:
    for (i=0; i<enc->width*enc->height/64; i++)
        gather_data_for_cel(tempData->cel_evals + i, enc, tempData);

    /* Quake 3 can't handle chunks bigger than 65535 bytes */
    if (tempData->mainChunkSize/8 > 65535) {
        av_log(enc->avctx, AV_LOG_ERROR,
               "Warning, generated a frame too big (%d > 65535), "
               "try using a smaller qscale value.\n",
               tempData->mainChunkSize/8);
        /* Raise lambda, reset the accounting and re-evaluate */
        enc->lambda *= 1.5;
        tempData->mainChunkSize = 0;
        memset(tempData->used_option, 0, sizeof(tempData->used_option));
        memset(tempData->codebooks.usedCB4, 0,
               sizeof(tempData->codebooks.usedCB4));
        memset(tempData->codebooks.usedCB2, 0,
               sizeof(tempData->codebooks.usedCB2));

        goto retry_encode;
    }

    remap_codebooks(enc, tempData);

    write_codebooks(enc, tempData);

    reconstruct_and_encode_image(enc, tempData, enc->width, enc->height,
                                 enc->width*enc->height/64);

    enc->avctx->coded_frame = enc->current_frame;

    /* Rotate frame history */
    FFSWAP(AVFrame *, enc->current_frame, enc->last_frame);
    FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4);
    FFSWAP(motion_vect *, enc->last_motion8, enc->this_motion8);

    av_free(tempData->cel_evals);
    av_free(tempData->closest_cb2);

    enc->framesSinceKeyframe++;

    return 0;
}
  803. static av_cold int roq_encode_end(AVCodecContext *avctx)
  804. {
  805. RoqContext *enc = avctx->priv_data;
  806. av_frame_free(&enc->current_frame);
  807. av_frame_free(&enc->last_frame);
  808. av_free(enc->tmpData);
  809. av_free(enc->this_motion4);
  810. av_free(enc->last_motion4);
  811. av_free(enc->this_motion8);
  812. av_free(enc->last_motion8);
  813. return 0;
  814. }
  815. static av_cold int roq_encode_init(AVCodecContext *avctx)
  816. {
  817. RoqContext *enc = avctx->priv_data;
  818. av_lfg_init(&enc->randctx, 1);
  819. enc->framesSinceKeyframe = 0;
  820. if ((avctx->width & 0xf) || (avctx->height & 0xf)) {
  821. av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n");
  822. return -1;
  823. }
  824. if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1)))
  825. av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n");
  826. enc->width = avctx->width;
  827. enc->height = avctx->height;
  828. enc->framesSinceKeyframe = 0;
  829. enc->first_frame = 1;
  830. enc->last_frame = av_frame_alloc();
  831. enc->current_frame = av_frame_alloc();
  832. if (!enc->last_frame || !enc->current_frame) {
  833. roq_encode_end(avctx);
  834. return AVERROR(ENOMEM);
  835. }
  836. enc->tmpData = av_malloc(sizeof(RoqTempdata));
  837. enc->this_motion4 =
  838. av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect));
  839. enc->last_motion4 =
  840. av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect));
  841. enc->this_motion8 =
  842. av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect));
  843. enc->last_motion8 =
  844. av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect));
  845. return 0;
  846. }
  847. static void roq_write_video_info_chunk(RoqContext *enc)
  848. {
  849. /* ROQ info chunk */
  850. bytestream_put_le16(&enc->out_buf, RoQ_INFO);
  851. /* Size: 8 bytes */
  852. bytestream_put_le32(&enc->out_buf, 8);
  853. /* Unused argument */
  854. bytestream_put_byte(&enc->out_buf, 0x00);
  855. bytestream_put_byte(&enc->out_buf, 0x00);
  856. /* Width */
  857. bytestream_put_le16(&enc->out_buf, enc->width);
  858. /* Height */
  859. bytestream_put_le16(&enc->out_buf, enc->height);
  860. /* Unused in Quake 3, mimics the output of the real encoder */
  861. bytestream_put_byte(&enc->out_buf, 0x08);
  862. bytestream_put_byte(&enc->out_buf, 0x00);
  863. bytestream_put_byte(&enc->out_buf, 0x04);
  864. bytestream_put_byte(&enc->out_buf, 0x00);
  865. }
/**
 * Encode a single frame into pkt.
 *
 * Allocates a worst-case packet, writes the info chunk before the
 * first frame, then delegates to roq_encode_video().
 *
 * @return 0 on success, a negative value on failure
 */
static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                            const AVFrame *frame, int *got_packet)
{
    RoqContext *enc = avctx->priv_data;
    int size, ret;

    enc->avctx = avctx;

    enc->frame_to_enc = frame;

    /* Rate-distortion lambda from per-frame quality, or a default */
    if (frame->quality)
        enc->lambda = frame->quality - 1;
    else
        enc->lambda = 2*ROQ_LAMBDA_SCALE;

    /* 138 bits max per 8x8 block +
     * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
    size = ((enc->width * enc->height / 64) * 138 + 7) / 8 + 256 * (6 + 4) + 8;
    if ((ret = ff_alloc_packet(pkt, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Error getting output packet with size %d.\n", size);
        return ret;
    }
    enc->out_buf = pkt->data;

    /* Check for I frame */
    if (enc->framesSinceKeyframe == avctx->gop_size)
        enc->framesSinceKeyframe = 0;

    if (enc->first_frame) {
        /* Alloc memory for the reconstruction data (we must know the stride
         for that) */
        if (ff_get_buffer(avctx, enc->current_frame, 0) ||
            ff_get_buffer(avctx, enc->last_frame, 0)) {
            av_log(avctx, AV_LOG_ERROR, " RoQ: get_buffer() failed\n");
            return -1;
        }

        /* Before the first video frame, write a "video info" chunk */
        roq_write_video_info_chunk(enc);

        enc->first_frame = 0;
    }

    /* Encode the actual frame */
    ret = roq_encode_video(enc);
    if (ret < 0)
        return ret;

    /* Packet size is however far out_buf advanced */
    pkt->size = enc->out_buf - pkt->data;
    if (enc->framesSinceKeyframe == 1)
        pkt->flags |= AV_PKT_FLAG_KEY;
    *got_packet = 1;

    return 0;
}
/* Encoder registration: RoQ accepts only 30 fps input and planar
 * YUV444 frames. */
AVCodec ff_roq_encoder = {
    .name                 = "roqvideo",
    .long_name            = NULL_IF_CONFIG_SMALL("id RoQ video"),
    .type                 = AVMEDIA_TYPE_VIDEO,
    .id                   = AV_CODEC_ID_ROQ,
    .priv_data_size       = sizeof(RoqContext),
    .init                 = roq_encode_init,
    .encode2              = roq_encode_frame,
    .close                = roq_encode_end,
    .supported_framerates = (const AVRational[]){ {30,1}, {0,0} },
    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV444P,
                                                          AV_PIX_FMT_NONE },
};