You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1070 lines
31KB

  1. /*
  2. * RoQ Video Encoder.
  3. *
  4. * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
  5. * Copyright (C) 2004-2007 Eric Lasota
  6. * Based on RoQ specs (C) 2001 Tim Ferguson
  7. *
  8. * This file is part of FFmpeg.
  9. *
  10. * FFmpeg is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * FFmpeg is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with FFmpeg; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. */
  24. /**
  25. * @file libavcodec/roqvideoenc.c
  26. * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
  27. * Switchblade3 FFmpeg glue by Eric Lasota.
  28. */
  29. /*
  30. * COSTS:
  31. * Level 1:
  32. * SKIP - 2 bits
  33. * MOTION - 2 + 8 bits
  34. * CODEBOOK - 2 + 8 bits
  35. * SUBDIVIDE - 2 + combined subcel cost
  36. *
  37. * Level 2:
  38. * SKIP - 2 bits
  39. * MOTION - 2 + 8 bits
  40. * CODEBOOK - 2 + 8 bits
  41. * SUBDIVIDE - 2 + 4*8 bits
  42. *
  43. * Maximum cost: 138 bits per cel
  44. *
  45. * Proper evaluation requires LCD fraction comparison, which requires
  46. * Squared Error (SE) loss * savings increase
  47. *
  48. * Maximum savings increase: 136 bits
  49. * Maximum SE loss without overflow: 31580641
  50. * Components in 8x8 supercel: 192
  51. * Maximum SE precision per component: 164482
  52. * >65025, so no truncation is needed (phew)
  53. */
  54. #include <string.h>
  55. #include "roqvideo.h"
  56. #include "bytestream.h"
  57. #include "elbg.h"
  58. #include "mathops.h"
/**
 * Weight given to the chroma planes by the distortion helpers in this file
 * (luma is weighted by 4 there); also used to scale the chroma components of
 * the codebook training points.
 */
#define CHROMA_BIAS 1

/**
 * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a
 * Quake 3 bug.
 */
#define MAX_CBS_4x4 255

#define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.

/* The cast is useful when multiplying it by INT_MAX */
#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
  68. /* Macroblock support functions */
  69. static void unpack_roq_cell(roq_cell *cell, uint8_t u[4*3])
  70. {
  71. memcpy(u , cell->y, 4);
  72. memset(u+4, cell->u, 4);
  73. memset(u+8, cell->v, 4);
  74. }
  75. static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3])
  76. {
  77. int i,cp;
  78. static const int offsets[4] = {0, 2, 8, 10};
  79. for (cp=0; cp<3; cp++)
  80. for (i=0; i<4; i++) {
  81. u[4*4*cp + offsets[i] ] = cb2[qcell->idx[i]*2*2*3 + 4*cp ];
  82. u[4*4*cp + offsets[i]+1] = cb2[qcell->idx[i]*2*2*3 + 4*cp+1];
  83. u[4*4*cp + offsets[i]+4] = cb2[qcell->idx[i]*2*2*3 + 4*cp+2];
  84. u[4*4*cp + offsets[i]+5] = cb2[qcell->idx[i]*2*2*3 + 4*cp+3];
  85. }
  86. }
  87. static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64])
  88. {
  89. int x,y,cp;
  90. for(cp=0; cp<3; cp++)
  91. for(y=0; y<8; y++)
  92. for(x=0; x<8; x++)
  93. *u++ = base[(y/2)*4 + (x/2) + 16*cp];
  94. }
  95. static inline int square(int x)
  96. {
  97. return x*x;
  98. }
  99. static inline int eval_sse(uint8_t *a, uint8_t *b, int count)
  100. {
  101. int diff=0;
  102. while(count--)
  103. diff += square(*b++ - *a++);
  104. return diff;
  105. }
  106. // FIXME Could use DSPContext.sse, but it is not so speed critical (used
  107. // just for motion estimation).
  108. static int block_sse(uint8_t **buf1, uint8_t **buf2, int x1, int y1, int x2,
  109. int y2, int *stride1, int *stride2, int size)
  110. {
  111. int i, k;
  112. int sse=0;
  113. for (k=0; k<3; k++) {
  114. int bias = (k ? CHROMA_BIAS : 4);
  115. for (i=0; i<size; i++)
  116. sse += bias*eval_sse(buf1[k] + (y1+i)*stride1[k] + x1,
  117. buf2[k] + (y2+i)*stride2[k] + x2, size);
  118. }
  119. return sse;
  120. }
  121. static int eval_motion_dist(RoqContext *enc, int x, int y, motion_vect vect,
  122. int size)
  123. {
  124. int mx=vect.d[0];
  125. int my=vect.d[1];
  126. if (mx < -7 || mx > 7)
  127. return INT_MAX;
  128. if (my < -7 || my > 7)
  129. return INT_MAX;
  130. mx += x;
  131. my += y;
  132. if ((unsigned) mx > enc->width-size || (unsigned) my > enc->height-size)
  133. return INT_MAX;
  134. return block_sse(enc->frame_to_enc->data, enc->last_frame->data, x, y,
  135. mx, my,
  136. enc->frame_to_enc->linesize, enc->last_frame->linesize,
  137. size);
  138. }
  139. /**
  140. * Returns distortion between two macroblocks
  141. */
  142. static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size)
  143. {
  144. int cp, sdiff=0;
  145. for(cp=0;cp<3;cp++) {
  146. int bias = (cp ? CHROMA_BIAS : 4);
  147. sdiff += bias*eval_sse(a, b, size*size);
  148. a += size*size;
  149. b += size*size;
  150. }
  151. return sdiff;
  152. }
  153. typedef struct
  154. {
  155. int eval_dist[4];
  156. int best_bit_use;
  157. int best_coding;
  158. int subCels[4];
  159. motion_vect motion;
  160. int cbEntry;
  161. } SubcelEvaluation;
  162. typedef struct
  163. {
  164. int eval_dist[4];
  165. int best_coding;
  166. SubcelEvaluation subCels[4];
  167. motion_vect motion;
  168. int cbEntry;
  169. int sourceX, sourceY;
  170. } CelEvaluation;
  171. typedef struct
  172. {
  173. int numCB4;
  174. int numCB2;
  175. int usedCB2[MAX_CBS_2x2];
  176. int usedCB4[MAX_CBS_4x4];
  177. uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3];
  178. uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3];
  179. uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3];
  180. } RoqCodebooks;
  181. /**
  182. * Temporary vars
  183. */
  184. typedef struct RoqTempData
  185. {
  186. CelEvaluation *cel_evals;
  187. int f2i4[MAX_CBS_4x4];
  188. int i2f4[MAX_CBS_4x4];
  189. int f2i2[MAX_CBS_2x2];
  190. int i2f2[MAX_CBS_2x2];
  191. int mainChunkSize;
  192. int numCB4;
  193. int numCB2;
  194. RoqCodebooks codebooks;
  195. int *closest_cb2;
  196. int used_option[4];
  197. } RoqTempdata;
  198. /**
  199. * Initializes cel evaluators and sets their source coordinates
  200. */
  201. static void create_cel_evals(RoqContext *enc, RoqTempdata *tempData)
  202. {
  203. int n=0, x, y, i;
  204. tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation));
  205. /* Map to the ROQ quadtree order */
  206. for (y=0; y<enc->height; y+=16)
  207. for (x=0; x<enc->width; x+=16)
  208. for(i=0; i<4; i++) {
  209. tempData->cel_evals[n ].sourceX = x + (i&1)*8;
  210. tempData->cel_evals[n++].sourceY = y + (i&2)*4;
  211. }
  212. }
  213. /**
  214. * Get macroblocks from parts of the image
  215. */
  216. static void get_frame_mb(AVFrame *frame, int x, int y, uint8_t mb[], int dim)
  217. {
  218. int i, j, cp;
  219. for (cp=0; cp<3; cp++) {
  220. int stride = frame->linesize[cp];
  221. for (i=0; i<dim; i++)
  222. for (j=0; j<dim; j++)
  223. *mb++ = frame->data[cp][(y+i)*stride + x + j];
  224. }
  225. }
  226. /**
  227. * Find the codebook with the lowest distortion from an image
  228. */
  229. static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
  230. int *outIndex, int dim)
  231. {
  232. int i, lDiff = INT_MAX, pick=0;
  233. /* Diff against the others */
  234. for (i=0; i<numCB; i++) {
  235. int diff = squared_diff_macroblock(cluster, cb + i*dim*dim*3, dim);
  236. if (diff < lDiff) {
  237. lDiff = diff;
  238. pick = i;
  239. }
  240. }
  241. *outIndex = pick;
  242. return lDiff;
  243. }
/**
 * Evaluate one candidate motion vector for the block currently handled by
 * motion_search() and keep it if it beats the best one so far.
 *
 * Relies on these names being in scope at the expansion site: enc, i, j,
 * blocksize, diff, lowestdiff and bestpick. MOTION is expanded twice, so it
 * must not have side effects.
 */
#define EVAL_MOTION(MOTION) \
    do { \
        diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \
            \
        if (diff < lowestdiff) { \
            lowestdiff = diff; \
            bestpick = MOTION; \
        } \
    } while(0)
  253. static void motion_search(RoqContext *enc, int blocksize)
  254. {
  255. static const motion_vect offsets[8] = {
  256. {{ 0,-1}},
  257. {{ 0, 1}},
  258. {{-1, 0}},
  259. {{ 1, 0}},
  260. {{-1, 1}},
  261. {{ 1,-1}},
  262. {{-1,-1}},
  263. {{ 1, 1}},
  264. };
  265. int diff, lowestdiff, oldbest;
  266. int off[3];
  267. motion_vect bestpick = {{0,0}};
  268. int i, j, k, offset;
  269. motion_vect *last_motion;
  270. motion_vect *this_motion;
  271. motion_vect vect, vect2;
  272. int max=(enc->width/blocksize)*enc->height/blocksize;
  273. if (blocksize == 4) {
  274. last_motion = enc->last_motion4;
  275. this_motion = enc->this_motion4;
  276. } else {
  277. last_motion = enc->last_motion8;
  278. this_motion = enc->this_motion8;
  279. }
  280. for (i=0; i<enc->height; i+=blocksize)
  281. for (j=0; j<enc->width; j+=blocksize) {
  282. lowestdiff = eval_motion_dist(enc, j, i, (motion_vect) {{0,0}},
  283. blocksize);
  284. bestpick.d[0] = 0;
  285. bestpick.d[1] = 0;
  286. if (blocksize == 4)
  287. EVAL_MOTION(enc->this_motion8[(i/8)*(enc->width/8) + j/8]);
  288. offset = (i/blocksize)*enc->width/blocksize + j/blocksize;
  289. if (offset < max && offset >= 0)
  290. EVAL_MOTION(last_motion[offset]);
  291. offset++;
  292. if (offset < max && offset >= 0)
  293. EVAL_MOTION(last_motion[offset]);
  294. offset = (i/blocksize + 1)*enc->width/blocksize + j/blocksize;
  295. if (offset < max && offset >= 0)
  296. EVAL_MOTION(last_motion[offset]);
  297. off[0]= (i/blocksize)*enc->width/blocksize + j/blocksize - 1;
  298. off[1]= off[0] - enc->width/blocksize + 1;
  299. off[2]= off[1] + 1;
  300. if (i) {
  301. for(k=0; k<2; k++)
  302. vect.d[k]= mid_pred(this_motion[off[0]].d[k],
  303. this_motion[off[1]].d[k],
  304. this_motion[off[2]].d[k]);
  305. EVAL_MOTION(vect);
  306. for(k=0; k<3; k++)
  307. EVAL_MOTION(this_motion[off[k]]);
  308. } else if(j)
  309. EVAL_MOTION(this_motion[off[0]]);
  310. vect = bestpick;
  311. oldbest = -1;
  312. while (oldbest != lowestdiff) {
  313. oldbest = lowestdiff;
  314. for (k=0; k<8; k++) {
  315. vect2 = vect;
  316. vect2.d[0] += offsets[k].d[0];
  317. vect2.d[1] += offsets[k].d[1];
  318. EVAL_MOTION(vect2);
  319. }
  320. vect = bestpick;
  321. }
  322. offset = (i/blocksize)*enc->width/blocksize + j/blocksize;
  323. this_motion[offset] = bestpick;
  324. }
  325. }
  326. /**
  327. * Gets distortion for all options available to a subcel
  328. */
  329. static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
  330. int y, RoqContext *enc, RoqTempdata *tempData)
  331. {
  332. uint8_t mb4[4*4*3];
  333. uint8_t mb2[2*2*3];
  334. int cluster_index;
  335. int i, best_dist;
  336. static const int bitsUsed[4] = {2, 10, 10, 34};
  337. if (enc->framesSinceKeyframe >= 1) {
  338. subcel->motion = enc->this_motion4[y*enc->width/16 + x/4];
  339. subcel->eval_dist[RoQ_ID_FCC] =
  340. eval_motion_dist(enc, x, y,
  341. enc->this_motion4[y*enc->width/16 + x/4], 4);
  342. } else
  343. subcel->eval_dist[RoQ_ID_FCC] = INT_MAX;
  344. if (enc->framesSinceKeyframe >= 2)
  345. subcel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data,
  346. enc->current_frame->data, x,
  347. y, x, y,
  348. enc->frame_to_enc->linesize,
  349. enc->current_frame->linesize,
  350. 4);
  351. else
  352. subcel->eval_dist[RoQ_ID_MOT] = INT_MAX;
  353. cluster_index = y*enc->width/16 + x/4;
  354. get_frame_mb(enc->frame_to_enc, x, y, mb4, 4);
  355. subcel->eval_dist[RoQ_ID_SLD] = index_mb(mb4,
  356. tempData->codebooks.unpacked_cb4,
  357. tempData->codebooks.numCB4,
  358. &subcel->cbEntry, 4);
  359. subcel->eval_dist[RoQ_ID_CCC] = 0;
  360. for(i=0;i<4;i++) {
  361. subcel->subCels[i] = tempData->closest_cb2[cluster_index*4+i];
  362. get_frame_mb(enc->frame_to_enc, x+2*(i&1),
  363. y+(i&2), mb2, 2);
  364. subcel->eval_dist[RoQ_ID_CCC] +=
  365. squared_diff_macroblock(tempData->codebooks.unpacked_cb2 + subcel->subCels[i]*2*2*3, mb2, 2);
  366. }
  367. best_dist = INT_MAX;
  368. for (i=0; i<4; i++)
  369. if (ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + enc->lambda*bitsUsed[i] <
  370. best_dist) {
  371. subcel->best_coding = i;
  372. subcel->best_bit_use = bitsUsed[i];
  373. best_dist = ROQ_LAMBDA_SCALE*subcel->eval_dist[i] +
  374. enc->lambda*bitsUsed[i];
  375. }
  376. }
  377. /**
  378. * Gets distortion for all options available to a cel
  379. */
  380. static void gather_data_for_cel(CelEvaluation *cel, RoqContext *enc,
  381. RoqTempdata *tempData)
  382. {
  383. uint8_t mb8[8*8*3];
  384. int index = cel->sourceY*enc->width/64 + cel->sourceX/8;
  385. int i, j, best_dist, divide_bit_use;
  386. int bitsUsed[4] = {2, 10, 10, 0};
  387. if (enc->framesSinceKeyframe >= 1) {
  388. cel->motion = enc->this_motion8[index];
  389. cel->eval_dist[RoQ_ID_FCC] =
  390. eval_motion_dist(enc, cel->sourceX, cel->sourceY,
  391. enc->this_motion8[index], 8);
  392. } else
  393. cel->eval_dist[RoQ_ID_FCC] = INT_MAX;
  394. if (enc->framesSinceKeyframe >= 2)
  395. cel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data,
  396. enc->current_frame->data,
  397. cel->sourceX, cel->sourceY,
  398. cel->sourceX, cel->sourceY,
  399. enc->frame_to_enc->linesize,
  400. enc->current_frame->linesize,8);
  401. else
  402. cel->eval_dist[RoQ_ID_MOT] = INT_MAX;
  403. get_frame_mb(enc->frame_to_enc, cel->sourceX, cel->sourceY, mb8, 8);
  404. cel->eval_dist[RoQ_ID_SLD] =
  405. index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged,
  406. tempData->codebooks.numCB4, &cel->cbEntry, 8);
  407. gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData);
  408. gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData);
  409. gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData);
  410. gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData);
  411. cel->eval_dist[RoQ_ID_CCC] = 0;
  412. divide_bit_use = 0;
  413. for (i=0; i<4; i++) {
  414. cel->eval_dist[RoQ_ID_CCC] +=
  415. cel->subCels[i].eval_dist[cel->subCels[i].best_coding];
  416. divide_bit_use += cel->subCels[i].best_bit_use;
  417. }
  418. best_dist = INT_MAX;
  419. bitsUsed[3] = 2 + divide_bit_use;
  420. for (i=0; i<4; i++)
  421. if (ROQ_LAMBDA_SCALE*cel->eval_dist[i] + enc->lambda*bitsUsed[i] <
  422. best_dist) {
  423. cel->best_coding = i;
  424. best_dist = ROQ_LAMBDA_SCALE*cel->eval_dist[i] +
  425. enc->lambda*bitsUsed[i];
  426. }
  427. tempData->used_option[cel->best_coding]++;
  428. tempData->mainChunkSize += bitsUsed[cel->best_coding];
  429. if (cel->best_coding == RoQ_ID_SLD)
  430. tempData->codebooks.usedCB4[cel->cbEntry]++;
  431. if (cel->best_coding == RoQ_ID_CCC)
  432. for (i=0; i<4; i++) {
  433. if (cel->subCels[i].best_coding == RoQ_ID_SLD)
  434. tempData->codebooks.usedCB4[cel->subCels[i].cbEntry]++;
  435. else if (cel->subCels[i].best_coding == RoQ_ID_CCC)
  436. for (j=0; j<4; j++)
  437. tempData->codebooks.usedCB2[cel->subCels[i].subCels[j]]++;
  438. }
  439. }
  440. static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData)
  441. {
  442. int i, j, idx=0;
  443. /* Make remaps for the final codebook usage */
  444. for (i=0; i<MAX_CBS_4x4; i++) {
  445. if (tempData->codebooks.usedCB4[i]) {
  446. tempData->i2f4[i] = idx;
  447. tempData->f2i4[idx] = i;
  448. for (j=0; j<4; j++)
  449. tempData->codebooks.usedCB2[enc->cb4x4[i].idx[j]]++;
  450. idx++;
  451. }
  452. }
  453. tempData->numCB4 = idx;
  454. idx = 0;
  455. for (i=0; i<MAX_CBS_2x2; i++) {
  456. if (tempData->codebooks.usedCB2[i]) {
  457. tempData->i2f2[i] = idx;
  458. tempData->f2i2[idx] = i;
  459. idx++;
  460. }
  461. }
  462. tempData->numCB2 = idx;
  463. }
  464. /**
  465. * Write codebook chunk
  466. */
  467. static void write_codebooks(RoqContext *enc, RoqTempdata *tempData)
  468. {
  469. int i, j;
  470. uint8_t **outp= &enc->out_buf;
  471. if (tempData->numCB2) {
  472. bytestream_put_le16(outp, RoQ_QUAD_CODEBOOK);
  473. bytestream_put_le32(outp, tempData->numCB2*6 + tempData->numCB4*4);
  474. bytestream_put_byte(outp, tempData->numCB4);
  475. bytestream_put_byte(outp, tempData->numCB2);
  476. for (i=0; i<tempData->numCB2; i++) {
  477. bytestream_put_buffer(outp, enc->cb2x2[tempData->f2i2[i]].y, 4);
  478. bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u);
  479. bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v);
  480. }
  481. for (i=0; i<tempData->numCB4; i++)
  482. for (j=0; j<4; j++)
  483. bytestream_put_byte(outp, tempData->i2f2[enc->cb4x4[tempData->f2i4[i]].idx[j]]);
  484. }
  485. }
  486. static inline uint8_t motion_arg(motion_vect mot)
  487. {
  488. uint8_t ax = 8 - ((uint8_t) mot.d[0]);
  489. uint8_t ay = 8 - ((uint8_t) mot.d[1]);
  490. return ((ax&15)<<4) | (ay&15);
  491. }
  492. typedef struct
  493. {
  494. int typeSpool;
  495. int typeSpoolLength;
  496. uint8_t argumentSpool[64];
  497. uint8_t *args;
  498. uint8_t **pout;
  499. } CodingSpool;
  500. /* NOTE: Typecodes must be spooled AFTER arguments!! */
  501. static void write_typecode(CodingSpool *s, uint8_t type)
  502. {
  503. s->typeSpool |= (type & 3) << (14 - s->typeSpoolLength);
  504. s->typeSpoolLength += 2;
  505. if (s->typeSpoolLength == 16) {
  506. bytestream_put_le16(s->pout, s->typeSpool);
  507. bytestream_put_buffer(s->pout, s->argumentSpool,
  508. s->args - s->argumentSpool);
  509. s->typeSpoolLength = 0;
  510. s->typeSpool = 0;
  511. s->args = s->argumentSpool;
  512. }
  513. }
  514. static void reconstruct_and_encode_image(RoqContext *enc, RoqTempdata *tempData, int w, int h, int numBlocks)
  515. {
  516. int i, j, k;
  517. int x, y;
  518. int subX, subY;
  519. int dist=0;
  520. roq_qcell *qcell;
  521. CelEvaluation *eval;
  522. CodingSpool spool;
  523. spool.typeSpool=0;
  524. spool.typeSpoolLength=0;
  525. spool.args = spool.argumentSpool;
  526. spool.pout = &enc->out_buf;
  527. if (tempData->used_option[RoQ_ID_CCC]%2)
  528. tempData->mainChunkSize+=8; //FIXME
  529. /* Write the video chunk header */
  530. bytestream_put_le16(&enc->out_buf, RoQ_QUAD_VQ);
  531. bytestream_put_le32(&enc->out_buf, tempData->mainChunkSize/8);
  532. bytestream_put_byte(&enc->out_buf, 0x0);
  533. bytestream_put_byte(&enc->out_buf, 0x0);
  534. for (i=0; i<numBlocks; i++) {
  535. eval = tempData->cel_evals + i;
  536. x = eval->sourceX;
  537. y = eval->sourceY;
  538. dist += eval->eval_dist[eval->best_coding];
  539. switch (eval->best_coding) {
  540. case RoQ_ID_MOT:
  541. write_typecode(&spool, RoQ_ID_MOT);
  542. break;
  543. case RoQ_ID_FCC:
  544. bytestream_put_byte(&spool.args, motion_arg(eval->motion));
  545. write_typecode(&spool, RoQ_ID_FCC);
  546. ff_apply_motion_8x8(enc, x, y,
  547. eval->motion.d[0], eval->motion.d[1]);
  548. break;
  549. case RoQ_ID_SLD:
  550. bytestream_put_byte(&spool.args, tempData->i2f4[eval->cbEntry]);
  551. write_typecode(&spool, RoQ_ID_SLD);
  552. qcell = enc->cb4x4 + eval->cbEntry;
  553. ff_apply_vector_4x4(enc, x , y , enc->cb2x2 + qcell->idx[0]);
  554. ff_apply_vector_4x4(enc, x+4, y , enc->cb2x2 + qcell->idx[1]);
  555. ff_apply_vector_4x4(enc, x , y+4, enc->cb2x2 + qcell->idx[2]);
  556. ff_apply_vector_4x4(enc, x+4, y+4, enc->cb2x2 + qcell->idx[3]);
  557. break;
  558. case RoQ_ID_CCC:
  559. write_typecode(&spool, RoQ_ID_CCC);
  560. for (j=0; j<4; j++) {
  561. subX = x + 4*(j&1);
  562. subY = y + 2*(j&2);
  563. switch(eval->subCels[j].best_coding) {
  564. case RoQ_ID_MOT:
  565. break;
  566. case RoQ_ID_FCC:
  567. bytestream_put_byte(&spool.args,
  568. motion_arg(eval->subCels[j].motion));
  569. ff_apply_motion_4x4(enc, subX, subY,
  570. eval->subCels[j].motion.d[0],
  571. eval->subCels[j].motion.d[1]);
  572. break;
  573. case RoQ_ID_SLD:
  574. bytestream_put_byte(&spool.args,
  575. tempData->i2f4[eval->subCels[j].cbEntry]);
  576. qcell = enc->cb4x4 + eval->subCels[j].cbEntry;
  577. ff_apply_vector_2x2(enc, subX , subY ,
  578. enc->cb2x2 + qcell->idx[0]);
  579. ff_apply_vector_2x2(enc, subX+2, subY ,
  580. enc->cb2x2 + qcell->idx[1]);
  581. ff_apply_vector_2x2(enc, subX , subY+2,
  582. enc->cb2x2 + qcell->idx[2]);
  583. ff_apply_vector_2x2(enc, subX+2, subY+2,
  584. enc->cb2x2 + qcell->idx[3]);
  585. break;
  586. case RoQ_ID_CCC:
  587. for (k=0; k<4; k++) {
  588. int cb_idx = eval->subCels[j].subCels[k];
  589. bytestream_put_byte(&spool.args,
  590. tempData->i2f2[cb_idx]);
  591. ff_apply_vector_2x2(enc, subX + 2*(k&1), subY + (k&2),
  592. enc->cb2x2 + cb_idx);
  593. }
  594. break;
  595. }
  596. write_typecode(&spool, eval->subCels[j].best_coding);
  597. }
  598. break;
  599. }
  600. }
  601. /* Flush the remainder of the argument/type spool */
  602. while (spool.typeSpoolLength)
  603. write_typecode(&spool, 0x0);
  604. #if 0
  605. uint8_t *fdata[3] = {enc->frame_to_enc->data[0],
  606. enc->frame_to_enc->data[1],
  607. enc->frame_to_enc->data[2]};
  608. uint8_t *cdata[3] = {enc->current_frame->data[0],
  609. enc->current_frame->data[1],
  610. enc->current_frame->data[2]};
  611. av_log(enc->avctx, AV_LOG_ERROR, "Expected distortion: %i Actual: %i\n",
  612. dist,
  613. block_sse(fdata, cdata, 0, 0, 0, 0,
  614. enc->frame_to_enc->linesize,
  615. enc->current_frame->linesize,
  616. enc->width)); //WARNING: Square dimensions implied...
  617. #endif
  618. }
  619. /**
  620. * Create a single YUV cell from a 2x2 section of the image
  621. */
  622. static inline void frame_block_to_cell(uint8_t *block, uint8_t **data,
  623. int top, int left, int *stride)
  624. {
  625. int i, j, u=0, v=0;
  626. for (i=0; i<2; i++)
  627. for (j=0; j<2; j++) {
  628. int x = (top+i)*stride[0] + left + j;
  629. *block++ = data[0][x];
  630. x = (top+i)*stride[1] + left + j;
  631. u += data[1][x];
  632. v += data[2][x];
  633. }
  634. *block++ = (u+2)/4;
  635. *block++ = (v+2)/4;
  636. }
  637. /**
  638. * Creates YUV clusters for the entire image
  639. */
  640. static void create_clusters(AVFrame *frame, int w, int h, uint8_t *yuvClusters)
  641. {
  642. int i, j, k, l;
  643. for (i=0; i<h; i+=4)
  644. for (j=0; j<w; j+=4) {
  645. for (k=0; k < 2; k++)
  646. for (l=0; l < 2; l++)
  647. frame_block_to_cell(yuvClusters + (l + 2*k)*6, frame->data,
  648. i+2*k, j+2*l, frame->linesize);
  649. yuvClusters += 24;
  650. }
  651. }
  652. static void generate_codebook(RoqContext *enc, RoqTempdata *tempdata,
  653. int *points, int inputCount, roq_cell *results,
  654. int size, int cbsize)
  655. {
  656. int i, j, k;
  657. int c_size = size*size/4;
  658. int *buf;
  659. int *codebook = av_malloc(6*c_size*cbsize*sizeof(int));
  660. int *closest_cb;
  661. if (size == 4)
  662. closest_cb = av_malloc(6*c_size*inputCount*sizeof(int));
  663. else
  664. closest_cb = tempdata->closest_cb2;
  665. ff_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
  666. ff_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
  667. if (size == 4)
  668. av_free(closest_cb);
  669. buf = codebook;
  670. for (i=0; i<cbsize; i++)
  671. for (k=0; k<c_size; k++) {
  672. for(j=0; j<4; j++)
  673. results->y[j] = *buf++;
  674. results->u = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
  675. results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
  676. results++;
  677. }
  678. av_free(codebook);
  679. }
  680. static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
  681. {
  682. int i,j;
  683. RoqCodebooks *codebooks = &tempData->codebooks;
  684. int max = enc->width*enc->height/16;
  685. uint8_t mb2[3*4];
  686. roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4);
  687. uint8_t *yuvClusters=av_malloc(sizeof(int)*max*6*4);
  688. int *points = av_malloc(max*6*4*sizeof(int));
  689. int bias;
  690. /* Subsample YUV data */
  691. create_clusters(enc->frame_to_enc, enc->width, enc->height, yuvClusters);
  692. /* Cast to integer and apply chroma bias */
  693. for (i=0; i<max*24; i++) {
  694. bias = ((i%6)<4) ? 1 : CHROMA_BIAS;
  695. points[i] = bias*yuvClusters[i];
  696. }
  697. /* Create 4x4 codebooks */
  698. generate_codebook(enc, tempData, points, max, results4, 4, MAX_CBS_4x4);
  699. codebooks->numCB4 = MAX_CBS_4x4;
  700. tempData->closest_cb2 = av_malloc(max*4*sizeof(int));
  701. /* Create 2x2 codebooks */
  702. generate_codebook(enc, tempData, points, max*4, enc->cb2x2, 2, MAX_CBS_2x2);
  703. codebooks->numCB2 = MAX_CBS_2x2;
  704. /* Unpack 2x2 codebook clusters */
  705. for (i=0; i<codebooks->numCB2; i++)
  706. unpack_roq_cell(enc->cb2x2 + i, codebooks->unpacked_cb2 + i*2*2*3);
  707. /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
  708. for (i=0; i<codebooks->numCB4; i++) {
  709. for (j=0; j<4; j++) {
  710. unpack_roq_cell(&results4[4*i + j], mb2);
  711. index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2,
  712. &enc->cb4x4[i].idx[j], 2);
  713. }
  714. unpack_roq_qcell(codebooks->unpacked_cb2, enc->cb4x4 + i,
  715. codebooks->unpacked_cb4 + i*4*4*3);
  716. enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3,
  717. codebooks->unpacked_cb4_enlarged + i*8*8*3);
  718. }
  719. av_free(yuvClusters);
  720. av_free(points);
  721. av_free(results4);
  722. }
  723. static void roq_encode_video(RoqContext *enc)
  724. {
  725. RoqTempdata *tempData = enc->tmpData;
  726. int i;
  727. memset(tempData, 0, sizeof(*tempData));
  728. create_cel_evals(enc, tempData);
  729. generate_new_codebooks(enc, tempData);
  730. if (enc->framesSinceKeyframe >= 1) {
  731. motion_search(enc, 8);
  732. motion_search(enc, 4);
  733. }
  734. retry_encode:
  735. for (i=0; i<enc->width*enc->height/64; i++)
  736. gather_data_for_cel(tempData->cel_evals + i, enc, tempData);
  737. /* Quake 3 can't handle chunks bigger than 65536 bytes */
  738. if (tempData->mainChunkSize/8 > 65536) {
  739. enc->lambda *= .8;
  740. goto retry_encode;
  741. }
  742. remap_codebooks(enc, tempData);
  743. write_codebooks(enc, tempData);
  744. reconstruct_and_encode_image(enc, tempData, enc->width, enc->height,
  745. enc->width*enc->height/64);
  746. enc->avctx->coded_frame = enc->current_frame;
  747. /* Rotate frame history */
  748. FFSWAP(AVFrame *, enc->current_frame, enc->last_frame);
  749. FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4);
  750. FFSWAP(motion_vect *, enc->last_motion8, enc->this_motion8);
  751. av_free(tempData->cel_evals);
  752. av_free(tempData->closest_cb2);
  753. enc->framesSinceKeyframe++;
  754. }
  755. static int roq_encode_init(AVCodecContext *avctx)
  756. {
  757. RoqContext *enc = avctx->priv_data;
  758. av_lfg_init(&enc->randctx, 1);
  759. enc->framesSinceKeyframe = 0;
  760. if ((avctx->width & 0xf) || (avctx->height & 0xf)) {
  761. av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n");
  762. return -1;
  763. }
  764. if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1)))
  765. av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n");
  766. enc->width = avctx->width;
  767. enc->height = avctx->height;
  768. enc->framesSinceKeyframe = 0;
  769. enc->first_frame = 1;
  770. enc->last_frame = &enc->frames[0];
  771. enc->current_frame = &enc->frames[1];
  772. enc->tmpData = av_malloc(sizeof(RoqTempdata));
  773. enc->this_motion4 =
  774. av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect));
  775. enc->last_motion4 =
  776. av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect));
  777. enc->this_motion8 =
  778. av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect));
  779. enc->last_motion8 =
  780. av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect));
  781. return 0;
  782. }
  783. static void roq_write_video_info_chunk(RoqContext *enc)
  784. {
  785. /* ROQ info chunk */
  786. bytestream_put_le16(&enc->out_buf, RoQ_INFO);
  787. /* Size: 8 bytes */
  788. bytestream_put_le32(&enc->out_buf, 8);
  789. /* Unused argument */
  790. bytestream_put_byte(&enc->out_buf, 0x00);
  791. bytestream_put_byte(&enc->out_buf, 0x00);
  792. /* Width */
  793. bytestream_put_le16(&enc->out_buf, enc->width);
  794. /* Height */
  795. bytestream_put_le16(&enc->out_buf, enc->height);
  796. /* Unused in Quake 3, mimics the output of the real encoder */
  797. bytestream_put_byte(&enc->out_buf, 0x08);
  798. bytestream_put_byte(&enc->out_buf, 0x00);
  799. bytestream_put_byte(&enc->out_buf, 0x04);
  800. bytestream_put_byte(&enc->out_buf, 0x00);
  801. }
  802. static int roq_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
  803. {
  804. RoqContext *enc = avctx->priv_data;
  805. AVFrame *frame= data;
  806. uint8_t *buf_start = buf;
  807. enc->out_buf = buf;
  808. enc->avctx = avctx;
  809. enc->frame_to_enc = frame;
  810. if (frame->quality)
  811. enc->lambda = frame->quality - 1;
  812. else
  813. enc->lambda = 2*ROQ_LAMBDA_SCALE;
  814. /* 138 bits max per 8x8 block +
  815. * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
  816. if (((enc->width*enc->height/64)*138+7)/8 + 256*(6+4) + 8 > buf_size) {
  817. av_log(avctx, AV_LOG_ERROR, " RoQ: Output buffer too small!\n");
  818. return -1;
  819. }
  820. /* Check for I frame */
  821. if (enc->framesSinceKeyframe == avctx->gop_size)
  822. enc->framesSinceKeyframe = 0;
  823. if (enc->first_frame) {
  824. /* Alloc memory for the reconstruction data (we must know the stride
  825. for that) */
  826. if (avctx->get_buffer(avctx, enc->current_frame) ||
  827. avctx->get_buffer(avctx, enc->last_frame)) {
  828. av_log(avctx, AV_LOG_ERROR, " RoQ: get_buffer() failed\n");
  829. return -1;
  830. }
  831. /* Before the first video frame, write a "video info" chunk */
  832. roq_write_video_info_chunk(enc);
  833. enc->first_frame = 0;
  834. }
  835. /* Encode the actual frame */
  836. roq_encode_video(enc);
  837. return enc->out_buf - buf_start;
  838. }
  839. static int roq_encode_end(AVCodecContext *avctx)
  840. {
  841. RoqContext *enc = avctx->priv_data;
  842. avctx->release_buffer(avctx, enc->last_frame);
  843. avctx->release_buffer(avctx, enc->current_frame);
  844. av_free(enc->tmpData);
  845. av_free(enc->this_motion4);
  846. av_free(enc->last_motion4);
  847. av_free(enc->this_motion8);
  848. av_free(enc->last_motion8);
  849. return 0;
  850. }
/**
 * Codec descriptor for the id RoQ video encoder.
 *
 * The leading initialisers are positional; their meaning is fixed by the
 * AVCodec declaration, which is not part of this file. Presumably they are
 * the codec name, type, id, private context size and the init / encode /
 * close callbacks, in that order - confirm against the AVCodec definition
 * before reordering.
 */
AVCodec roq_encoder =
{
    "roqvideo",
    CODEC_TYPE_VIDEO,
    CODEC_ID_ROQ,
    sizeof(RoqContext),
    roq_encode_init,
    roq_encode_frame,
    roq_encode_end,
    /* Only 30 fps is offered; the list is terminated by {0,0} */
    .supported_framerates = (const AVRational[]){{30,1}, {0,0}},
    /* Planar 4:4:4 input only */
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV444P, PIX_FMT_NONE},
    .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"),
};