You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

503 lines
19KB

  1. /*
  2. * Generic DCT based hybrid video encoder
  3. * Copyright (c) 2000,2001 Gerard Lantau.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. /* Macros for picture code type. */
  20. #define I_TYPE 1
  21. #define P_TYPE 2
  22. #define B_TYPE 3
  23. #define S_TYPE 4 //S(GMC)-VOP MPEG4
  24. #define FRAME_SKIPED 100 // return value for header parsers if frame is not coded
  25. enum OutputFormat {
  26. FMT_MPEG1,
  27. FMT_H263,
  28. FMT_MJPEG,
  29. };
  30. #define MPEG_BUF_SIZE (16 * 1024)
  31. #define QMAT_SHIFT_MMX 19
  32. #define QMAT_SHIFT 25
  33. #define MAX_FCODE 7
  34. #define MAX_MV 2048
  35. #define REORDER_BUFFER_SIZE (FF_MAX_B_FRAMES+2)
  36. typedef struct Predictor{
  37. double coeff;
  38. double count;
  39. double decay;
  40. } Predictor;
  41. typedef struct RateControlEntry{
  42. int pict_type;
  43. int qscale;
  44. int mv_bits;
  45. int i_tex_bits;
  46. int p_tex_bits;
  47. int misc_bits;
  48. uint64_t expected_bits;
  49. int new_pict_type;
  50. float new_qscale;
  51. }RateControlEntry;
  52. typedef struct RateControlContext{
  53. FILE *stats_file;
  54. int num_entries;
  55. RateControlEntry *entry;
  56. }RateControlContext;
  57. typedef struct ReorderBuffer{
  58. UINT8 *picture[3];
  59. int pict_type;
  60. int qscale;
  61. int force_type;
  62. int picture_number;
  63. int picture_in_gop_number;
  64. } ReorderBuffer;
  65. typedef struct MpegEncContext {
  66. struct AVCodecContext *avctx;
  67. /* the following parameters must be initialized before encoding */
  68. int width, height; /* picture size. must be a multiple of 16 */
  69. int gop_size;
  70. int frame_rate; /* number of frames per second */
  71. int intra_only; /* if true, only intra pictures are generated */
  72. int bit_rate; /* wanted bit rate */
  73. int bit_rate_tolerance; /* amount of +- bits (>0)*/
  74. enum OutputFormat out_format; /* output format */
  75. int h263_plus; /* h263 plus headers */
  76. int h263_rv10; /* use RV10 variation for H263 */
  77. int h263_pred; /* use mpeg4/h263 ac/dc predictions */
  78. int h263_msmpeg4; /* generate MSMPEG4 compatible stream */
  79. int h263_intel; /* use I263 intel h263 header */
  80. int fixed_qscale; /* fixed qscale if non zero */
  81. float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */
  82. float qblur; /* amount of qscale smoothing over time (0.0-1.0) */
  83. int qmin; /* min qscale */
  84. int qmax; /* max qscale */
  85. int max_qdiff; /* max qscale difference between frames */
  86. int encoding; /* true if we are encoding (vs decoding) */
  87. int flags; /* AVCodecContext.flags (HQ, MV4, ...) */
  88. int force_input_type;/* 0= no force, otherwise I_TYPE, P_TYPE, ... */
  89. int max_b_frames; /* max number of b-frames for encoding */
  90. float b_quant_factor;/* qscale factor between ips and b frames */
  91. int rc_strategy;
  92. int b_frame_strategy;
  93. /* the following fields are managed internally by the encoder */
  94. /* bit output */
  95. PutBitContext pb;
  96. /* sequence parameters */
  97. int context_initialized;
  98. int input_picture_number;
  99. int input_picture_in_gop_number; /* 0-> first pic in gop, ... */
  100. int picture_number;
  101. int fake_picture_number; /* picture number at the bitstream frame rate */
  102. int gop_picture_number; /* index of the first picture of a GOP based on fake_pic_num & mpeg1 specific */
  103. int picture_in_gop_number; /* 0-> first pic in gop, ... */
  104. int b_frames_since_non_b; /* used for encoding, relative to not yet reordered input */
  105. int mb_width, mb_height; /* number of MBs horizontally & vertically */
  106. int mb_num; /* number of MBs of a picture */
  107. int linesize; /* line size, in bytes, may be different from width */
  108. UINT8 *new_picture[3]; /* picture to be compressed */
  109. UINT8 *picture_buffer[REORDER_BUFFER_SIZE][3]; /* internal buffers used for reordering of input pictures */
  110. int picture_buffer_index;
  111. ReorderBuffer coded_order[REORDER_BUFFER_SIZE];
  112. UINT8 *last_picture[3]; /* previous picture */
  113. UINT8 *last_picture_base[3]; /* real start of the picture */
  114. UINT8 *next_picture[3]; /* previous picture (for bidir pred) */
  115. UINT8 *next_picture_base[3]; /* real start of the picture */
  116. UINT8 *aux_picture[3]; /* aux picture (for B frames only) */
  117. UINT8 *aux_picture_base[3]; /* real start of the picture */
  118. UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */
  119. int num_available_buffers; /* is 0 at the start & after seeking, after the first I frame its 1 after next I/P 2 */
  120. int last_dc[3]; /* last DC values for MPEG1 */
  121. INT16 *dc_val[3]; /* used for mpeg4 DC prediction, all 3 arrays must be continuous */
  122. int y_dc_scale, c_dc_scale;
  123. UINT8 *coded_block; /* used for coded block pattern prediction (msmpeg4v3, wmv1)*/
  124. INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction, all 3 arrays must be continuous */
  125. int ac_pred;
  126. int mb_skiped; /* MUST BE SET only during DECODING */
  127. UINT8 *mbskip_table; /* used to avoid copy if macroblock skipped (for black regions for example)
  128. and used for b-frame encoding & decoding (contains skip table of next P Frame) */
  129. UINT8 *mbintra_table; /* used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding */
  130. int input_qscale; /* qscale prior to reordering of frames */
  131. int input_pict_type; /* pict_type prior to reordering of frames */
  132. int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
  133. int qscale; /* QP */
  134. int last_non_b_qscale; /* QP of last non b frame used for b frame qscale*/
  135. int pict_type; /* I_TYPE, P_TYPE, B_TYPE, ... */
  136. int last_non_b_pict_type; /* used for mpeg4 gmc b-frames & ratecontrol */
  137. int frame_rate_index;
  138. /* motion compensation */
  139. int unrestricted_mv;
  140. int h263_long_vectors; /* use horrible h263v1 long vector mode */
  141. int f_code; /* forward MV resolution */
  142. int b_code; /* backward MV resolution for B Frames (mpeg4) */
  143. INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */
  144. INT16 (*p_mv_table)[2]; /* MV table (1MV per MB) p-frame encoding */
  145. INT16 (*last_p_mv_table)[2]; /* MV table (1MV per MB) p-frame encoding */
  146. INT16 (*b_forw_mv_table)[2]; /* MV table (1MV per MB) forward mode b-frame encoding */
  147. INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */
  148. INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
  149. INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
  150. INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
  151. INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
  152. INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */
  153. int me_method; /* ME algorithm */
  154. uint8_t *me_scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */
  155. int mv_dir;
  156. #define MV_DIR_BACKWARD 1
  157. #define MV_DIR_FORWARD 2
  158. #define MV_DIRECT 4 // bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4)
  159. int mv_type;
  160. #define MV_TYPE_16X16 0 /* 1 vector for the whole mb */
  161. #define MV_TYPE_8X8 1 /* 4 vectors (h263, mpeg4 4MV) */
  162. #define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */
  163. #define MV_TYPE_FIELD 3 /* 2 vectors, one per field */
  164. #define MV_TYPE_DMV 4 /* 2 vectors, special mpeg2 Dual Prime Vectors */
  165. /* motion vectors for a macroblock
  166. first coordinate : 0 = forward 1 = backward
  167. second " : depend on type
  168. third " : 0 = x, 1 = y
  169. */
  170. int mv[2][4][2];
  171. int field_select[2][2];
  172. int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
  173. UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
  174. UINT8 *fcode_tab; /* smallest fcode needed for each MV */
  175. int has_b_frames;
  176. int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
  177. for b-frames rounding mode is allways 0 */
  178. /* macroblock layer */
  179. int mb_x, mb_y;
  180. int mb_incr;
  181. int mb_intra;
  182. UINT16 *mb_var; /* Table for MB variances */
  183. UINT8 *mb_type; /* Table for MB type */
  184. #define MB_TYPE_INTRA 0x01
  185. #define MB_TYPE_INTER 0x02
  186. #define MB_TYPE_INTER4V 0x04
  187. #define MB_TYPE_SKIPED 0x08
  188. #define MB_TYPE_DIRECT 0x10
  189. #define MB_TYPE_FORWARD 0x20
  190. #define MB_TYPE_BACKWARD 0x40
  191. #define MB_TYPE_BIDIR 0x80
  192. int block_index[6]; /* index to current MB in block based arrays with edges*/
  193. int block_wrap[6];
  194. /* matrix transmitted in the bitstream */
  195. UINT16 intra_matrix[64];
  196. UINT16 chroma_intra_matrix[64];
  197. UINT16 non_intra_matrix[64];
  198. UINT16 chroma_non_intra_matrix[64];
  199. /* precomputed matrix (combine qscale and DCT renorm) */
  200. int q_intra_matrix[64];
  201. int q_non_intra_matrix[64];
  202. /* identical to the above but for MMX & these are not permutated */
  203. UINT16 __align8 q_intra_matrix16[64];
  204. UINT16 __align8 q_non_intra_matrix16[64];
  205. int block_last_index[6]; /* last non zero coefficient in block */
  206. void *opaque; /* private data for the user */
  207. /* bit rate control */
  208. int I_frame_bits; //FIXME used in mpeg12 ...
  209. int avg_mb_var; /* average MB variance for current frame */
  210. int mc_mb_var; /* motion compensated MB variance for current frame */
  211. int last_non_b_mc_mb_var;/* motion compensated MB variance for last non b frame */
  212. INT64 wanted_bits;
  213. INT64 total_bits;
  214. int frame_bits; /* bits used for the current frame */
  215. int pb_frame_bits; /* bits of the last b...bp group */
  216. Predictor i_pred;
  217. Predictor p_pred;
  218. double qsum; /* sum of qscales */
  219. double qcount; /* count of qscales */
  220. double short_term_qsum; /* sum of recent qscales */
  221. double short_term_qcount; /* count of recent qscales */
  222. RateControlContext rc_context;
  223. /* statistics, used for 2-pass encoding */
  224. int mv_bits;
  225. int header_bits;
  226. int i_tex_bits;
  227. int p_tex_bits;
  228. int i_count;
  229. int p_count;
  230. int skip_count;
  231. int misc_bits; // cbp, mb_type
  232. int last_bits; //temp var used for calculating the above vars
  233. /* H.263 specific */
  234. int gob_number;
  235. int gob_index;
  236. int first_gob_line;
  237. /* H.263+ specific */
  238. int umvplus;
  239. int umvplus_dec;
  240. int h263_aic; /* Advanded INTRA Coding (AIC) */
  241. int h263_aic_dir; /* AIC direction: 0 = left, 1 = top */
  242. /* mpeg4 specific */
  243. int time_increment_resolution;
  244. int time_increment_bits; /* number of bits to represent the fractional part of time */
  245. int last_time_base;
  246. int time_base; /* time in seconds of last I,P,S Frame */
  247. int64_t time; /* time of current frame */
  248. int64_t last_non_b_time;
  249. uint16_t pp_time; /* time distance between the last 2 p,s,i frames */
  250. uint16_t bp_time; /* time distance between the last b and p,s,i frame */
  251. int shape;
  252. int vol_sprite_usage;
  253. int sprite_width;
  254. int sprite_height;
  255. int sprite_left;
  256. int sprite_top;
  257. int sprite_brightness_change;
  258. int num_sprite_warping_points;
  259. int real_sprite_warping_points;
  260. int sprite_offset[2][2];
  261. int sprite_delta[2][2][2];
  262. int sprite_shift[2][2];
  263. int mcsel;
  264. int quant_precision;
  265. int quarter_sample; /* 1->qpel, 0->half pel ME/MC */
  266. int scalability;
  267. int new_pred;
  268. int reduced_res_vop;
  269. int aspect_ratio_info;
  270. int sprite_warping_accuracy;
  271. int low_latency_sprite;
  272. int data_partioning;
  273. int resync_marker;
  274. int resync_x_pos;
  275. int low_delay; /* no reordering needed / has no b-frames */
  276. int vo_type;
  277. /* divx specific, used to workaround (many) bugs in divx5 */
  278. int divx_version;
  279. int divx_build;
  280. #define BITSTREAM_BUFFER_SIZE 1024*256
  281. uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them
  282. int bitstream_buffer_size;
  283. /* RV10 specific */
  284. int rv10_version; /* RV10 version: 0 or 3 */
  285. int rv10_first_dc_coded[3];
  286. /* MJPEG specific */
  287. struct MJpegContext *mjpeg_ctx;
  288. int mjpeg_vsample[3]; /* vertical sampling factors, default = {2, 1, 1} */
  289. int mjpeg_hsample[3]; /* horizontal sampling factors, default = {2, 1, 1} */
  290. int mjpeg_write_tables; /* do we want to have quantisation- and
  291. huffmantables in the jpeg file ? */
  292. /* MSMPEG4 specific */
  293. int mv_table_index;
  294. int rl_table_index;
  295. int rl_chroma_table_index;
  296. int dc_table_index;
  297. int use_skip_mb_code;
  298. int slice_height; /* in macroblocks */
  299. int first_slice_line; /* used in mpeg4 too to handle resync markers */
  300. int flipflop_rounding;
  301. int bitrate;
  302. int msmpeg4_version; /* 1=mp41, 2=mp42, 3=mp43/divx3 */
  303. /* decompression specific */
  304. GetBitContext gb;
  305. /* MPEG2 specific - I wish I had not to support this mess. */
  306. int progressive_sequence;
  307. int mpeg_f_code[2][2];
  308. int picture_structure;
  309. /* picture type */
  310. #define PICT_TOP_FIELD 1
  311. #define PICT_BOTTOM_FIELD 2
  312. #define PICT_FRAME 3
  313. int intra_dc_precision;
  314. int frame_pred_frame_dct;
  315. int top_field_first;
  316. int concealment_motion_vectors;
  317. int q_scale_type;
  318. int intra_vlc_format;
  319. int alternate_scan;
  320. int repeat_first_field;
  321. int chroma_420_type;
  322. int progressive_frame;
  323. int mpeg2;
  324. int full_pel[2];
  325. int interlaced_dct;
  326. int last_qscale;
  327. int first_slice;
  328. /* RTP specific */
  329. /* These are explained on avcodec.h */
  330. int rtp_mode;
  331. int rtp_payload_size;
  332. void (*rtp_callback)(void *data, int size, int packet_number);
  333. UINT8 *ptr_lastgob;
  334. UINT8 *ptr_last_mb_line;
  335. UINT32 mb_line_avgsize;
  336. DCTELEM (*block)[64]; /* points to one of the following blocks */
  337. DCTELEM blocks[2][6][64] __align8; // for HQ mode we need to keep the best block
  338. void (*dct_unquantize_mpeg1)(struct MpegEncContext *s,
  339. DCTELEM *block, int n, int qscale);
  340. void (*dct_unquantize_mpeg2)(struct MpegEncContext *s,
  341. DCTELEM *block, int n, int qscale);
  342. void (*dct_unquantize_h263)(struct MpegEncContext *s,
  343. DCTELEM *block, int n, int qscale);
  344. void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
  345. DCTELEM *block, int n, int qscale);
  346. } MpegEncContext;
  347. int MPV_common_init(MpegEncContext *s);
  348. void MPV_common_end(MpegEncContext *s);
  349. void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
  350. void MPV_frame_start(MpegEncContext *s);
  351. void MPV_frame_end(MpegEncContext *s);
  352. #ifdef HAVE_MMX
  353. void MPV_common_init_mmx(MpegEncContext *s);
  354. #endif
  355. /* motion_est.c */
  356. void ff_estimate_p_frame_motion(MpegEncContext * s,
  357. int mb_x, int mb_y);
  358. void ff_estimate_b_frame_motion(MpegEncContext * s,
  359. int mb_x, int mb_y);
  360. int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
  361. void ff_fix_long_p_mvs(MpegEncContext * s);
  362. void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
  363. /* mpeg12.c */
  364. extern INT16 default_intra_matrix[64];
  365. extern INT16 default_non_intra_matrix[64];
  366. void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
  367. void mpeg1_encode_mb(MpegEncContext *s,
  368. DCTELEM block[6][64],
  369. int motion_x, int motion_y);
  370. void mpeg1_encode_init(MpegEncContext *s);
  371. /* h263enc.c */
  372. /* run length table */
  373. #define MAX_RUN 64
  374. #define MAX_LEVEL 64
  375. typedef struct RLTable {
  376. int n; /* number of entries of table_vlc minus 1 */
  377. int last; /* number of values for last = 0 */
  378. const UINT16 (*table_vlc)[2];
  379. const INT8 *table_run;
  380. const INT8 *table_level;
  381. UINT8 *index_run[2]; /* encoding only */
  382. INT8 *max_level[2]; /* encoding & decoding */
  383. INT8 *max_run[2]; /* encoding & decoding */
  384. VLC vlc; /* decoding only */
  385. } RLTable;
  386. void init_rl(RLTable *rl);
  387. void init_vlc_rl(RLTable *rl);
  388. static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
  389. {
  390. int index;
  391. index = rl->index_run[last][run];
  392. if (index >= rl->n)
  393. return rl->n;
  394. if (level > rl->max_level[last][run])
  395. return rl->n;
  396. return index + level - 1;
  397. }
  398. void h263_encode_mb(MpegEncContext *s,
  399. DCTELEM block[6][64],
  400. int motion_x, int motion_y);
  401. void mpeg4_encode_mb(MpegEncContext *s,
  402. DCTELEM block[6][64],
  403. int motion_x, int motion_y);
  404. void h263_encode_picture_header(MpegEncContext *s, int picture_number);
  405. int h263_encode_gob_header(MpegEncContext * s, int mb_line);
  406. void h263_dc_scale(MpegEncContext *s);
  407. INT16 *h263_pred_motion(MpegEncContext * s, int block,
  408. int *px, int *py);
  409. void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
  410. int dir);
  411. void ff_set_mpeg4_time(MpegEncContext * s, int picture_number);
  412. void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
  413. void h263_encode_init(MpegEncContext *s);
  414. void h263_decode_init_vlc(MpegEncContext *s);
  415. int h263_decode_picture_header(MpegEncContext *s);
  416. int h263_decode_gob_header(MpegEncContext *s);
  417. int mpeg4_decode_picture_header(MpegEncContext * s);
  418. int intel_h263_decode_picture_header(MpegEncContext *s);
  419. int h263_decode_mb(MpegEncContext *s,
  420. DCTELEM block[6][64]);
  421. int h263_get_picture_format(int width, int height);
  422. /* rv10.c */
  423. void rv10_encode_picture_header(MpegEncContext *s, int picture_number);
  424. int rv_decode_dc(MpegEncContext *s, int n);
  425. /* msmpeg4.c */
  426. void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number);
  427. void msmpeg4_encode_ext_header(MpegEncContext * s);
  428. void msmpeg4_encode_mb(MpegEncContext * s,
  429. DCTELEM block[6][64],
  430. int motion_x, int motion_y);
  431. void msmpeg4_dc_scale(MpegEncContext * s);
  432. int msmpeg4_decode_picture_header(MpegEncContext * s);
  433. int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
  434. int msmpeg4_decode_mb(MpegEncContext *s,
  435. DCTELEM block[6][64]);
  436. int msmpeg4_decode_init_vlc(MpegEncContext *s);
  437. /* mjpegenc.c */
  438. int mjpeg_init(MpegEncContext *s);
  439. void mjpeg_close(MpegEncContext *s);
  440. void mjpeg_encode_mb(MpegEncContext *s,
  441. DCTELEM block[6][64]);
  442. void mjpeg_picture_header(MpegEncContext *s);
  443. void mjpeg_picture_trailer(MpegEncContext *s);
  444. /* rate control */
  445. int ff_rate_control_init(MpegEncContext *s);
  446. int ff_rate_estimate_qscale(MpegEncContext *s);
  447. int ff_rate_estimate_qscale_pass2(MpegEncContext *s);
  448. void ff_write_pass1_stats(MpegEncContext *s);
  449. void ff_rate_control_uninit(MpegEncContext *s);