|
|
@@ -170,97 +170,97 @@ static void FUNC(transform_4x4_luma)(int16_t *coeffs) |
|
|
|
|
|
|
|
#undef TR_4x4_LUMA |
|
|
|
|
|
|
|
/*
 * TR_4: 4-point butterfly stage.
 *
 * Reads src[0], src[sstep], src[2 * sstep] and src[3 * sstep].
 * The even part (e0, e1) is built from samples 0 and 2, both weighted
 * by 64; the odd part (o0, o1) from samples 1 and 3 with weights 83 and 36.
 * The four results are handed to `assign` for dst[0 .. 3 * dstep] in the
 * order e0 + o0, e1 + o1, e1 - o1, e0 - o0.
 *
 * dst, src     indexable operands (dst and src may be the same object; all
 *              inputs are read into locals before the first assign)
 * dstep, sstep element strides applied to dst and src
 * assign       two-argument macro (destination, value), defined outside
 *              this span
 * end          never expanded in the body; presumably kept so that every
 *              TR_n macro shares one signature
 *
 * NOTE(review): the macro text appears twice in this span. It looks like
 * the removed and added sides of a whitespace-only diff hunk; only the
 * second copy is closed by the trailing "} while (0)".
 */
#define TR_4(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ |
|
|
|
const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ |
|
|
|
const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ |
|
|
|
const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ |
|
|
|
\ |
|
|
|
assign(dst[0 * dstep], e0 + o0); \ |
|
|
|
assign(dst[1 * dstep], e1 + o1); \ |
|
|
|
assign(dst[2 * dstep], e1 - o1); \ |
|
|
|
assign(dst[3 * dstep], e0 - o0); \ |
|
|
|
#define TR_4(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ |
|
|
|
const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ |
|
|
|
const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ |
|
|
|
const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ |
|
|
|
\ |
|
|
|
assign(dst[0 * dstep], e0 + o0); \ |
|
|
|
assign(dst[1 * dstep], e1 + o1); \ |
|
|
|
assign(dst[2 * dstep], e1 - o1); \ |
|
|
|
assign(dst[3 * dstep], e0 - o0); \ |
|
|
|
} while (0) |
|
|
|
|
|
|
|
/*
 * TR_8: 8-point stage built on top of TR_4.
 *
 * Odd part:  o_8[i] (i = 0..3) accumulates transform[4 * j][i] *
 *            src[j * sstep] over the odd indices j = 1, 3, ... below `end`.
 * Even part: e_8[0..3] is filled by TR_4 applied to every second input
 *            sample (source stride 2 * sstep) using SET as the assign
 *            macro.
 * Output:    dst[i * dstep] receives e_8[i] + o_8[i] and the mirrored slot
 *            dst[(7 - i) * dstep] receives e_8[i] - o_8[i], both through
 *            `assign`.
 *
 * end        upper bound (exclusive) on the odd input indices that are
 *            summed; presumably inputs at or beyond it are known to be
 *            zero - confirm against the callers
 * transform  two-dimensional coefficient table defined outside this span
 * SET        assign-style macro defined outside this span
 *
 * The literal 4 passed to TR_4 has no effect, since TR_4 never expands
 * its `end` parameter.
 *
 * NOTE(review): the macro text appears twice in this span. It looks like
 * the removed and added sides of a whitespace-only diff hunk; only the
 * second copy is closed by the trailing "} while (0)".
 */
#define TR_8(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
int i, j; \ |
|
|
|
int e_8[4]; \ |
|
|
|
int o_8[4] = { 0 }; \ |
|
|
|
for (i = 0; i < 4; i++) \ |
|
|
|
for (j = 1; j < end; j += 2) \ |
|
|
|
o_8[i] += transform[4 * j][i] * src[j * sstep]; \ |
|
|
|
TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < 4; i++) { \ |
|
|
|
assign(dst[i * dstep], e_8[i] + o_8[i]); \ |
|
|
|
assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ |
|
|
|
} \ |
|
|
|
#define TR_8(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
int i, j; \ |
|
|
|
int e_8[4]; \ |
|
|
|
int o_8[4] = { 0 }; \ |
|
|
|
for (i = 0; i < 4; i++) \ |
|
|
|
for (j = 1; j < end; j += 2) \ |
|
|
|
o_8[i] += transform[4 * j][i] * src[j * sstep]; \ |
|
|
|
TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < 4; i++) { \ |
|
|
|
assign(dst[i * dstep], e_8[i] + o_8[i]); \ |
|
|
|
assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ |
|
|
|
} \ |
|
|
|
} while (0) |
|
|
|
|
|
|
|
/*
 * TR_16: 16-point stage built on top of TR_8.
 *
 * Odd part:  o_16[i] (i = 0..7) accumulates transform[2 * j][i] *
 *            src[j * sstep] over the odd indices j = 1, 3, ... below `end`.
 * Even part: e_16[0..7] is filled by TR_8 applied to every second input
 *            sample (source stride 2 * sstep) using SET as the assign
 *            macro.
 * Output:    dst[i * dstep] receives e_16[i] + o_16[i] and the mirrored
 *            slot dst[(15 - i) * dstep] receives e_16[i] - o_16[i], both
 *            through `assign`.
 *
 * end        upper bound (exclusive) on the odd input indices summed here.
 *            It is NOT forwarded: the nested TR_8 is always given 8, so
 *            its odd-part loop always covers j = 1, 3, 5, 7.
 * transform  two-dimensional coefficient table defined outside this span
 * SET        assign-style macro defined outside this span
 *
 * The nested TR_8 expands to its own block with its own `i, j`, which
 * shadow the ones declared here only inside that block.
 *
 * NOTE(review): the macro text appears twice in this span. It looks like
 * the removed and added sides of a whitespace-only diff hunk; only the
 * second copy is closed by the trailing "} while (0)".
 */
#define TR_16(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
int i, j; \ |
|
|
|
int e_16[8]; \ |
|
|
|
int o_16[8] = { 0 }; \ |
|
|
|
for (i = 0; i < 8; i++) \ |
|
|
|
for (j = 1; j < end; j += 2) \ |
|
|
|
o_16[i] += transform[2 * j][i] * src[j * sstep]; \ |
|
|
|
TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < 8; i++) { \ |
|
|
|
assign(dst[i * dstep], e_16[i] + o_16[i]); \ |
|
|
|
assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ |
|
|
|
} \ |
|
|
|
#define TR_16(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
int i, j; \ |
|
|
|
int e_16[8]; \ |
|
|
|
int o_16[8] = { 0 }; \ |
|
|
|
for (i = 0; i < 8; i++) \ |
|
|
|
for (j = 1; j < end; j += 2) \ |
|
|
|
o_16[i] += transform[2 * j][i] * src[j * sstep]; \ |
|
|
|
TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < 8; i++) { \ |
|
|
|
assign(dst[i * dstep], e_16[i] + o_16[i]); \ |
|
|
|
assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ |
|
|
|
} \ |
|
|
|
} while (0) |
|
|
|
|
|
|
|
/*
 * TR_32: 32-point stage built on top of TR_16.
 *
 * Odd part:  o_32[i] (i = 0..15) accumulates transform[j][i] *
 *            src[j * sstep] over the odd indices j = 1, 3, ... below `end`.
 * Even part: e_32[0..15] is filled by TR_16 applied to every second input
 *            sample (source stride 2 * sstep) using SET as the assign
 *            macro.
 * Output:    dst[i * dstep] receives e_32[i] + o_32[i] and the mirrored
 *            slot dst[(31 - i) * dstep] receives e_32[i] - o_32[i], both
 *            through `assign`.
 *
 * end        upper bound (exclusive) on the odd input indices summed here.
 *            Unlike TR_16 and TR_8, this stage forwards a bound to the
 *            nested stage: TR_16 is given end/2 (textually, so `end`
 *            should be a simple expression or variable).
 * transform  two-dimensional coefficient table defined outside this span
 * SET        assign-style macro defined outside this span
 *
 * NOTE(review): the macro text appears twice in this span. It looks like
 * the removed and added sides of a whitespace-only diff hunk; only the
 * second copy is closed by the trailing "} while (0)".
 */
#define TR_32(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
int i, j; \ |
|
|
|
int e_32[16]; \ |
|
|
|
int o_32[16] = { 0 }; \ |
|
|
|
for (i = 0; i < 16; i++) \ |
|
|
|
for (j = 1; j < end; j += 2) \ |
|
|
|
o_32[i] += transform[j][i] * src[j * sstep]; \ |
|
|
|
TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < 16; i++) { \ |
|
|
|
assign(dst[i * dstep], e_32[i] + o_32[i]); \ |
|
|
|
assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ |
|
|
|
} \ |
|
|
|
#define TR_32(dst, src, dstep, sstep, assign, end) \ |
|
|
|
do { \ |
|
|
|
int i, j; \ |
|
|
|
int e_32[16]; \ |
|
|
|
int o_32[16] = { 0 }; \ |
|
|
|
for (i = 0; i < 16; i++) \ |
|
|
|
for (j = 1; j < end; j += 2) \ |
|
|
|
o_32[i] += transform[j][i] * src[j * sstep]; \ |
|
|
|
TR_16(e_32, src, 1, 2 * sstep, SET, end/2); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < 16; i++) { \ |
|
|
|
assign(dst[i * dstep], e_32[i] + o_32[i]); \ |
|
|
|
assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ |
|
|
|
} \ |
|
|
|
} while (0) |
|
|
|
|
|
|
|
/*
 * IDCT_VAR4(H): local declarations for the 4x4 instance of IDCT.
 *
 * Declares only `limit2`, the smaller of col_limit + 4 and H. It relies on
 * `col_limit` being in scope at the expansion site (it is a parameter of
 * the function IDCT generates). No `limit` is declared here, in contrast
 * to IDCT_VAR8.
 *
 * NOTE(review): the "#define" line appears twice in this span. It looks
 * like the removed and added sides of a whitespace-only diff hunk sharing
 * the single body line.
 */
#define IDCT_VAR4(H) \ |
|
|
|
#define IDCT_VAR4(H) \ |
|
|
|
int limit2 = FFMIN(col_limit + 4, H) |
|
|
|
/*
 * IDCT_VAR8(H): local declarations for the 8x8 (and, via aliases, larger)
 * instances of IDCT.
 *
 * Declares two bounds derived from `col_limit`, which must be in scope at
 * the expansion site:
 *   limit  = min(col_limit, H)      - passed as `end` in IDCT's second loop
 *   limit2 = min(col_limit + 4, H)  - passed as `end` in IDCT's first loop
 *
 * The last declaration has no trailing semicolon; the expansion site
 * supplies it.
 *
 * NOTE(review): the macro text appears twice in this span. It looks like
 * the removed and added sides of a whitespace-only diff hunk.
 */
#define IDCT_VAR8(H) \ |
|
|
|
int limit = FFMIN(col_limit, H); \ |
|
|
|
int limit2 = FFMIN(col_limit + 4, H) |
|
|
|
#define IDCT_VAR8(H) \ |
|
|
|
int limit = FFMIN(col_limit, H); \ |
|
|
|
int limit2 = FFMIN(col_limit + 4, H) |
|
|
|
/* The 16x16 and 32x32 instances declare the same locals as the 8x8 one. */
#define IDCT_VAR16(H) IDCT_VAR8(H) |
|
|
|
#define IDCT_VAR32(H) IDCT_VAR8(H) |
|
|
|
|
|
|
|
/*
 * IDCT(H): generates the static function FUNC(idct_HxH)(coeffs, col_limit),
 * which runs the TR_H macro over an H * H block of int16_t in place, in two
 * loops of H iterations each.
 *
 * Generated function parameters:
 *   coeffs     H * H coefficients, overwritten with the result
 *   col_limit  bound used by IDCT_VAR<H> to derive `limit` / `limit2`,
 *              which become the `end` argument of TR_H
 *
 * First loop:  TR_H with destination and source stride H, start pointer
 *              `src` advanced by one element per iteration (i.e. columns,
 *              assuming a row-major H x H layout - TODO confirm). The bound
 *              is limit2; once it is below H it is lowered by 4 at the end
 *              of every iteration whose index i is a non-zero multiple
 *              of 4.
 * Second loop: TR_H with stride 1, `coeffs` advanced by H per iteration
 *              (rows under the same layout assumption). The bound is
 *              `limit`.
 *
 * `shift` and `add` are set to 7 / 1 << 6 before the first loop and to
 * 20 - BIT_DEPTH / 1 << (shift - 1) before the second. They are not
 * referenced by name in the visible text; presumably the SCALE macro
 * (defined outside this span) reads them - confirm.
 *
 * For H == 4, IDCT_VAR4 declares no `limit`. The second loop still works
 * because TR_4 never expands its `end` parameter, so the `limit` token is
 * dropped during preprocessing.
 *
 * NOTE(review): the macro text appears twice in this span. It looks like
 * the removed and added sides of a diff hunk that only re-wraps the
 * function signature; only the second copy is closed by the final "}".
 */
#define IDCT(H) \ |
|
|
|
static void FUNC(idct_##H ##x ##H )( \ |
|
|
|
int16_t *coeffs, int col_limit) { \ |
|
|
|
int i; \ |
|
|
|
int shift = 7; \ |
|
|
|
int add = 1 << (shift - 1); \ |
|
|
|
int16_t *src = coeffs; \ |
|
|
|
IDCT_VAR ##H(H); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < H; i++) { \ |
|
|
|
TR_ ## H(src, src, H, H, SCALE, limit2); \ |
|
|
|
if (limit2 < H && i%4 == 0 && !!i) \ |
|
|
|
limit2 -= 4; \ |
|
|
|
src++; \ |
|
|
|
} \ |
|
|
|
\ |
|
|
|
shift = 20 - BIT_DEPTH; \ |
|
|
|
add = 1 << (shift - 1); \ |
|
|
|
for (i = 0; i < H; i++) { \ |
|
|
|
TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ |
|
|
|
coeffs += H; \ |
|
|
|
} \ |
|
|
|
#define IDCT(H) \ |
|
|
|
static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ |
|
|
|
int col_limit) { \ |
|
|
|
int i; \ |
|
|
|
int shift = 7; \ |
|
|
|
int add = 1 << (shift - 1); \ |
|
|
|
int16_t *src = coeffs; \ |
|
|
|
IDCT_VAR ##H(H); \ |
|
|
|
\ |
|
|
|
for (i = 0; i < H; i++) { \ |
|
|
|
TR_ ## H(src, src, H, H, SCALE, limit2); \ |
|
|
|
if (limit2 < H && i%4 == 0 && !!i) \ |
|
|
|
limit2 -= 4; \ |
|
|
|
src++; \ |
|
|
|
} \ |
|
|
|
\ |
|
|
|
shift = 20 - BIT_DEPTH; \ |
|
|
|
add = 1 << (shift - 1); \ |
|
|
|
for (i = 0; i < H; i++) { \ |
|
|
|
TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ |
|
|
|
coeffs += H; \ |
|
|
|
} \ |
|
|
|
} |
|
|
|
|
|
|
|
#define IDCT_DC(H) \ |
|
|
|