omse goes from 0.03060703 (which fails for dct-test) to 0.01663750. This also actually improves the error of decoding the sample generated by fate-vsynth3-dnxhd1080i-10bit using simple_idct10 relative to FAANI, which goes (when resampled to yuv422p) from: stddev: 0.06 PSNR: 72.28 MAXDIFF: 1 to identical. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> tags/n3.0
@@ -36,6 +36,11 @@ | |||||
#define BIT_DEPTH 10 | #define BIT_DEPTH 10 | ||||
#include "simple_idct_template.c" | #include "simple_idct_template.c" | ||||
#define EXTRA_SHIFT 2 | |||||
#include "simple_idct_template.c" | |||||
#undef EXTRA_SHIFT | |||||
#undef BIT_DEPTH | #undef BIT_DEPTH | ||||
#define BIT_DEPTH 12 | #define BIT_DEPTH 12 | ||||
@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat) | |||||
block[i] *= qmat[i]; | block[i] *= qmat[i]; | ||||
for (i = 0; i < 8; i++) | for (i = 0; i < 8; i++) | ||||
idctRowCondDC_10(block + i*8, 2); | |||||
idctRowCondDC_extrashift_10(block + i*8, 2); | |||||
for (i = 0; i < 8; i++) { | for (i = 0; i < 8; i++) { | ||||
block[i] += 8192; | block[i] += 8192; | ||||
idctSparseCol_10(block + i); | |||||
idctSparseCol_extrashift_10(block + i); | |||||
} | } | ||||
} | } |
@@ -66,19 +66,26 @@ | |||||
#elif BIT_DEPTH == 10 || BIT_DEPTH == 12 | #elif BIT_DEPTH == 10 || BIT_DEPTH == 12 | ||||
#if BIT_DEPTH == 10 | |||||
#define W1 (22725*4) // 90901 | |||||
#define W2 (21407*4) // 85627 | |||||
#define W3 (19265*4) // 77062 | |||||
#define W4 (16384*4) // 65535 | |||||
#define W5 (12873*4) // 51491 | |||||
#define W6 ( 8867*4) // 35468 | |||||
#define W7 ( 4520*4) // 18081 | |||||
#define ROW_SHIFT 15 | |||||
#define COL_SHIFT 20 | |||||
#define DC_SHIFT 1 | |||||
#else | |||||
# if BIT_DEPTH == 10 | |||||
#define W1 22725 // 90901 | |||||
#define W2 21407 // 85627 | |||||
#define W3 19265 // 77062 | |||||
#define W4 16384 // 65535 | |||||
#define W5 12873 // 51491 | |||||
#define W6 8867 // 35468 | |||||
#define W7 4520 // 18081 | |||||
# ifdef EXTRA_SHIFT | |||||
#define ROW_SHIFT 13 | |||||
#define COL_SHIFT 18 | |||||
#define DC_SHIFT 1 | |||||
# else | |||||
#define ROW_SHIFT 12 | |||||
#define COL_SHIFT 19 | |||||
#define DC_SHIFT 2 | |||||
# endif | |||||
# else | |||||
#define W1 45451 | #define W1 45451 | ||||
#define W2 42813 | #define W2 42813 | ||||
#define W3 38531 | #define W3 38531 | ||||
@@ -90,7 +97,7 @@ | |||||
#define ROW_SHIFT 16 | #define ROW_SHIFT 16 | ||||
#define COL_SHIFT 17 | #define COL_SHIFT 17 | ||||
#define DC_SHIFT -1 | #define DC_SHIFT -1 | ||||
#endif | |||||
# endif | |||||
#define MUL(a, b) ((a) * (b)) | #define MUL(a, b) ((a) * (b)) | ||||
#define MAC(a, b, c) ((a) += (b) * (c)) | #define MAC(a, b, c) ((a) += (b) * (c)) | ||||
@@ -101,7 +108,11 @@ | |||||
#endif | #endif | ||||
#ifdef EXTRA_SHIFT | |||||
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) | |||||
#else | |||||
static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) | static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) | ||||
#endif | |||||
{ | { | ||||
int a0, a1, a2, a3, b0, b1, b2, b3; | int a0, a1, a2, a3, b0, b1, b2, b3; | ||||
@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) | |||||
} \ | } \ | ||||
} while (0) | } while (0) | ||||
#ifdef EXTRA_SHIFT | |||||
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) | |||||
#else | |||||
static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, | static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, | ||||
int16_t *col) | int16_t *col) | ||||
{ | { | ||||
@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, | |||||
} | } | ||||
static inline void FUNC(idctSparseCol)(int16_t *col) | static inline void FUNC(idctSparseCol)(int16_t *col) | ||||
#endif | |||||
{ | { | ||||
int a0, a1, a2, a3, b0, b1, b2, b3; | int a0, a1, a2, a3, b0, b1, b2, b3; | ||||
@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col) | |||||
col[56] = ((a0 - b0) >> COL_SHIFT); | col[56] = ((a0 - b0) >> COL_SHIFT); | ||||
} | } | ||||
#ifndef EXTRA_SHIFT | |||||
void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) | void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) | ||||
{ | { | ||||
pixel *dest = (pixel *)dest_; | pixel *dest = (pixel *)dest_; | ||||
@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block) | |||||
for (i = 0; i < 8; i++) | for (i = 0; i < 8; i++) | ||||
FUNC(idctSparseCol)(block + i); | FUNC(idctSparseCol)(block + i); | ||||
} | } | ||||
#endif |
@@ -1,2 +1,2 @@ | |||||
#tb 0: 1/24 | #tb 0: 1/24 | ||||
0, 0, 0, 1, 9665280, 0x238a023e | |||||
0, 0, 0, 1, 9665280, 0x19ef4057 |
@@ -1,4 +1,4 @@ | |||||
f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd | f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd | ||||
2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd | 2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd | ||||
3cc84f9e8d2e704475b410de27dd9951 *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo | |||||
87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo | |||||
stddev: 6.23 PSNR: 32.23 MAXDIFF: 64 bytes: 7603200/ 760320 | stddev: 6.23 PSNR: 32.23 MAXDIFF: 64 bytes: 7603200/ 760320 |
@@ -1,4 +1,4 @@ | |||||
e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd | e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd | ||||
2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd | 2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd | ||||
a98c4b69d4d036089a455e147d6922a7 *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo | |||||
1e6e1ef90e5c9b16a80acc17fde596ff *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo | |||||
stddev: 1.54 PSNR: 44.36 MAXDIFF: 31 bytes: 7603200/ 760320 | stddev: 1.54 PSNR: 44.36 MAXDIFF: 31 bytes: 7603200/ 760320 |
@@ -1,4 +1,4 @@ | |||||
e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd | e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd | ||||
2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd | 2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd | ||||
2b497215c57558910a605ff8c78430d9 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo | |||||
0e9fcec94aeff70bac5dec02cf2391bc *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo | |||||
stddev: 1.33 PSNR: 45.61 MAXDIFF: 22 bytes: 7603200/ 760320 | stddev: 1.33 PSNR: 45.61 MAXDIFF: 22 bytes: 7603200/ 760320 |