Browse Source

avcodec/lagarith: switch to planar rgb

Speed goes from 363 fps to 428 fps for 640x480 video.
tags/n4.1
Paul B Mahol 6 years ago
parent
commit
b2ffecbd0c
8 changed files with 118 additions and 164 deletions
  1. +31
    -77
      libavcodec/lagarith.c
  2. +25
    -25
      tests/ref/fate/lagarith-red
  3. +4
    -4
      tests/ref/fate/lagarith-rgb24
  4. +2
    -2
      tests/ref/fate/lagarith-ticket4119
  5. +50
    -50
      tests/ref/fate/lagarith-ticket4119-cfr
  6. +2
    -2
      tests/ref/fate/lagarith-ticket4119-drop
  7. +2
    -2
      tests/ref/fate/lagarith-ticket4119-pass
  8. +2
    -2
      tests/ref/fate/lagarith-ticket4119-vfr

+ 31
- 77
libavcodec/lagarith.c View File

@@ -53,9 +53,6 @@ typedef struct LagarithContext {
LLVidDSPContext llviddsp; LLVidDSPContext llviddsp;
int zeros; /**< number of consecutive zero bytes encountered */ int zeros; /**< number of consecutive zero bytes encountered */
int zeros_rem; /**< number of zero bytes remaining to output */ int zeros_rem; /**< number of zero bytes remaining to output */
uint8_t *rgb_planes;
int rgb_planes_allocated;
int rgb_stride;
} LagarithContext; } LagarithContext;


/** /**
@@ -544,7 +541,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
uint8_t frametype; uint8_t frametype;
uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9; uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
uint32_t offs[4]; uint32_t offs[4];
uint8_t *srcs[4], *dst;
uint8_t *srcs[4];
int i, j, planes = 3; int i, j, planes = 3;
int ret; int ret;


@@ -557,70 +554,60 @@ static int lag_decode_frame(AVCodecContext *avctx,


switch (frametype) { switch (frametype) {
case FRAME_SOLID_RGBA: case FRAME_SOLID_RGBA:
avctx->pix_fmt = AV_PIX_FMT_RGB32;
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
case FRAME_SOLID_GRAY: case FRAME_SOLID_GRAY:
if (frametype == FRAME_SOLID_GRAY) if (frametype == FRAME_SOLID_GRAY)
if (avctx->bits_per_coded_sample == 24) { if (avctx->bits_per_coded_sample == 24) {
avctx->pix_fmt = AV_PIX_FMT_RGB24;
avctx->pix_fmt = AV_PIX_FMT_GBRP;
} else { } else {
avctx->pix_fmt = AV_PIX_FMT_0RGB32;
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
planes = 4; planes = 4;
} }


if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
return ret; return ret;


dst = p->data[0];
if (frametype == FRAME_SOLID_RGBA) { if (frametype == FRAME_SOLID_RGBA) {
int qwidth = avctx->width>>2;
uint64_t c = ((uint64_t)offset_gu << 32) | offset_gu;
for (j = 0; j < avctx->height; j++) {
for (i = 0; i < qwidth; i++) {
AV_WN64(dst + i * 16 , c);
AV_WN64(dst + i * 16 + 8, c);
for (i = 0; i < avctx->height; i++) {
memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
memset(p->data[3] + i * p->linesize[3], buf[4], avctx->width);
} }
for (i = 4*qwidth; i < avctx->width; i++)
AV_WN32(dst + i * 4, offset_gu);
dst += p->linesize[0];
}
} else { } else {
for (j = 0; j < avctx->height; j++) {
memset(dst, buf[1], avctx->width * planes);
dst += p->linesize[0];
for (i = 0; i < avctx->height; i++) {
for (j = 0; j < planes; j++)
memset(p->data[j] + i * p->linesize[j], buf[1], avctx->width);
} }
} }
break; break;
case FRAME_SOLID_COLOR: case FRAME_SOLID_COLOR:
if (avctx->bits_per_coded_sample == 24) { if (avctx->bits_per_coded_sample == 24) {
avctx->pix_fmt = AV_PIX_FMT_RGB24;
avctx->pix_fmt = AV_PIX_FMT_GBRP;
} else { } else {
avctx->pix_fmt = AV_PIX_FMT_RGB32;
offset_gu |= 0xFFU << 24;
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
} }


if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0) if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
return ret; return ret;


dst = p->data[0];
for (j = 0; j < avctx->height; j++) {
for (i = 0; i < avctx->width; i++)
if (avctx->bits_per_coded_sample == 24) {
AV_WB24(dst + i * 3, offset_gu);
} else {
AV_WN32(dst + i * 4, offset_gu);
}
dst += p->linesize[0];
for (i = 0; i < avctx->height; i++) {
memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
if (avctx->pix_fmt == AV_PIX_FMT_GBRAP)
memset(p->data[3] + i * p->linesize[3], 0xFFu, avctx->width);
} }
break; break;
case FRAME_ARITH_RGBA: case FRAME_ARITH_RGBA:
avctx->pix_fmt = AV_PIX_FMT_RGB32;
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
planes = 4; planes = 4;
offset_ry += 4; offset_ry += 4;
offs[3] = AV_RL32(buf + 9); offs[3] = AV_RL32(buf + 9);
case FRAME_ARITH_RGB24: case FRAME_ARITH_RGB24:
case FRAME_U_RGB24: case FRAME_U_RGB24:
if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24) if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
avctx->pix_fmt = AV_PIX_FMT_RGB24;
avctx->pix_fmt = AV_PIX_FMT_GBRP;


if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
return ret; return ret;
@@ -629,15 +616,8 @@ static int lag_decode_frame(AVCodecContext *avctx,
offs[1] = offset_gu; offs[1] = offset_gu;
offs[2] = offset_ry; offs[2] = offset_ry;


l->rgb_stride = FFALIGN(avctx->width, 16);
av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
l->rgb_stride * avctx->height * planes + 1);
if (!l->rgb_planes) {
av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
return AVERROR(ENOMEM);
}
for (i = 0; i < planes; i++) for (i = 0; i < planes; i++)
srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
srcs[i] = p->data[i] + (avctx->height - 1) * p->linesize[i];
for (i = 0; i < planes; i++) for (i = 0; i < planes; i++)
if (buf_size <= offs[i]) { if (buf_size <= offs[i]) {
av_log(avctx, AV_LOG_ERROR, av_log(avctx, AV_LOG_ERROR,
@@ -648,32 +628,16 @@ static int lag_decode_frame(AVCodecContext *avctx,
for (i = 0; i < planes; i++) for (i = 0; i < planes; i++)
lag_decode_arith_plane(l, srcs[i], lag_decode_arith_plane(l, srcs[i],
avctx->width, avctx->height, avctx->width, avctx->height,
-l->rgb_stride, buf + offs[i],
-p->linesize[i], buf + offs[i],
buf_size - offs[i]); buf_size - offs[i]);
dst = p->data[0];
for (i = 0; i < planes; i++)
srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height;
for (j = 0; j < avctx->height; j++) {
for (i = 0; i < avctx->width; i++) {
uint8_t r, g, b, a;
r = srcs[0][i];
g = srcs[1][i];
b = srcs[2][i];
r += g;
b += g;
if (frametype == FRAME_ARITH_RGBA) {
a = srcs[3][i];
AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
} else {
dst[i * 3 + 0] = r;
dst[i * 3 + 1] = g;
dst[i * 3 + 2] = b;
}
}
dst += p->linesize[0];
for (i = 0; i < planes; i++)
srcs[i] += l->rgb_stride;
for (i = 0; i < avctx->height; i++) {
l->llviddsp.add_bytes(p->data[0] + i * p->linesize[0], p->data[1] + i * p->linesize[1], avctx->width);
l->llviddsp.add_bytes(p->data[2] + i * p->linesize[2], p->data[1] + i * p->linesize[1], avctx->width);
} }
FFSWAP(uint8_t*, p->data[0], p->data[1]);
FFSWAP(int, p->linesize[0], p->linesize[1]);
FFSWAP(uint8_t*, p->data[2], p->data[1]);
FFSWAP(int, p->linesize[2], p->linesize[1]);
break; break;
case FRAME_ARITH_YUY2: case FRAME_ARITH_YUY2:
avctx->pix_fmt = AV_PIX_FMT_YUV422P; avctx->pix_fmt = AV_PIX_FMT_YUV422P;
@@ -757,15 +721,6 @@ static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
} }
#endif #endif


static av_cold int lag_decode_end(AVCodecContext *avctx)
{
LagarithContext *l = avctx->priv_data;

av_freep(&l->rgb_planes);

return 0;
}

AVCodec ff_lagarith_decoder = { AVCodec ff_lagarith_decoder = {
.name = "lagarith", .name = "lagarith",
.long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"), .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
@@ -774,7 +729,6 @@ AVCodec ff_lagarith_decoder = {
.priv_data_size = sizeof(LagarithContext), .priv_data_size = sizeof(LagarithContext),
.init = lag_decode_init, .init = lag_decode_init,
.init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy), .init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
.close = lag_decode_end,
.decode = lag_decode_frame, .decode = lag_decode_frame,
.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
}; };

+ 25
- 25
tests/ref/fate/lagarith-red View File

@@ -3,28 +3,28 @@
#codec_id 0: rawvideo #codec_id 0: rawvideo
#dimensions 0: 320x240 #dimensions 0: 320x240
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 230400, 0x67dfe576
0, 1, 1, 1, 230400, 0x67dfe576
0, 2, 2, 1, 230400, 0x67dfe576
0, 3, 3, 1, 230400, 0x67dfe576
0, 4, 4, 1, 230400, 0x67dfe576
0, 5, 5, 1, 230400, 0x67dfe576
0, 6, 6, 1, 230400, 0x67dfe576
0, 7, 7, 1, 230400, 0x67dfe576
0, 8, 8, 1, 230400, 0x67dfe576
0, 9, 9, 1, 230400, 0x67dfe576
0, 10, 10, 1, 230400, 0x67dfe576
0, 11, 11, 1, 230400, 0x67dfe576
0, 12, 12, 1, 230400, 0x67dfe576
0, 13, 13, 1, 230400, 0x67dfe576
0, 14, 14, 1, 230400, 0x67dfe576
0, 15, 15, 1, 230400, 0x67dfe576
0, 16, 16, 1, 230400, 0x67dfe576
0, 17, 17, 1, 230400, 0x67dfe576
0, 18, 18, 1, 230400, 0x67dfe576
0, 19, 19, 1, 230400, 0x67dfe576
0, 20, 20, 1, 230400, 0x67dfe576
0, 21, 21, 1, 230400, 0x67dfe576
0, 22, 22, 1, 230400, 0x67dfe576
0, 23, 23, 1, 230400, 0x67dfe576
0, 24, 24, 1, 230400, 0x67dfe576
0, 0, 0, 1, 230400, 0x77f0e576
0, 1, 1, 1, 230400, 0x77f0e576
0, 2, 2, 1, 230400, 0x77f0e576
0, 3, 3, 1, 230400, 0x77f0e576
0, 4, 4, 1, 230400, 0x77f0e576
0, 5, 5, 1, 230400, 0x77f0e576
0, 6, 6, 1, 230400, 0x77f0e576
0, 7, 7, 1, 230400, 0x77f0e576
0, 8, 8, 1, 230400, 0x77f0e576
0, 9, 9, 1, 230400, 0x77f0e576
0, 10, 10, 1, 230400, 0x77f0e576
0, 11, 11, 1, 230400, 0x77f0e576
0, 12, 12, 1, 230400, 0x77f0e576
0, 13, 13, 1, 230400, 0x77f0e576
0, 14, 14, 1, 230400, 0x77f0e576
0, 15, 15, 1, 230400, 0x77f0e576
0, 16, 16, 1, 230400, 0x77f0e576
0, 17, 17, 1, 230400, 0x77f0e576
0, 18, 18, 1, 230400, 0x77f0e576
0, 19, 19, 1, 230400, 0x77f0e576
0, 20, 20, 1, 230400, 0x77f0e576
0, 21, 21, 1, 230400, 0x77f0e576
0, 22, 22, 1, 230400, 0x77f0e576
0, 23, 23, 1, 230400, 0x77f0e576
0, 24, 24, 1, 230400, 0x77f0e576

+ 4
- 4
tests/ref/fate/lagarith-rgb24 View File

@@ -3,7 +3,7 @@
#codec_id 0: rawvideo #codec_id 0: rawvideo
#dimensions 0: 480x256 #dimensions 0: 480x256
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 368640, 0x26f74db2
0, 1, 1, 1, 368640, 0x63b29ea4
0, 2, 2, 1, 368640, 0x19467f03
0, 3, 3, 1, 368640, 0x5fdc3575
0, 0, 0, 1, 368640, 0x18364db2
0, 1, 1, 1, 368640, 0x60e79ea4
0, 2, 2, 1, 368640, 0xb28a7f03
0, 3, 3, 1, 368640, 0x2ed83575

+ 2
- 2
tests/ref/fate/lagarith-ticket4119 View File

@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 25, 25, 1, 691200, 0xc88a6f24
0, 50, 50, 1, 691200, 0x906d474c
0, 25, 25, 1, 691200, 0x1c4a6f24
0, 50, 50, 1, 691200, 0x1fa0474c

+ 50
- 50
tests/ref/fate/lagarith-ticket4119-cfr View File

@@ -27,53 +27,53 @@
0, 21, 21, 1, 691200, 0x00000000 0, 21, 21, 1, 691200, 0x00000000
0, 22, 22, 1, 691200, 0x00000000 0, 22, 22, 1, 691200, 0x00000000
0, 23, 23, 1, 691200, 0x00000000 0, 23, 23, 1, 691200, 0x00000000
0, 24, 24, 1, 691200, 0xc88a6f24
0, 25, 25, 1, 691200, 0xc88a6f24
0, 26, 26, 1, 691200, 0xc88a6f24
0, 27, 27, 1, 691200, 0xc88a6f24
0, 28, 28, 1, 691200, 0xc88a6f24
0, 29, 29, 1, 691200, 0xc88a6f24
0, 30, 30, 1, 691200, 0xc88a6f24
0, 31, 31, 1, 691200, 0xc88a6f24
0, 32, 32, 1, 691200, 0xc88a6f24
0, 33, 33, 1, 691200, 0xc88a6f24
0, 34, 34, 1, 691200, 0xc88a6f24
0, 35, 35, 1, 691200, 0xc88a6f24
0, 36, 36, 1, 691200, 0xc88a6f24
0, 37, 37, 1, 691200, 0xc88a6f24
0, 38, 38, 1, 691200, 0xc88a6f24
0, 39, 39, 1, 691200, 0xc88a6f24
0, 40, 40, 1, 691200, 0xc88a6f24
0, 41, 41, 1, 691200, 0xc88a6f24
0, 42, 42, 1, 691200, 0xc88a6f24
0, 43, 43, 1, 691200, 0xc88a6f24
0, 44, 44, 1, 691200, 0xc88a6f24
0, 45, 45, 1, 691200, 0xc88a6f24
0, 46, 46, 1, 691200, 0xc88a6f24
0, 47, 47, 1, 691200, 0xc88a6f24
0, 48, 48, 1, 691200, 0xc88a6f24
0, 49, 49, 1, 691200, 0x906d474c
0, 50, 50, 1, 691200, 0x906d474c
0, 51, 51, 1, 691200, 0x906d474c
0, 52, 52, 1, 691200, 0x906d474c
0, 53, 53, 1, 691200, 0x906d474c
0, 54, 54, 1, 691200, 0x906d474c
0, 55, 55, 1, 691200, 0x906d474c
0, 56, 56, 1, 691200, 0x906d474c
0, 57, 57, 1, 691200, 0x906d474c
0, 58, 58, 1, 691200, 0x906d474c
0, 59, 59, 1, 691200, 0x906d474c
0, 60, 60, 1, 691200, 0x906d474c
0, 61, 61, 1, 691200, 0x906d474c
0, 62, 62, 1, 691200, 0x906d474c
0, 63, 63, 1, 691200, 0x906d474c
0, 64, 64, 1, 691200, 0x906d474c
0, 65, 65, 1, 691200, 0x906d474c
0, 66, 66, 1, 691200, 0x906d474c
0, 67, 67, 1, 691200, 0x906d474c
0, 68, 68, 1, 691200, 0x906d474c
0, 69, 69, 1, 691200, 0x906d474c
0, 70, 70, 1, 691200, 0x906d474c
0, 71, 71, 1, 691200, 0x906d474c
0, 72, 72, 1, 691200, 0x906d474c
0, 73, 73, 1, 691200, 0x906d474c
0, 24, 24, 1, 691200, 0x1c4a6f24
0, 25, 25, 1, 691200, 0x1c4a6f24
0, 26, 26, 1, 691200, 0x1c4a6f24
0, 27, 27, 1, 691200, 0x1c4a6f24
0, 28, 28, 1, 691200, 0x1c4a6f24
0, 29, 29, 1, 691200, 0x1c4a6f24
0, 30, 30, 1, 691200, 0x1c4a6f24
0, 31, 31, 1, 691200, 0x1c4a6f24
0, 32, 32, 1, 691200, 0x1c4a6f24
0, 33, 33, 1, 691200, 0x1c4a6f24
0, 34, 34, 1, 691200, 0x1c4a6f24
0, 35, 35, 1, 691200, 0x1c4a6f24
0, 36, 36, 1, 691200, 0x1c4a6f24
0, 37, 37, 1, 691200, 0x1c4a6f24
0, 38, 38, 1, 691200, 0x1c4a6f24
0, 39, 39, 1, 691200, 0x1c4a6f24
0, 40, 40, 1, 691200, 0x1c4a6f24
0, 41, 41, 1, 691200, 0x1c4a6f24
0, 42, 42, 1, 691200, 0x1c4a6f24
0, 43, 43, 1, 691200, 0x1c4a6f24
0, 44, 44, 1, 691200, 0x1c4a6f24
0, 45, 45, 1, 691200, 0x1c4a6f24
0, 46, 46, 1, 691200, 0x1c4a6f24
0, 47, 47, 1, 691200, 0x1c4a6f24
0, 48, 48, 1, 691200, 0x1c4a6f24
0, 49, 49, 1, 691200, 0x1fa0474c
0, 50, 50, 1, 691200, 0x1fa0474c
0, 51, 51, 1, 691200, 0x1fa0474c
0, 52, 52, 1, 691200, 0x1fa0474c
0, 53, 53, 1, 691200, 0x1fa0474c
0, 54, 54, 1, 691200, 0x1fa0474c
0, 55, 55, 1, 691200, 0x1fa0474c
0, 56, 56, 1, 691200, 0x1fa0474c
0, 57, 57, 1, 691200, 0x1fa0474c
0, 58, 58, 1, 691200, 0x1fa0474c
0, 59, 59, 1, 691200, 0x1fa0474c
0, 60, 60, 1, 691200, 0x1fa0474c
0, 61, 61, 1, 691200, 0x1fa0474c
0, 62, 62, 1, 691200, 0x1fa0474c
0, 63, 63, 1, 691200, 0x1fa0474c
0, 64, 64, 1, 691200, 0x1fa0474c
0, 65, 65, 1, 691200, 0x1fa0474c
0, 66, 66, 1, 691200, 0x1fa0474c
0, 67, 67, 1, 691200, 0x1fa0474c
0, 68, 68, 1, 691200, 0x1fa0474c
0, 69, 69, 1, 691200, 0x1fa0474c
0, 70, 70, 1, 691200, 0x1fa0474c
0, 71, 71, 1, 691200, 0x1fa0474c
0, 72, 72, 1, 691200, 0x1fa0474c
0, 73, 73, 1, 691200, 0x1fa0474c

+ 2
- 2
tests/ref/fate/lagarith-ticket4119-drop View File

@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 1, 1, 1, 691200, 0xc88a6f24
0, 2, 2, 1, 691200, 0x906d474c
0, 1, 1, 1, 691200, 0x1c4a6f24
0, 2, 2, 1, 691200, 0x1fa0474c

+ 2
- 2
tests/ref/fate/lagarith-ticket4119-pass View File

@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 25, 25, 1, 691200, 0xc88a6f24
0, 50, 50, 1, 691200, 0x906d474c
0, 25, 25, 1, 691200, 0x1c4a6f24
0, 50, 50, 1, 691200, 0x1fa0474c

+ 2
- 2
tests/ref/fate/lagarith-ticket4119-vfr View File

@@ -4,5 +4,5 @@
#dimensions 0: 640x360 #dimensions 0: 640x360
#sar 0: 0/1 #sar 0: 0/1
0, 0, 0, 1, 691200, 0x00000000 0, 0, 0, 1, 691200, 0x00000000
0, 25, 25, 1, 691200, 0xc88a6f24
0, 50, 50, 1, 691200, 0x906d474c
0, 25, 25, 1, 691200, 0x1c4a6f24
0, 50, 50, 1, 691200, 0x1fa0474c

Loading…
Cancel
Save