Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.3
@@ -2115,6 +2115,7 @@ CONFIG_EXTRA=" | |||
libx262 | |||
llauddsp | |||
llviddsp | |||
llvidencdsp | |||
lpc | |||
lzf | |||
me_cmp | |||
@@ -2366,7 +2367,7 @@ amv_decoder_select="sp5x_decoder exif" | |||
amv_encoder_select="aandcttables jpegtables mpegvideoenc" | |||
ape_decoder_select="bswapdsp llauddsp" | |||
apng_decoder_select="zlib" | |||
apng_encoder_select="huffyuvencdsp zlib" | |||
apng_encoder_select="llvidencdsp zlib" | |||
asv1_decoder_select="blockdsp bswapdsp idctdsp" | |||
asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" | |||
asv2_decoder_select="blockdsp bswapdsp idctdsp" | |||
@@ -2430,7 +2431,7 @@ hap_encoder_deps="libsnappy" | |||
hap_encoder_select="texturedspenc" | |||
hevc_decoder_select="bswapdsp cabac golomb videodsp" | |||
huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp" | |||
huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp" | |||
huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp" | |||
iac_decoder_select="imc_decoder" | |||
imc_decoder_select="bswapdsp fft mdct sinewin" | |||
indeo3_decoder_select="hpeldsp" | |||
@@ -2491,7 +2492,7 @@ on2avc_decoder_select="mdct" | |||
opus_decoder_deps="swresample" | |||
opus_decoder_select="imdct15" | |||
png_decoder_select="zlib" | |||
png_encoder_select="huffyuvencdsp zlib" | |||
png_encoder_select="llvidencdsp zlib" | |||
prores_decoder_select="blockdsp idctdsp" | |||
prores_encoder_select="fdctdsp" | |||
qcelp_decoder_select="lsp" | |||
@@ -2534,7 +2535,7 @@ tscc_decoder_select="zlib" | |||
twinvq_decoder_select="mdct lsp sinewin" | |||
txd_decoder_select="texturedsp" | |||
utvideo_decoder_select="bswapdsp llviddsp" | |||
utvideo_encoder_select="bswapdsp huffman huffyuvencdsp" | |||
utvideo_encoder_select="bswapdsp huffman llvidencdsp" | |||
vble_decoder_select="llviddsp" | |||
vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 mpegvideo vc1dsp" | |||
vc1_qsv_decoder_deps="libmfx" | |||
@@ -91,6 +91,7 @@ OBJS-$(CONFIG_JPEGTABLES) += jpegtables.o | |||
OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | |||
OBJS-$(CONFIG_LLAUDDSP) += lossless_audiodsp.o | |||
OBJS-$(CONFIG_LLVIDDSP) += lossless_videodsp.o | |||
OBJS-$(CONFIG_LLVIDENCDSP) += lossless_videoencdsp.o | |||
OBJS-$(CONFIG_LPC) += lpc.o | |||
OBJS-$(CONFIG_LSP) += lsp.o | |||
OBJS-$(CONFIG_LZF) += lzf.o | |||
@@ -38,6 +38,7 @@ | |||
#include "huffyuvencdsp.h" | |||
#include "put_bits.h" | |||
#include "lossless_videodsp.h" | |||
#include "lossless_videoencdsp.h" | |||
#define VLC_BITS 12 | |||
@@ -89,6 +90,7 @@ typedef struct HYuvContext { | |||
HuffYUVDSPContext hdsp; | |||
HuffYUVEncDSPContext hencdsp; | |||
LLVidDSPContext llviddsp; | |||
LLVidEncDSPContext llvidencdsp; | |||
int non_determ; // non-deterministic, multi-threaded encoder allowed | |||
} HYuvContext; | |||
@@ -33,6 +33,7 @@ | |||
#include "huffman.h" | |||
#include "huffyuvencdsp.h" | |||
#include "internal.h" | |||
#include "lossless_videoencdsp.h" | |||
#include "put_bits.h" | |||
#include "libavutil/opt.h" | |||
#include "libavutil/pixdesc.h" | |||
@@ -41,7 +42,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst, | |||
const uint8_t *src0, const uint8_t *src1, int w) | |||
{ | |||
if (s->bps <= 8) { | |||
s->hencdsp.diff_bytes(dst, src0, src1, w); | |||
s->llvidencdsp.diff_bytes(dst, src0, src1, w); | |||
} else { | |||
s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w); | |||
} | |||
@@ -65,7 +66,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, | |||
dst[i] = temp - left; | |||
left = temp; | |||
} | |||
s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); | |||
s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); | |||
return src[w-1]; | |||
} | |||
} else { | |||
@@ -117,7 +118,7 @@ static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, | |||
a = at; | |||
} | |||
s->hencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); | |||
s->llvidencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); | |||
*red = src[(w - 1) * 4 + R]; | |||
*green = src[(w - 1) * 4 + G]; | |||
@@ -146,7 +147,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, | |||
b = bt; | |||
} | |||
s->hencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); | |||
s->llvidencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); | |||
*red = src[(w - 1) * 3 + 0]; | |||
*green = src[(w - 1) * 3 + 1]; | |||
@@ -156,7 +157,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, | |||
static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top) | |||
{ | |||
if (s->bps <= 8) { | |||
s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top); | |||
s->llvidencdsp.sub_median_pred(dst, src1, src2, w , left, left_top); | |||
} else { | |||
s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top); | |||
} | |||
@@ -218,6 +219,7 @@ static av_cold int encode_init(AVCodecContext *avctx) | |||
ff_huffyuv_common_init(avctx); | |||
ff_huffyuvencdsp_init(&s->hencdsp, avctx); | |||
ff_llvidencdsp_init(&s->llvidencdsp); | |||
avctx->extradata = av_mallocz(3*MAX_N + 4); | |||
if (s->flags&AV_CODEC_FLAG_PASS1) { | |||
@@ -823,9 +825,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
lefttopy = p->data[0][3]; | |||
lefttopu = p->data[1][1]; | |||
lefttopv = p->data[2][1]; | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width - 4, &lefty, &lefttopy); | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); | |||
s->llvidencdsp.sub_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width - 4, &lefty, &lefttopy); | |||
s->llvidencdsp.sub_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); | |||
s->llvidencdsp.sub_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); | |||
encode_422_bitstream(s, 0, width - 4); | |||
y++; cy++; | |||
@@ -835,7 +837,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
if (s->bitstream_bpp == 12) { | |||
while (2 * cy > y) { | |||
ydst = p->data[0] + p->linesize[0] * y; | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
encode_gray_bitstream(s, width); | |||
y++; | |||
} | |||
@@ -845,9 +847,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
udst = p->data[1] + p->linesize[1] * cy; | |||
vdst = p->data[2] + p->linesize[2] * cy; | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | |||
s->hencdsp.sub_hfyu_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | |||
s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
s->llvidencdsp.sub_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | |||
s->llvidencdsp.sub_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | |||
encode_422_bitstream(s, 0, width); | |||
} | |||
@@ -860,7 +862,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
ydst = p->data[0] + p->linesize[0] * y; | |||
if (s->predictor == PLANE && s->interlaced < y) { | |||
s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); | |||
} else { | |||
@@ -876,9 +878,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
vdst = p->data[2] + p->linesize[2] * cy; | |||
if (s->predictor == PLANE && s->interlaced < cy) { | |||
s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
s->hencdsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); | |||
s->hencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); | |||
s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
s->llvidencdsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); | |||
s->llvidencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); | |||
lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); | |||
leftu = sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu); | |||
@@ -911,7 +913,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
for (y = 1; y < s->height; y++) { | |||
uint8_t *dst = data + y*stride; | |||
if (s->predictor == PLANE && s->interlaced < y) { | |||
s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); | |||
s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); | |||
sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width, | |||
&leftr, &leftg, &leftb, &lefta); | |||
} else { | |||
@@ -939,7 +941,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
for (y = 1; y < s->height; y++) { | |||
uint8_t *dst = data + y * stride; | |||
if (s->predictor == PLANE && s->interlaced < y) { | |||
s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, | |||
s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, | |||
width * 3); | |||
sub_left_prediction_rgb24(s, s->temp[0], s->temp[1], width, | |||
&leftr, &leftg, &leftb); | |||
@@ -21,38 +21,6 @@ | |||
#include "huffyuvencdsp.h" | |||
#include "mathops.h" | |||
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | |||
#define pb_7f (~0UL / 255 * 0x7f) | |||
#define pb_80 (~0UL / 255 * 0x80) | |||
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) | |||
{ | |||
long i; | |||
#if !HAVE_FAST_UNALIGNED | |||
if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { | |||
for (i = 0; i + 7 < w; i += 8) { | |||
dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
dst[i + 1] = src1[i + 1] - src2[i + 1]; | |||
dst[i + 2] = src1[i + 2] - src2[i + 2]; | |||
dst[i + 3] = src1[i + 3] - src2[i + 3]; | |||
dst[i + 4] = src1[i + 4] - src2[i + 4]; | |||
dst[i + 5] = src1[i + 5] - src2[i + 5]; | |||
dst[i + 6] = src1[i + 6] - src2[i + 6]; | |||
dst[i + 7] = src1[i + 7] - src2[i + 7]; | |||
} | |||
} else | |||
#endif | |||
for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { | |||
long a = *(long *) (src1 + i); | |||
long b = *(long *) (src2 + i); | |||
*(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ | |||
((a ^ b ^ pb_80) & pb_80); | |||
} | |||
for (; i < w; i++) | |||
dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
} | |||
static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ | |||
long i; | |||
#if !HAVE_FAST_UNALIGNED | |||
@@ -79,27 +47,6 @@ static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *sr | |||
dst[i] = (src1[i] - src2[i]) & mask; | |||
} | |||
static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1, | |||
const uint8_t *src2, intptr_t w, | |||
int *left, int *left_top) | |||
{ | |||
int i; | |||
uint8_t l, lt; | |||
l = *left; | |||
lt = *left_top; | |||
for (i = 0; i < w; i++) { | |||
const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); | |||
lt = src1[i]; | |||
l = src2[i]; | |||
dst[i] = l - pred; | |||
} | |||
*left = l; | |||
*left_top = lt; | |||
} | |||
static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){ | |||
int i; | |||
uint16_t l, lt; | |||
@@ -120,9 +67,7 @@ static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, co | |||
av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | |||
{ | |||
c->diff_bytes = diff_bytes_c; | |||
c->diff_int16 = diff_int16_c; | |||
c->sub_hfyu_median_pred = sub_hfyu_median_pred_c; | |||
c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c; | |||
if (ARCH_X86) | |||
@@ -24,22 +24,11 @@ | |||
#include "avcodec.h" | |||
typedef struct HuffYUVEncDSPContext { | |||
void (*diff_bytes)(uint8_t *dst /* align 16 */, | |||
const uint8_t *src1 /* align 16 */, | |||
const uint8_t *src2 /* align 1 */, | |||
intptr_t w); | |||
void (*diff_int16)(uint16_t *dst /* align 16 */, | |||
const uint16_t *src1 /* align 16 */, | |||
const uint16_t *src2 /* align 1 */, | |||
unsigned mask, int w); | |||
/** | |||
* Subtract HuffYUV's variant of median prediction. | |||
* Note, this might read from src1[-1], src2[-1]. | |||
*/ | |||
void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1, | |||
const uint8_t *src2, intptr_t w, | |||
int *left, int *left_top); | |||
void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, | |||
const uint16_t *src2, unsigned mask, | |||
int w, int *left, int *left_top); | |||
@@ -0,0 +1,84 @@ | |||
/* | |||
* This file is part of FFmpeg. | |||
* | |||
* FFmpeg is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* FFmpeg is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with FFmpeg; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#include "config.h" | |||
#include "libavutil/attributes.h" | |||
#include "lossless_videoencdsp.h" | |||
#include "mathops.h" | |||
/*
 * Per-byte constants replicated across one unsigned long:
 * 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's
 * native arithmetic size.
 */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)

/**
 * Write the byte-wise difference of two buffers: dst[k] = src1[k] - src2[k]
 * (modulo 256) for every k in [0, w).
 *
 * The bulk of the work is done one unsigned long at a time. Within a word,
 * (a | 0x80..) - (b & 0x7f..) can never borrow from one byte into the next,
 * because every minuend byte has its top bit set and every subtrahend byte
 * has it cleared; the final XOR then restores the correct top bit of each
 * byte. Bytes left over after the word loop are handled one by one.
 *
 * @param dst  destination buffer, at least w bytes
 * @param src1 minuend buffer, at least w bytes
 * @param src2 subtrahend buffer, at least w bytes
 * @param w    number of bytes to process; nothing is written when w <= 0
 */
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w)
{
    long i;

#if !HAVE_FAST_UNALIGNED
    /*
     * Without fast unaligned access the word loop is only used when both
     * sources are word aligned. The test goes through uintptr_t: a cast to
     * long would truncate the pointer where long is narrower than a pointer.
     */
    if (((uintptr_t)src1 | (uintptr_t)src2) & (sizeof(unsigned long) - 1)) {
        for (i = 0; i + 7 < w; i += 8) {
            dst[i + 0] = src1[i + 0] - src2[i + 0];
            dst[i + 1] = src1[i + 1] - src2[i + 1];
            dst[i + 2] = src1[i + 2] - src2[i + 2];
            dst[i + 3] = src1[i + 3] - src2[i + 3];
            dst[i + 4] = src1[i + 4] - src2[i + 4];
            dst[i + 5] = src1[i + 5] - src2[i + 5];
            dst[i + 6] = src1[i + 6] - src2[i + 6];
            dst[i + 7] = src1[i + 7] - src2[i + 7];
        }
    } else
#endif
    /* Unsigned words match the unsigned masks and keep the loads const-correct. */
    for (i = 0; i <= w - (int) sizeof(unsigned long); i += sizeof(unsigned long)) {
        const unsigned long a = *(const unsigned long *) (src1 + i);
        const unsigned long b = *(const unsigned long *) (src2 + i);
        *(unsigned long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^
                                       ((a ^ b ^ pb_80) & pb_80);
    }
    /* Tail: whatever the loop above did not cover. */
    for (; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
/**
 * Subtract the median predictor from a run of w bytes.
 *
 * For each position the predictor is the median (mid_pred) of the running
 * "left" value, the src1 byte at that position, and their gradient
 * (left + src1 - left_top) truncated to 8 bits. The residual written to dst
 * is the src2 byte minus that predictor. Afterwards the src1 byte becomes the
 * new "left top" and the src2 byte becomes the new "left".
 *
 * @param dst      receives w residual bytes
 * @param src1     bytes used as the "top" neighbours
 * @param src2     bytes being predicted
 * @param w        number of bytes to process
 * @param left     in: initial left value; out: last src2 byte consumed
 * @param left_top in: initial left-top value; out: last src1 byte consumed
 */
static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1,
                              const uint8_t *src2, intptr_t w,
                              int *left, int *left_top)
{
    uint8_t cur_left     = *left;
    uint8_t cur_left_top = *left_top;
    int x = 0;

    while (x < w) {
        const uint8_t top = src1[x];
        const int pred = mid_pred(cur_left, top, (cur_left + top - cur_left_top) & 0xFF);

        cur_left_top = top;
        cur_left     = src2[x];
        dst[x]       = cur_left - pred;
        x++;
    }

    /* Hand the running state back so the caller can continue with the next run. */
    *left     = cur_left;
    *left_top = cur_left_top;
}
/**
 * Fill an LLVidEncDSPContext with the C implementations defined in this file,
 * then give the x86 initializer the chance to replace them when building for
 * x86.
 *
 * @param c context whose function pointers are set
 */
av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c)
{
    /* Portable defaults first; arch-specific init may overwrite them. */
    c->sub_median_pred = sub_median_pred_c;
    c->diff_bytes      = diff_bytes_c;

    if (ARCH_X86) {
        ff_llvidencdsp_init_x86(c);
    }
}
@@ -0,0 +1,41 @@ | |||
/* | |||
* This file is part of FFmpeg. | |||
* | |||
* FFmpeg is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* FFmpeg is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with FFmpeg; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#ifndef AVCODEC_LOSSLESS_VIDEOENCDSP_H | |||
#define AVCODEC_LOSSLESS_VIDEOENCDSP_H | |||
#include <stdint.h> | |||
/**
 * Table of function pointers for the 8-bit lossless video encoding helpers.
 * The pointers are filled in by ff_llvidencdsp_init().
 */
typedef struct LLVidEncDSPContext {
    /**
     * Byte-wise subtraction: store src1[k] - src2[k] into dst[k] for the
     * first w bytes. Alignment expectations are noted on each parameter.
     */
    void (*diff_bytes)(uint8_t *dst /* align 16 */,
                       const uint8_t *src1 /* align 16 */,
                       const uint8_t *src2 /* align 1 */,
                       intptr_t w);

    /**
     * Subtract HuffYUV's variant of median prediction.
     * Note, this might read from src1[-1], src2[-1].
     * left and left_top carry the predictor state in and out of the call.
     */
    void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1,
                            const uint8_t *src2, intptr_t w,
                            int *left, int *left_top);
} LLVidEncDSPContext;
void ff_llvidencdsp_init(LLVidEncDSPContext *c); | |||
void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c); | |||
#endif /* AVCODEC_LOSSLESS_VIDEOENCDSP_H */ |
@@ -22,7 +22,7 @@ | |||
#include "avcodec.h" | |||
#include "internal.h" | |||
#include "bytestream.h" | |||
#include "huffyuvencdsp.h" | |||
#include "lossless_videoencdsp.h" | |||
#include "png.h" | |||
#include "apng.h" | |||
@@ -47,7 +47,7 @@ typedef struct APNGFctlChunk { | |||
typedef struct PNGEncContext { | |||
AVClass *class; | |||
HuffYUVEncDSPContext hdsp; | |||
LLVidEncDSPContext llvidencdsp; | |||
uint8_t *bytestream; | |||
uint8_t *bytestream_start; | |||
@@ -159,7 +159,7 @@ static void sub_left_prediction(PNGEncContext *c, uint8_t *dst, const uint8_t *s | |||
for (x = 0; x < unaligned_w; x++) | |||
*dst++ = *src1++ - *src2++; | |||
size -= unaligned_w; | |||
c->hdsp.diff_bytes(dst, src1, src2, size); | |||
c->llvidencdsp.diff_bytes(dst, src1, src2, size); | |||
} | |||
static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, | |||
@@ -175,7 +175,7 @@ static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, | |||
sub_left_prediction(c, dst, src, bpp, size); | |||
break; | |||
case PNG_FILTER_VALUE_UP: | |||
c->hdsp.diff_bytes(dst, src, top, size); | |||
c->llvidencdsp.diff_bytes(dst, src, top, size); | |||
break; | |||
case PNG_FILTER_VALUE_AVG: | |||
for (i = 0; i < bpp; i++) | |||
@@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS | |||
FF_ENABLE_DEPRECATION_WARNINGS | |||
#endif | |||
ff_huffyuvencdsp_init(&s->hdsp, avctx); | |||
ff_llvidencdsp_init(&s->llvidencdsp); | |||
#if FF_API_PRIVATE_OPT | |||
FF_DISABLE_DEPRECATION_WARNINGS | |||
@@ -30,8 +30,8 @@ | |||
#include "libavutil/common.h" | |||
#include "avcodec.h" | |||
#include "bswapdsp.h" | |||
#include "huffyuvencdsp.h" | |||
#include "lossless_videodsp.h" | |||
#include "lossless_videoencdsp.h" | |||
enum { | |||
PRED_NONE = 0, | |||
@@ -70,8 +70,8 @@ typedef struct UtvideoContext { | |||
const AVClass *class; | |||
AVCodecContext *avctx; | |||
BswapDSPContext bdsp; | |||
HuffYUVEncDSPContext hdsp; | |||
LLVidDSPContext llviddsp; | |||
LLVidEncDSPContext llvidencdsp; | |||
uint32_t frame_info_size, flags, frame_info; | |||
int planes; | |||
@@ -33,7 +33,6 @@ | |||
#include "bswapdsp.h" | |||
#include "bytestream.h" | |||
#include "put_bits.h" | |||
#include "huffyuvencdsp.h" | |||
#include "mathops.h" | |||
#include "utvideo.h" | |||
#include "huffman.h" | |||
@@ -120,7 +119,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) | |||
} | |||
ff_bswapdsp_init(&c->bdsp); | |||
ff_huffyuvencdsp_init(&c->hdsp, avctx); | |||
ff_llvidencdsp_init(&c->llvidencdsp); | |||
#if FF_API_PRIVATE_OPT | |||
FF_DISABLE_DEPRECATION_WARNINGS | |||
@@ -324,7 +323,7 @@ static void median_predict(UtvideoContext *c, uint8_t *src, uint8_t *dst, int st | |||
/* Rest of the coded part uses median prediction */ | |||
for (j = 1; j < height; j++) { | |||
c->hdsp.sub_hfyu_median_pred(dst, src - stride, src, width, &A, &B); | |||
c->llvidencdsp.sub_median_pred(dst, src - stride, src, width, &A, &B); | |||
dst += width; | |||
src += stride; | |||
} | |||
@@ -20,8 +20,9 @@ OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o | |||
OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | |||
OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp_init.o | |||
OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp_init.o | |||
OBJS-$(CONFIG_LLVIDENCDSP) += x86/lossless_videoencdsp_init.o | |||
OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o | |||
OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o | |||
OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_init.o | |||
OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o | |||
OBJS-$(CONFIG_LPC) += x86/lpc.o | |||
OBJS-$(CONFIG_ME_CMP) += x86/me_cmp_init.o | |||
@@ -114,6 +115,7 @@ YASM-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp.o | |||
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o | |||
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o | |||
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o | |||
YASM-OBJS-$(CONFIG_LLVIDENCDSP) += x86/lossless_videoencdsp.o | |||
YASM-OBJS-$(CONFIG_ME_CMP) += x86/me_cmp.o | |||
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | |||
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o | |||
@@ -27,128 +27,8 @@ | |||
section .text | |||
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
; intptr_t w); | |||
%macro DIFF_BYTES_PROLOGUE 0 | |||
%if ARCH_X86_32 | |||
cglobal diff_bytes, 3,5,2, dst, src1, src2 | |||
%define wq r4q | |||
DECLARE_REG_TMP 3 | |||
mov wq, r3mp | |||
%else | |||
cglobal diff_bytes, 4,5,2, dst, src1, src2, w | |||
DECLARE_REG_TMP 4 | |||
%endif ; ARCH_X86_32 | |||
%define i t0q | |||
%endmacro | |||
; label to jump to if w < regsize | |||
%macro DIFF_BYTES_LOOP_PREP 1 | |||
mov i, wq | |||
and i, -2 * regsize | |||
jz %1 | |||
add dstq, i | |||
add src1q, i | |||
add src2q, i | |||
neg i | |||
%endmacro | |||
; mov type used for src1q, dstq, first reg, second reg | |||
%macro DIFF_BYTES_LOOP_CORE 4 | |||
%if mmsize != 16 | |||
mov%1 %3, [src1q + i] | |||
mov%1 %4, [src1q + i + regsize] | |||
psubb %3, [src2q + i] | |||
psubb %4, [src2q + i + regsize] | |||
mov%2 [dstq + i], %3 | |||
mov%2 [regsize + dstq + i], %4 | |||
%else | |||
; SSE enforces alignment of psubb operand | |||
mov%1 %3, [src1q + i] | |||
movu %4, [src2q + i] | |||
psubb %3, %4 | |||
mov%2 [dstq + i], %3 | |||
mov%1 %3, [src1q + i + regsize] | |||
movu %4, [src2q + i + regsize] | |||
psubb %3, %4 | |||
mov%2 [regsize + dstq + i], %3 | |||
%endif | |||
%endmacro | |||
%macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq | |||
%define regsize mmsize | |||
.loop_%1%2: | |||
DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 | |||
add i, 2 * regsize | |||
jl .loop_%1%2 | |||
.skip_main_%1%2: | |||
and wq, 2 * regsize - 1 | |||
jz .end_%1%2 | |||
%if mmsize > 16 | |||
; fall back to narrower xmm | |||
%define regsize mmsize / 2 | |||
DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa | |||
.loop2_%1%2: | |||
DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 | |||
add i, 2 * regsize | |||
jl .loop2_%1%2 | |||
.setup_loop_gpr_%1%2: | |||
and wq, 2 * regsize - 1 | |||
jz .end_%1%2 | |||
%endif | |||
add dstq, wq | |||
add src1q, wq | |||
add src2q, wq | |||
neg wq | |||
.loop_gpr_%1%2: | |||
mov t0b, [src1q + wq] | |||
sub t0b, [src2q + wq] | |||
mov [dstq + wq], t0b | |||
inc wq | |||
jl .loop_gpr_%1%2 | |||
.end_%1%2: | |||
REP_RET | |||
%endmacro | |||
%if ARCH_X86_32 | |||
INIT_MMX mmx | |||
DIFF_BYTES_PROLOGUE | |||
%define regsize mmsize | |||
DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
DIFF_BYTES_BODY a, a | |||
%undef i | |||
%endif | |||
INIT_XMM sse2 | |||
DIFF_BYTES_PROLOGUE | |||
%define regsize mmsize | |||
DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
test dstq, regsize - 1 | |||
jnz .loop_uu | |||
test src1q, regsize - 1 | |||
jnz .loop_ua | |||
DIFF_BYTES_BODY a, a | |||
DIFF_BYTES_BODY u, a | |||
DIFF_BYTES_BODY u, u | |||
%undef i | |||
%if HAVE_AVX2_EXTERNAL | |||
INIT_YMM avx2 | |||
DIFF_BYTES_PROLOGUE | |||
%define regsize mmsize | |||
; Directly using unaligned SSE2 version is marginally faster than | |||
; branching based on arguments. | |||
DIFF_BYTES_LOOP_PREP .skip_main_uu | |||
test dstq, regsize - 1 | |||
jnz .loop_uu | |||
test src1q, regsize - 1 | |||
jnz .loop_ua | |||
DIFF_BYTES_BODY a, a | |||
DIFF_BYTES_BODY u, a | |||
DIFF_BYTES_BODY u, u | |||
%undef i | |||
%endif | |||
; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
; unsigned mask, int w); | |||
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub | |||
movd m4, maskd | |||
SPLATW m4, m4 | |||
@@ -0,0 +1,54 @@ | |||
/* | |||
* SIMD-optimized HuffYUV encoding functions | |||
* Copyright (c) 2000, 2001 Fabrice Bellard | |||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
* | |||
* MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |||
* | |||
* This file is part of FFmpeg. | |||
* | |||
* FFmpeg is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* FFmpeg is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with FFmpeg; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#include "libavutil/attributes.h" | |||
#include "libavutil/cpu.h" | |||
#include "libavutil/pixdesc.h" | |||
#include "libavutil/x86/cpu.h" | |||
#include "libavcodec/huffyuvencdsp.h" | |||
void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
unsigned mask, int w); | |||
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
unsigned mask, int w); | |||
void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
unsigned mask, int w, int *left, int *left_top); | |||
/**
 * Replace the 16-bit HuffYUV encoder helpers in c with x86 SIMD versions
 * where the running CPU supports them.
 *
 * @param c     context whose function pointers may be overwritten
 * @param avctx codec context; only its pix_fmt is read here
 */
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
{
    av_unused int cpu_flags = av_get_cpu_flags();
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
    /* The MMXEXT median predictor is only picked when the first component
     * of the pixel format is narrower than 16 bits. */
    const int depth_below_16 = desc && desc->comp[0].depth < 16;

    if (EXTERNAL_MMXEXT(cpu_flags) && depth_below_16)
        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;

    /* Later assignments win: SSE2 replaces MMX when both are usable. */
    if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags))
        c->diff_int16 = ff_diff_int16_mmx;
    if (EXTERNAL_SSE2(cpu_flags))
        c->diff_int16 = ff_diff_int16_sse2;
}
@@ -0,0 +1,150 @@ | |||
;************************************************************************ | |||
;* SIMD-optimized lossless video encoding functions | |||
;* Copyright (c) 2000, 2001 Fabrice Bellard | |||
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
;* | |||
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |||
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com> | |||
;* | |||
;* This file is part of FFmpeg. | |||
;* | |||
;* FFmpeg is free software; you can redistribute it and/or | |||
;* modify it under the terms of the GNU Lesser General Public | |||
;* License as published by the Free Software Foundation; either | |||
;* version 2.1 of the License, or (at your option) any later version. | |||
;* | |||
;* FFmpeg is distributed in the hope that it will be useful, | |||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
;* Lesser General Public License for more details. | |||
;* | |||
;* You should have received a copy of the GNU Lesser General Public | |||
;* License along with FFmpeg; if not, write to the Free Software | |||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
;****************************************************************************** | |||
%include "libavutil/x86/x86util.asm" | |||
section .text | |||
; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
; intptr_t w); | |||
; DIFF_BYTES_PROLOGUE
; Emits the function entry for diff_bytes and sets up the names used by the
; other DIFF_BYTES_* macros:
;   - x86-32: cglobal names only dst, src1 and src2; the fourth argument (w)
;     is copied from r3mp into r4 (aliased as wq), and t0 is mapped to r3.
;   - otherwise: cglobal names all four arguments and t0 is mapped to r4.
; In both cases "i" becomes an alias for t0q.
; NOTE(review): the meaning of the numeric cglobal fields (args, regs, vector
; regs) follows x86inc conventions that are not visible here -- confirm there.
%macro DIFF_BYTES_PROLOGUE 0 | |||
%if ARCH_X86_32 | |||
cglobal diff_bytes, 3,5,2, dst, src1, src2 | |||
%define wq r4q | |||
DECLARE_REG_TMP 3 | |||
mov wq, r3mp | |||
%else | |||
cglobal diff_bytes, 4,5,2, dst, src1, src2, w | |||
DECLARE_REG_TMP 4 | |||
%endif ; ARCH_X86_32 | |||
%define i t0q | |||
%endmacro | |||
; DIFF_BYTES_LOOP_PREP <label>
; %1 = label to jump to when w is smaller than 2 * regsize, i.e. when there
;      is not enough data for even one iteration of the vector loop.
; Sets i to w rounded down to a multiple of 2 * regsize (the number of bytes
; one loop iteration consumes). If that is zero, jumps to %1. Otherwise
; advances dst, src1 and src2 by i and negates i, so that the loop can address
; [ptr + i] while i counts upwards and stops when it reaches zero.
%macro DIFF_BYTES_LOOP_PREP 1 | |||
mov i, wq | |||
and i, -2 * regsize | |||
jz %1 | |||
add dstq, i | |||
add src1q, i | |||
add src2q, i | |||
neg i | |||
%endmacro | |||
; DIFF_BYTES_LOOP_CORE <src1 mov type>, <dst mov type>, <reg A>, <reg B>
; %1 = mov suffix used for loads from src1q (callers pass a or u)
; %2 = mov suffix used for stores to dstq   (callers pass a or u)
; %3 = first vector register, %4 = second vector register
; Processes 2 * regsize bytes at offset i: dst = src1 - src2, byte-wise.
%macro DIFF_BYTES_LOOP_CORE 4 | |||
%if mmsize != 16 | |||
; Both halves are loaded from src1, then src2 is subtracted straight from
; memory as the psubb operand.
mov%1 %3, [src1q + i] | |||
mov%1 %4, [src1q + i + regsize] | |||
psubb %3, [src2q + i] | |||
psubb %4, [src2q + i + regsize] | |||
mov%2 [dstq + i], %3 | |||
mov%2 [regsize + dstq + i], %4 | |||
%else | |||
; SSE enforces alignment of the psubb memory operand, so src2 is first
; loaded with movu into %4 and the subtraction is register-to-register.
; %3 carries the result of each half in turn.
mov%1 %3, [src1q + i] | |||
movu %4, [src2q + i] | |||
psubb %3, %4 | |||
mov%2 [dstq + i], %3 | |||
mov%1 %3, [src1q + i + regsize] | |||
movu %4, [src2q + i + regsize] | |||
psubb %3, %4 | |||
mov%2 [regsize + dstq + i], %3 | |||
%endif | |||
%endmacro | |||
; DIFF_BYTES_BODY <src1 mov type>, <dst mov type>
; Emits one complete processing path whose labels are suffixed with %1%2:
;   .loop_XY       main vector loop, 2 * regsize bytes per iteration
;   .skip_main_XY  entry point when the main loop has nothing to do
;   .loop2_XY      (only when mmsize > 16) same loop at half register width
;   .loop_gpr_XY   byte-at-a-time loop for the remaining bytes
;   .end_XY        return
; Expects DIFF_BYTES_LOOP_PREP to have run before .loop_XY is entered.
%macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq | |||
%define regsize mmsize | |||
.loop_%1%2: | |||
DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 | |||
add i, 2 * regsize | |||
jl .loop_%1%2 | |||
.skip_main_%1%2: | |||
; Keep only the bytes the vector loop did not cover; done if there are none.
and wq, 2 * regsize - 1 | |||
jz .end_%1%2 | |||
%if mmsize > 16 | |||
; fall back to narrower xmm
%define regsize mmsize / 2 | |||
; NOTE(review): the skip label is always the _aa variant, whatever %1%2 is.
; The byte loop it leads to contains no mov-type-specific instructions, so
; this looks harmless when the macro is expanded several times -- confirm.
DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa | |||
.loop2_%1%2: | |||
DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 | |||
add i, 2 * regsize | |||
jl .loop2_%1%2 | |||
.setup_loop_gpr_%1%2: | |||
and wq, 2 * regsize - 1 | |||
jz .end_%1%2 | |||
%endif | |||
; Same negative-index trick as the vector loops, this time with wq itself:
; point past the remaining bytes and count wq up to zero.
add dstq, wq | |||
add src1q, wq | |||
add src2q, wq | |||
neg wq | |||
.loop_gpr_%1%2: | |||
; t0 (which also backs "i") is reused as a byte scratch register here.
mov t0b, [src1q + wq] | |||
sub t0b, [src2q + wq] | |||
mov [dstq + wq], t0b | |||
inc wq | |||
jl .loop_gpr_%1%2 | |||
.end_%1%2: | |||
REP_RET | |||
%endmacro | |||
%if ARCH_X86_32 | |||
INIT_MMX mmx | |||
DIFF_BYTES_PROLOGUE | |||
%define regsize mmsize | |||
DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
DIFF_BYTES_BODY a, a | |||
%undef i | |||
%endif | |||
INIT_XMM sse2 | |||
DIFF_BYTES_PROLOGUE | |||
%define regsize mmsize | |||
DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
test dstq, regsize - 1 | |||
jnz .loop_uu | |||
test src1q, regsize - 1 | |||
jnz .loop_ua | |||
DIFF_BYTES_BODY a, a | |||
DIFF_BYTES_BODY u, a | |||
DIFF_BYTES_BODY u, u | |||
%undef i | |||
%if HAVE_AVX2_EXTERNAL | |||
INIT_YMM avx2 | |||
DIFF_BYTES_PROLOGUE | |||
%define regsize mmsize | |||
; Directly using unaligned SSE2 version is marginally faster than | |||
; branching based on arguments. | |||
DIFF_BYTES_LOOP_PREP .skip_main_uu | |||
test dstq, regsize - 1 | |||
jnz .loop_uu | |||
test src1q, regsize - 1 | |||
jnz .loop_ua | |||
DIFF_BYTES_BODY a, a | |||
DIFF_BYTES_BODY u, a | |||
DIFF_BYTES_BODY u, u | |||
%undef i | |||
%endif |
@@ -1,5 +1,5 @@ | |||
/* | |||
* SIMD-optimized HuffYUV encoding functions | |||
* SIMD-optimized lossless video encoding functions | |||
* Copyright (c) 2000, 2001 Fabrice Bellard | |||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
* | |||
@@ -24,10 +24,9 @@ | |||
#include "libavutil/attributes.h" | |||
#include "libavutil/cpu.h" | |||
#include "libavutil/pixdesc.h" | |||
#include "libavutil/x86/asm.h" | |||
#include "libavutil/x86/cpu.h" | |||
#include "libavcodec/huffyuvencdsp.h" | |||
#include "libavcodec/lossless_videoencdsp.h" | |||
#include "libavcodec/mathops.h" | |||
void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
@@ -36,18 +35,12 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
intptr_t w); | |||
void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
intptr_t w); | |||
void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
unsigned mask, int w); | |||
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
unsigned mask, int w); | |||
void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
unsigned mask, int w, int *left, int *left_top); | |||
#if HAVE_INLINE_ASM | |||
static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |||
const uint8_t *src2, intptr_t w, | |||
int *left, int *left_top) | |||
static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |||
const uint8_t *src2, intptr_t w, | |||
int *left, int *left_top) | |||
{ | |||
x86_reg i = 0; | |||
uint8_t l, lt; | |||
@@ -87,29 +80,22 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |||
#endif /* HAVE_INLINE_ASM */ | |||
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | |||
av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) | |||
{ | |||
av_unused int cpu_flags = av_get_cpu_flags(); | |||
const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); | |||
if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { | |||
c->diff_bytes = ff_diff_bytes_mmx; | |||
c->diff_int16 = ff_diff_int16_mmx; | |||
} | |||
#if HAVE_INLINE_ASM | |||
if (INLINE_MMXEXT(cpu_flags)) { | |||
c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext; | |||
c->sub_median_pred = sub_median_pred_mmxext; | |||
} | |||
#endif /* HAVE_INLINE_ASM */ | |||
if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { | |||
c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; | |||
} | |||
if (EXTERNAL_SSE2(cpu_flags)) { | |||
c->diff_bytes = ff_diff_bytes_sse2; | |||
c->diff_int16 = ff_diff_int16_sse2; | |||
} | |||
if (EXTERNAL_AVX2_FAST(cpu_flags)) { |