* qatar/master: libx264: fix indentation. vorbis: fix overflows in floor1[] vector and inverse db table index. win64: add a XMM clobber test configure option. movdec: Parse the dvc1 atom ARM: ac3: fix ac3_bit_alloc_calc_bap_armv6 swscale: K&R formatting cosmetics for Blackfin code frwu: lowercase the FRWU codec name movdec: fix dts generation in fragmented files fate: make acodec-ac3_fixed test output raw AC3 APIchanges: add missing commit hashes swscale: implement MMX, SSE2 and AVX functions for RGB32 input. ra144enc: drop pointless "encoder" from .long_name bethsoftvideo: fix palette reading. mpc7: use av_fast_padded_malloc() mpc7: simplify handling of packet sizes that are not a multiple of 4 bytes doc: decoding Forward Uncompressed is supported Fix a typo in the x86 asm version of ff_vector_clip_int32() pcmenc: Do not set avpkt->size. ff_alloc_packet: modify the size of the packet to match the requested size Conflicts: doc/APIchanges libavcodec/libx264.c libavcodec/mpc7.c libavformat/isom.h libswscale/Makefile libswscale/bfin/yuv2rgb_bfin.c tests/ref/fate/bethsoft-vid tests/ref/seek/ac3_ac3 Merged-by: Michael Niedermayer <michaelni@gmx.at>tags/n0.11
| @@ -276,6 +276,8 @@ Developer options (useful when working on FFmpeg itself): | |||||
| Cannot be combined with --target-exec | Cannot be combined with --target-exec | ||||
| --samples=PATH location of test samples for FATE, if not set use | --samples=PATH location of test samples for FATE, if not set use | ||||
| \$FATE_SAMPLES at make invocation time. | \$FATE_SAMPLES at make invocation time. | ||||
| --enable-xmm-clobber-test check XMM registers for clobbering (Win64-only; | |||||
| should be used only for debugging purposes) | |||||
| NOTE: Object files are built at the place where configure is launched. | NOTE: Object files are built at the place where configure is launched. | ||||
| EOF | EOF | ||||
| @@ -1085,6 +1087,7 @@ CONFIG_LIST=" | |||||
| vda | vda | ||||
| vdpau | vdpau | ||||
| version3 | version3 | ||||
| xmm_clobber_test | |||||
| x11grab | x11grab | ||||
| zlib | zlib | ||||
| " | " | ||||
| @@ -1779,7 +1782,7 @@ test_deps _muxer _demuxer \ | |||||
| wav \ | wav \ | ||||
| yuv4mpegpipe=yuv4mpeg \ | yuv4mpegpipe=yuv4mpeg \ | ||||
| ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder rm_muxer rm_demuxer" | |||||
| ac3_fixed_test_deps="ac3_fixed_encoder ac3_decoder" | |||||
| mpg_test_deps="mpeg1system_muxer mpegps_demuxer" | mpg_test_deps="mpeg1system_muxer mpegps_demuxer" | ||||
| # default parameters | # default parameters | ||||
| @@ -3304,6 +3307,17 @@ check_ldflags -Wl,--warn-common | |||||
| check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil | check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil | ||||
| test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic | test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic | ||||
| enabled xmm_clobber_test && \ | |||||
| check_ldflags -Wl,--wrap,avcodec_open2 \ | |||||
| -Wl,--wrap,avcodec_decode_audio4 \ | |||||
| -Wl,--wrap,avcodec_decode_video2 \ | |||||
| -Wl,--wrap,avcodec_decode_subtitle2 \ | |||||
| -Wl,--wrap,avcodec_encode_audio2 \ | |||||
| -Wl,--wrap,avcodec_encode_video \ | |||||
| -Wl,--wrap,avcodec_encode_subtitle \ | |||||
| -Wl,--wrap,sws_scale || \ | |||||
| disable xmm_clobber_test | |||||
| echo "X{};" > $TMPV | echo "X{};" > $TMPV | ||||
| if test_ldflags -Wl,--version-script,$TMPV; then | if test_ldflags -Wl,--version-script,$TMPV; then | ||||
| append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver' | append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver' | ||||
| @@ -19,18 +19,18 @@ API changes, most recent first: | |||||
| 2012-01-24 - xxxxxxx - lavfi 2.60.100 | 2012-01-24 - xxxxxxx - lavfi 2.60.100 | ||||
| Add avfilter_graph_dump. | Add avfilter_graph_dump. | ||||
| 2012-02-01 - xxxxxxx - lavc 54.01.0 | |||||
| 2012-02-01 - 316fc74 - lavc 54.01.0 | |||||
| Add av_fast_padded_malloc() as alternative for av_realloc() when aligned | Add av_fast_padded_malloc() as alternative for av_realloc() when aligned | ||||
| memory is required. The buffer will always have FF_INPUT_BUFFER_PADDING_SIZE | memory is required. The buffer will always have FF_INPUT_BUFFER_PADDING_SIZE | ||||
| zero-padded bytes at the end. | zero-padded bytes at the end. | ||||
| 2012-01-31 - xxxxxxx - lavf 54.01.0 | |||||
| 2012-01-31 - dd6d3b0 - lavf 54.01.0 | |||||
| Add avformat_get_riff_video_tags() and avformat_get_riff_audio_tags(). | Add avformat_get_riff_video_tags() and avformat_get_riff_audio_tags(). | ||||
| 2012-01-31 - xxxxxxx - lavc 54.01.0 | |||||
| 2012-01-31 - af08d9a - lavc 54.01.0 | |||||
| Add avcodec_is_open() function. | Add avcodec_is_open() function. | ||||
| 2012-01-30 - xxxxxxx - lavu 51.22.0 - intfloat.h | |||||
| 2012-01-30 - 8b93312 - lavu 51.22.0 - intfloat.h | |||||
| Add a new installed header libavutil/intfloat.h with int/float punning | Add a new installed header libavutil/intfloat.h with int/float punning | ||||
| functions. | functions. | ||||
| @@ -497,6 +497,7 @@ following image formats are supported: | |||||
| @item Flash Screen Video v2 @tab X @tab X | @item Flash Screen Video v2 @tab X @tab X | ||||
| @item Flash Video (FLV) @tab X @tab X | @item Flash Video (FLV) @tab X @tab X | ||||
| @tab Sorenson H.263 used in Flash | @tab Sorenson H.263 used in Flash | ||||
| @item Forward Uncompressed @tab @tab X | |||||
| @item Fraps @tab @tab X | @item Fraps @tab @tab X | ||||
| @item H.261 @tab X @tab X | @item H.261 @tab X @tab X | ||||
| @item H.263 / H.263-1996 @tab X @tab X | @item H.263 / H.263-1996 @tab X @tab X | ||||
| @@ -34,24 +34,23 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 | |||||
| add r0, r0, r4, lsl #1 @ mask + band | add r0, r0, r4, lsl #1 @ mask + band | ||||
| add r4, lr, r4 | add r4, lr, r4 | ||||
| add r7, r7, r2 @ bap + start | add r7, r7, r2 @ bap + start | ||||
| ldrb r10, [r4], #1 | |||||
| 1: | 1: | ||||
| ldrsh r9, [r0], #2 @ mask[band] | ldrsh r9, [r0], #2 @ mask[band] | ||||
| mov r8, #0xff0 | mov r8, #0xff0 | ||||
| sub r9, r9, r12 @ - snr_offset | sub r9, r9, r12 @ - snr_offset | ||||
| mov r11, r10 | |||||
| ldrb r10, [r4], #1 @ band_start_tab[band++] | |||||
| ldrb r10, [r4, #1]! @ band_start_tab[++band] | |||||
| subs r9, r9, r5 @ - floor | subs r9, r9, r5 @ - floor | ||||
| it lt | it lt | ||||
| movlt r9, #0 | movlt r9, #0 | ||||
| cmp r10, r3 @ - end | cmp r10, r3 @ - end | ||||
| and r9, r9, r8, lsl #1 @ & 0x1fe0 | and r9, r9, r8, lsl #1 @ & 0x1fe0 | ||||
| ite gt | ite gt | ||||
| subgt r8, r3, r11 | |||||
| suble r8, r10, r11 | |||||
| subgt r8, r3, r2 | |||||
| suble r8, r10, r2 | |||||
| mov r2, r10 | |||||
| add r9, r9, r5 @ + floor => m | add r9, r9, r5 @ + floor => m | ||||
| tst r8, #1 | tst r8, #1 | ||||
| add r2, r7, r8 | |||||
| add r11, r7, r8 | |||||
| bne 3f | bne 3f | ||||
| b 5f | b 5f | ||||
| 2: | 2: | ||||
| @@ -65,9 +64,9 @@ function ff_ac3_bit_alloc_calc_bap_armv6, export=1 | |||||
| ldrb lr, [r6, lr] | ldrb lr, [r6, lr] | ||||
| strb r8, [r7], #1 @ bap[bin] | strb r8, [r7], #1 @ bap[bin] | ||||
| strb lr, [r7], #1 | strb lr, [r7], #1 | ||||
| 5: cmp r7, r2 | |||||
| 5: cmp r7, r11 | |||||
| blo 2b | blo 2b | ||||
| cmp r3, r11 | |||||
| cmp r3, r10 | |||||
| bgt 1b | bgt 1b | ||||
| pop {r4-r11,pc} | pop {r4-r11,pc} | ||||
| 3: | 3: | ||||
| @@ -61,7 +61,7 @@ static int set_palette(BethsoftvidContext *ctx) | |||||
| palette[a] |= palette[a] >> 6 & 0x30303; | palette[a] |= palette[a] >> 6 & 0x30303; | ||||
| } | } | ||||
| ctx->frame.palette_has_changed = 1; | ctx->frame.palette_has_changed = 1; | ||||
| return 256*3; | |||||
| return 0; | |||||
| } | } | ||||
| static int bethsoftvid_decode_frame(AVCodecContext *avctx, | static int bethsoftvid_decode_frame(AVCodecContext *avctx, | ||||
| @@ -88,7 +88,13 @@ static int bethsoftvid_decode_frame(AVCodecContext *avctx, | |||||
| switch(block_type = bytestream2_get_byte(&vid->g)){ | switch(block_type = bytestream2_get_byte(&vid->g)){ | ||||
| case PALETTE_BLOCK: { | case PALETTE_BLOCK: { | ||||
| return set_palette(vid); | |||||
| int ret; | |||||
| *data_size = 0; | |||||
| if ((ret = set_palette(vid)) < 0) { | |||||
| av_log(avctx, AV_LOG_ERROR, "error reading palette\n"); | |||||
| return ret; | |||||
| } | |||||
| return bytestream2_tell(&vid->g); | |||||
| } | } | ||||
| case VIDEO_YOFF_P_FRAME: | case VIDEO_YOFF_P_FRAME: | ||||
| yoffset = bytestream2_get_le16(&vid->g); | yoffset = bytestream2_get_le16(&vid->g); | ||||
| @@ -130,6 +130,7 @@ int avpriv_unlock_avformat(void); | |||||
| * If avpkt->data is already set, avpkt->size is checked | * If avpkt->data is already set, avpkt->size is checked | ||||
| * to ensure it is large enough. | * to ensure it is large enough. | ||||
| * If avpkt->data is NULL, a new buffer is allocated. | * If avpkt->data is NULL, a new buffer is allocated. | ||||
| * avpkt->size is set to the specified size. | |||||
| * All other AVPacket fields will be reset with av_init_packet(). | * All other AVPacket fields will be reset with av_init_packet(). | ||||
| * @param size the minimum required packet size | * @param size the minimum required packet size | ||||
| * @return 0 on success, negative error code on failure | * @return 0 on success, negative error code on failure | ||||
| @@ -188,12 +188,12 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf, | |||||
| do { | do { | ||||
| bufsize = orig_bufsize; | bufsize = orig_bufsize; | ||||
| if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0) | |||||
| return -1; | |||||
| if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0) | |||||
| return -1; | |||||
| bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0); | |||||
| if (bufsize < 0) | |||||
| return -1; | |||||
| bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0); | |||||
| if (bufsize < 0) | |||||
| return -1; | |||||
| } while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc)); | } while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc)); | ||||
| /* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */ | /* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */ | ||||
| @@ -66,8 +66,6 @@ typedef struct { | |||||
| int buf_size; | int buf_size; | ||||
| AVLFG rnd; | AVLFG rnd; | ||||
| int frames_to_skip; | int frames_to_skip; | ||||
| uint8_t *buffer; | |||||
| int buffer_size; | |||||
| /* for synthesis */ | /* for synthesis */ | ||||
| DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2]; | DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2]; | ||||
| int synth_buf_offset[MPA_MAX_CHANNELS]; | int synth_buf_offset[MPA_MAX_CHANNELS]; | ||||
| @@ -200,34 +200,46 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, | |||||
| int *got_frame_ptr, AVPacket *avpkt) | int *got_frame_ptr, AVPacket *avpkt) | ||||
| { | { | ||||
| const uint8_t *buf = avpkt->data; | const uint8_t *buf = avpkt->data; | ||||
| int buf_size = avpkt->size; | |||||
| int buf_size; | |||||
| MPCContext *c = avctx->priv_data; | MPCContext *c = avctx->priv_data; | ||||
| GetBitContext gb; | GetBitContext gb; | ||||
| int i, ch; | int i, ch; | ||||
| int mb = -1; | int mb = -1; | ||||
| Band *bands = c->bands; | Band *bands = c->bands; | ||||
| int off, ret; | |||||
| int off, ret, last_frame, skip; | |||||
| int bits_used, bits_avail; | int bits_used, bits_avail; | ||||
| memset(bands, 0, sizeof(*bands) * (c->maxbands + 1)); | memset(bands, 0, sizeof(*bands) * (c->maxbands + 1)); | ||||
| if(buf_size <= 4){ | |||||
| av_log(avctx, AV_LOG_ERROR, "Too small buffer passed (%i bytes)\n", buf_size); | |||||
| return AVERROR(EINVAL); | |||||
| buf_size = avpkt->size & ~3; | |||||
| if (buf_size <= 0) { | |||||
| av_log(avctx, AV_LOG_ERROR, "packet size is too small (%i bytes)\n", | |||||
| avpkt->size); | |||||
| return AVERROR_INVALIDDATA; | |||||
| } | |||||
| if (buf_size != avpkt->size) { | |||||
| av_log(avctx, AV_LOG_WARNING, "packet size is not a multiple of 4. " | |||||
| "extra bytes at the end will be skipped.\n"); | |||||
| } | } | ||||
| skip = buf[0]; | |||||
| last_frame = buf[1]; | |||||
| buf += 4; | |||||
| buf_size -= 4; | |||||
| /* get output buffer */ | /* get output buffer */ | ||||
| c->frame.nb_samples = buf[1] ? c->lastframelen : MPC_FRAME_SIZE; | |||||
| c->frame.nb_samples = last_frame ? c->lastframelen : MPC_FRAME_SIZE; | |||||
| if ((ret = avctx->get_buffer(avctx, &c->frame)) < 0) { | if ((ret = avctx->get_buffer(avctx, &c->frame)) < 0) { | ||||
| av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| av_fast_padded_malloc(&c->buffer, &c->buffer_size, FFALIGN(buf_size - 1, 4)); | |||||
| if (!c->buffer) | |||||
| av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size); | |||||
| if (!c->bits) | |||||
| return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
| c->dsp.bswap_buf((uint32_t*)c->buffer, (const uint32_t*)(buf + 4), (buf_size - 4) >> 2); | |||||
| init_get_bits(&gb, c->buffer, (buf_size - 4)* 8); | |||||
| skip_bits_long(&gb, buf[0]); | |||||
| c->dsp.bswap_buf((uint32_t *)c->bits, (const uint32_t *)buf, buf_size >> 2); | |||||
| init_get_bits(&gb, c->bits, buf_size * 8); | |||||
| skip_bits_long(&gb, skip); | |||||
| /* read subband indexes */ | /* read subband indexes */ | ||||
| for(i = 0; i <= c->maxbands; i++){ | for(i = 0; i <= c->maxbands; i++){ | ||||
| @@ -284,21 +296,21 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data, | |||||
| ff_mpc_dequantize_and_synth(c, mb, c->frame.data[0], 2); | ff_mpc_dequantize_and_synth(c, mb, c->frame.data[0], 2); | ||||
| bits_used = get_bits_count(&gb); | bits_used = get_bits_count(&gb); | ||||
| bits_avail = (buf_size - 4) * 8; | |||||
| if(!buf[1] && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))){ | |||||
| bits_avail = buf_size * 8; | |||||
| if (!last_frame && ((bits_avail < bits_used) || (bits_used + 32 <= bits_avail))) { | |||||
| av_log(NULL,0, "Error decoding frame: used %i of %i bits\n", bits_used, bits_avail); | av_log(NULL,0, "Error decoding frame: used %i of %i bits\n", bits_used, bits_avail); | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| if(c->frames_to_skip){ | if(c->frames_to_skip){ | ||||
| c->frames_to_skip--; | c->frames_to_skip--; | ||||
| *got_frame_ptr = 0; | *got_frame_ptr = 0; | ||||
| return buf_size; | |||||
| return avpkt->size; | |||||
| } | } | ||||
| *got_frame_ptr = 1; | *got_frame_ptr = 1; | ||||
| *(AVFrame *)data = c->frame; | *(AVFrame *)data = c->frame; | ||||
| return buf_size; | |||||
| return avpkt->size; | |||||
| } | } | ||||
| static void mpc7_decode_flush(AVCodecContext *avctx) | static void mpc7_decode_flush(AVCodecContext *avctx) | ||||
| @@ -312,8 +324,8 @@ static void mpc7_decode_flush(AVCodecContext *avctx) | |||||
| static av_cold int mpc7_decode_close(AVCodecContext *avctx) | static av_cold int mpc7_decode_close(AVCodecContext *avctx) | ||||
| { | { | ||||
| MPCContext *c = avctx->priv_data; | MPCContext *c = avctx->priv_data; | ||||
| av_freep(&c->buffer); | |||||
| c->buffer_size = 0; | |||||
| av_freep(&c->bits); | |||||
| c->buf_size = 0; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -194,7 +194,6 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, | |||||
| return -1; | return -1; | ||||
| } | } | ||||
| avpkt->size = frame->nb_samples * avctx->channels * sample_size; | |||||
| *got_packet_ptr = 1; | *got_packet_ptr = 1; | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -521,5 +521,5 @@ AVCodec ff_ra_144_encoder = { | |||||
| .close = ra144_encode_close, | .close = ra144_encode_close, | ||||
| .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, | ||||
| AV_SAMPLE_FMT_NONE }, | AV_SAMPLE_FMT_NONE }, | ||||
| .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K) encoder"), | |||||
| .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"), | |||||
| }; | }; | ||||
| @@ -919,16 +919,14 @@ int ff_alloc_packet(AVPacket *avpkt, int size) | |||||
| if (avpkt->data) { | if (avpkt->data) { | ||||
| uint8_t *pkt_data; | uint8_t *pkt_data; | ||||
| int pkt_size; | |||||
| if (avpkt->size < size) | if (avpkt->size < size) | ||||
| return AVERROR(EINVAL); | return AVERROR(EINVAL); | ||||
| pkt_data = avpkt->data; | pkt_data = avpkt->data; | ||||
| pkt_size = avpkt->size; | |||||
| av_init_packet(avpkt); | av_init_packet(avpkt); | ||||
| avpkt->data = pkt_data; | avpkt->data = pkt_data; | ||||
| avpkt->size = pkt_size; | |||||
| avpkt->size = size; | |||||
| return 0; | return 0; | ||||
| } else { | } else { | ||||
| return av_new_packet(avpkt, size); | return av_new_packet(avpkt, size); | ||||
| @@ -156,7 +156,7 @@ void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) | |||||
| } | } | ||||
| } | } | ||||
| static inline void render_line_unrolled(intptr_t x, uint8_t y, int x1, | |||||
| static inline void render_line_unrolled(intptr_t x, int y, int x1, | |||||
| intptr_t sy, int ady, int adx, | intptr_t sy, int ady, int adx, | ||||
| float *buf) | float *buf) | ||||
| { | { | ||||
| @@ -168,30 +168,30 @@ static inline void render_line_unrolled(intptr_t x, uint8_t y, int x1, | |||||
| if (err >= 0) { | if (err >= 0) { | ||||
| err += ady - adx; | err += ady - adx; | ||||
| y += sy; | y += sy; | ||||
| buf[x++] = ff_vorbis_floor1_inverse_db_table[y]; | |||||
| buf[x++] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; | |||||
| } | } | ||||
| buf[x] = ff_vorbis_floor1_inverse_db_table[y]; | |||||
| buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; | |||||
| } | } | ||||
| if (x <= 0) { | if (x <= 0) { | ||||
| if (err + ady >= 0) | if (err + ady >= 0) | ||||
| y += sy; | y += sy; | ||||
| buf[x] = ff_vorbis_floor1_inverse_db_table[y]; | |||||
| buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; | |||||
| } | } | ||||
| } | } | ||||
| static void render_line(int x0, uint8_t y0, int x1, int y1, float *buf) | |||||
| static void render_line(int x0, int y0, int x1, int y1, float *buf) | |||||
| { | { | ||||
| int dy = y1 - y0; | int dy = y1 - y0; | ||||
| int adx = x1 - x0; | int adx = x1 - x0; | ||||
| int ady = FFABS(dy); | int ady = FFABS(dy); | ||||
| int sy = dy < 0 ? -1 : 1; | int sy = dy < 0 ? -1 : 1; | ||||
| buf[x0] = ff_vorbis_floor1_inverse_db_table[y0]; | |||||
| buf[x0] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y0)]; | |||||
| if (ady*2 <= adx) { // optimized common case | if (ady*2 <= adx) { // optimized common case | ||||
| render_line_unrolled(x0, y0, x1, sy, ady, adx, buf); | render_line_unrolled(x0, y0, x1, sy, ady, adx, buf); | ||||
| } else { | } else { | ||||
| int base = dy / adx; | int base = dy / adx; | ||||
| int x = x0; | int x = x0; | ||||
| uint8_t y = y0; | |||||
| int y = y0; | |||||
| int err = -adx; | int err = -adx; | ||||
| ady -= FFABS(base) * adx; | ady -= FFABS(base) * adx; | ||||
| while (++x < x1) { | while (++x < x1) { | ||||
| @@ -201,7 +201,7 @@ static void render_line(int x0, uint8_t y0, int x1, int y1, float *buf) | |||||
| err -= adx; | err -= adx; | ||||
| y += sy; | y += sy; | ||||
| } | } | ||||
| buf[x] = ff_vorbis_floor1_inverse_db_table[y]; | |||||
| buf[x] = ff_vorbis_floor1_inverse_db_table[av_clip_uint8(y)]; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -210,8 +210,7 @@ void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, | |||||
| uint16_t *y_list, int *flag, | uint16_t *y_list, int *flag, | ||||
| int multiplier, float *out, int samples) | int multiplier, float *out, int samples) | ||||
| { | { | ||||
| int lx, i; | |||||
| uint8_t ly; | |||||
| int lx, ly, i; | |||||
| lx = 0; | lx = 0; | ||||
| ly = y_list[0] * multiplier; | ly = y_list[0] * multiplier; | ||||
| for (i = 1; i < values; i++) { | for (i = 1; i < values; i++) { | ||||
| @@ -1256,20 +1256,20 @@ static int vorbis_floor1_decode(vorbis_context *vc, | |||||
| floor1_flag[i] = 1; | floor1_flag[i] = 1; | ||||
| if (val >= room) { | if (val >= room) { | ||||
| if (highroom > lowroom) { | if (highroom > lowroom) { | ||||
| floor1_Y_final[i] = val - lowroom + predicted; | |||||
| floor1_Y_final[i] = av_clip_uint16(val - lowroom + predicted); | |||||
| } else { | } else { | ||||
| floor1_Y_final[i] = predicted - val + highroom - 1; | |||||
| floor1_Y_final[i] = av_clip_uint16(predicted - val + highroom - 1); | |||||
| } | } | ||||
| } else { | } else { | ||||
| if (val & 1) { | if (val & 1) { | ||||
| floor1_Y_final[i] = predicted - (val + 1) / 2; | |||||
| floor1_Y_final[i] = av_clip_uint16(predicted - (val + 1) / 2); | |||||
| } else { | } else { | ||||
| floor1_Y_final[i] = predicted + val / 2; | |||||
| floor1_Y_final[i] = av_clip_uint16(predicted + val / 2); | |||||
| } | } | ||||
| } | } | ||||
| } else { | } else { | ||||
| floor1_flag[i] = 0; | floor1_flag[i] = 0; | ||||
| floor1_Y_final[i] = predicted; | |||||
| floor1_Y_final[i] = av_clip_uint16(predicted); | |||||
| } | } | ||||
| av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n", | av_dlog(NULL, " Decoded floor(%d) = %u / val %u\n", | ||||
| @@ -83,3 +83,4 @@ OBJS-$(HAVE_MMX) += x86/dsputil_mmx.o \ | |||||
| x86/mpegvideo_mmx.o \ | x86/mpegvideo_mmx.o \ | ||||
| x86/simple_idct_mmx.o \ | x86/simple_idct_mmx.o \ | ||||
| OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o | |||||
| @@ -1063,7 +1063,7 @@ emu_edge mmx | |||||
| ; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) | ; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) | ||||
| ; %5 = suffix | ; %5 = suffix | ||||
| %macro VECTOR_CLIP_INT32 4-5 | %macro VECTOR_CLIP_INT32 4-5 | ||||
| cglobal vector_clip_int32%5, 5,5,%2, dst, src, min, max, len | |||||
| cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len | |||||
| %if %4 | %if %4 | ||||
| cvtsi2ss m4, minm | cvtsi2ss m4, minm | ||||
| cvtsi2ss m5, maxm | cvtsi2ss m5, maxm | ||||
| @@ -0,0 +1,80 @@ | |||||
| /* | |||||
| * check XMM registers for clobbers on Win64 | |||||
| * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavutil/x86/w64xmmtest.h" | |||||
/* Each definition below uses wrap() to define __wrap_<name> and has it call testxmmclobbers(<name>, ...), which invokes __real_<name> with the same arguments and compares XMM register snapshots taken before and after the call. */
| wrap(avcodec_open2(AVCodecContext *avctx, | |||||
| AVCodec *codec, | |||||
| AVDictionary **options)) | |||||
| { | |||||
| testxmmclobbers(avcodec_open2, avctx, codec, options); | |||||
| } | |||||
| wrap(avcodec_decode_audio4(AVCodecContext *avctx, | |||||
| AVFrame *frame, | |||||
| int *got_frame_ptr, | |||||
| AVPacket *avpkt)) | |||||
| { | |||||
| testxmmclobbers(avcodec_decode_audio4, avctx, frame, | |||||
| got_frame_ptr, avpkt); | |||||
| } | |||||
| wrap(avcodec_decode_video2(AVCodecContext *avctx, | |||||
| AVFrame *picture, | |||||
| int *got_picture_ptr, | |||||
| AVPacket *avpkt)) | |||||
| { | |||||
| testxmmclobbers(avcodec_decode_video2, avctx, picture, | |||||
| got_picture_ptr, avpkt); | |||||
| } | |||||
| wrap(avcodec_decode_subtitle2(AVCodecContext *avctx, | |||||
| AVSubtitle *sub, | |||||
| int *got_sub_ptr, | |||||
| AVPacket *avpkt)) | |||||
| { | |||||
| testxmmclobbers(avcodec_decode_subtitle2, avctx, sub, | |||||
| got_sub_ptr, avpkt); | |||||
| } | |||||
| wrap(avcodec_encode_audio2(AVCodecContext *avctx, | |||||
| AVPacket *avpkt, | |||||
| const AVFrame *frame, | |||||
| int *got_packet_ptr)) | |||||
| { | |||||
| testxmmclobbers(avcodec_encode_audio2, avctx, avpkt, frame, | |||||
| got_packet_ptr); | |||||
| } | |||||
| wrap(avcodec_encode_video(AVCodecContext *avctx, | |||||
| uint8_t *buf, int buf_size, | |||||
| const AVFrame *pict)) | |||||
| { | |||||
| testxmmclobbers(avcodec_encode_video, avctx, buf, buf_size, pict); | |||||
| } | |||||
| wrap(avcodec_encode_subtitle(AVCodecContext *avctx, | |||||
| uint8_t *buf, int buf_size, | |||||
| const AVSubtitle *sub)) | |||||
| { | |||||
| testxmmclobbers(avcodec_encode_subtitle, avctx, buf, buf_size, sub); | |||||
| } | |||||
| @@ -129,6 +129,7 @@ typedef struct MOVStreamContext { | |||||
| int has_palette; | int has_palette; | ||||
| int64_t data_size; | int64_t data_size; | ||||
| uint32_t tmcd_flags; ///< tmcd track flags | uint32_t tmcd_flags; ///< tmcd track flags | ||||
| int64_t track_end; ///< used for dts generation in fragmented movie files | |||||
| } MOVStreamContext; | } MOVStreamContext; | ||||
| typedef struct MOVContext { | typedef struct MOVContext { | ||||
| @@ -1012,6 +1012,32 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| static int mov_read_dvc1(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| { | |||||
| AVStream *st; | |||||
| uint8_t profile_level; | |||||
| if (c->fc->nb_streams < 1) | |||||
| return 0; | |||||
| st = c->fc->streams[c->fc->nb_streams-1]; | |||||
| if (atom.size >= (1<<28) || atom.size < 7) | |||||
| return AVERROR_INVALIDDATA; | |||||
| profile_level = avio_r8(pb); | |||||
| if (profile_level & 0xf0 != 0xc0) | |||||
| return 0; | |||||
| av_free(st->codec->extradata); | |||||
| st->codec->extradata = av_mallocz(atom.size - 7 + FF_INPUT_BUFFER_PADDING_SIZE); | |||||
| if (!st->codec->extradata) | |||||
| return AVERROR(ENOMEM); | |||||
| st->codec->extradata_size = atom.size - 7; | |||||
| avio_seek(pb, 6, SEEK_CUR); | |||||
| avio_read(pb, st->codec->extradata, st->codec->extradata_size); | |||||
| return 0; | |||||
| } | |||||
| /** | /** | ||||
| * An strf atom is a BITMAPINFOHEADER struct. This struct is 40 bytes itself, | * An strf atom is a BITMAPINFOHEADER struct. This struct is 40 bytes itself, | ||||
| * but can have extradata appended at the end after the 40 bytes belonging | * but can have extradata appended at the end after the 40 bytes belonging | ||||
| @@ -1706,6 +1732,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| st->nb_frames= total_sample_count; | st->nb_frames= total_sample_count; | ||||
| if (duration) | if (duration) | ||||
| st->duration= duration; | st->duration= duration; | ||||
| sc->track_end = duration; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -2326,7 +2353,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| if (flags & 0x001) data_offset = avio_rb32(pb); | if (flags & 0x001) data_offset = avio_rb32(pb); | ||||
| if (flags & 0x004) first_sample_flags = avio_rb32(pb); | if (flags & 0x004) first_sample_flags = avio_rb32(pb); | ||||
| dts = st->duration - sc->time_offset; | |||||
| dts = sc->track_end - sc->time_offset; | |||||
| offset = frag->base_data_offset + data_offset; | offset = frag->base_data_offset + data_offset; | ||||
| distance = 0; | distance = 0; | ||||
| av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags); | av_dlog(c->fc, "first sample flags 0x%x\n", first_sample_flags); | ||||
| @@ -2356,7 +2383,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom) | |||||
| sc->data_size += sample_size; | sc->data_size += sample_size; | ||||
| } | } | ||||
| frag->moof_offset = offset; | frag->moof_offset = offset; | ||||
| st->duration = dts + sc->time_offset; | |||||
| st->duration = sc->track_end = dts + sc->time_offset; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -2538,6 +2565,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = { | |||||
| { MKTAG('w','f','e','x'), mov_read_wfex }, | { MKTAG('w','f','e','x'), mov_read_wfex }, | ||||
| { MKTAG('c','m','o','v'), mov_read_cmov }, | { MKTAG('c','m','o','v'), mov_read_cmov }, | ||||
| { MKTAG('c','h','a','n'), mov_read_chan }, /* channel layout */ | { MKTAG('c','h','a','n'), mov_read_chan }, /* channel layout */ | ||||
| { MKTAG('d','v','c','1'), mov_read_dvc1 }, | |||||
| { 0, NULL } | { 0, NULL } | ||||
| }; | }; | ||||
| @@ -0,0 +1,71 @@ | |||||
| /* | |||||
| * check XMM registers for clobbers on Win64 | |||||
| * Copyright (c) 2008 Ramiro Polla <ramiro.polla@gmail.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include <stdlib.h> | |||||
| #include <stdarg.h> | |||||
| #include "libavutil/bswap.h" | |||||
/* storexmmregs(mem): store xmm6..xmm15 with unaligned moves (movups) into mem at 16-byte steps, offsets 0x00..0x90, so mem must provide at least 160 bytes. */
| #define storexmmregs(mem) \ | |||||
| __asm__ volatile( \ | |||||
| "movups %%xmm6 , 0x00(%0)\n\t" \ | |||||
| "movups %%xmm7 , 0x10(%0)\n\t" \ | |||||
| "movups %%xmm8 , 0x20(%0)\n\t" \ | |||||
| "movups %%xmm9 , 0x30(%0)\n\t" \ | |||||
| "movups %%xmm10, 0x40(%0)\n\t" \ | |||||
| "movups %%xmm11, 0x50(%0)\n\t" \ | |||||
| "movups %%xmm12, 0x60(%0)\n\t" \ | |||||
| "movups %%xmm13, 0x70(%0)\n\t" \ | |||||
| "movups %%xmm14, 0x80(%0)\n\t" \ | |||||
| "movups %%xmm15, 0x90(%0)\n\t" \ | |||||
| :: "r"(mem) : "memory") | |||||
/* testxmmclobbers(func, ctx, ...): function-body macro. Snapshots xmm6..xmm15 into xmm[0], calls __real_<func>(ctx, ...), snapshots again into xmm[1]; if the snapshots differ it logs every changed register (old value, then new) through av_log and calls abort(), otherwise it returns the int result of the real call. */
/* NOTE(review): uses memcmp, PRIx64 and av_log, none of which are declared by the includes visible in this header - presumably the including file provides them first; confirm. */
| #define testxmmclobbers(func, ctx, ...) \ | |||||
| uint64_t xmm[2][10][2]; \ | |||||
| int ret; \ | |||||
| storexmmregs(xmm[0]); \ | |||||
| ret = __real_ ## func(ctx, __VA_ARGS__); \ | |||||
| storexmmregs(xmm[1]); \ | |||||
| if (memcmp(xmm[0], xmm[1], sizeof(xmm[0]))) { \ | |||||
| int i; \ | |||||
| av_log(ctx, AV_LOG_ERROR, \ | |||||
| "XMM REGS CLOBBERED IN %s!\n", #func); \ | |||||
| for (i = 0; i < 10; i ++) \ | |||||
| if (xmm[0][i][0] != xmm[1][i][0] || \ | |||||
| xmm[0][i][1] != xmm[1][i][1]) { \ | |||||
| av_log(ctx, AV_LOG_ERROR, \ | |||||
| "xmm%-2d = %016"PRIx64"%016"PRIx64"\n", \ | |||||
| 6 + i, av_bswap64(xmm[0][i][0]), \ | |||||
| av_bswap64(xmm[0][i][1])); \ | |||||
| av_log(ctx, AV_LOG_ERROR, \ | |||||
| " -> %016"PRIx64"%016"PRIx64"\n", \ | |||||
| av_bswap64(xmm[1][i][0]), \ | |||||
| av_bswap64(xmm[1][i][1])); \ | |||||
| } \ | |||||
| abort(); \ | |||||
| } \ | |||||
| return ret | |||||
/* wrap(func): func is a full declarator such as name(args). Declares int __real_name(args) and int __wrap_name(args), then repeats the __wrap_ declarator so that the brace-enclosed body following the macro use becomes the definition of __wrap_name. */
| #define wrap(func) \ | |||||
| int __real_ ## func; \ | |||||
| int __wrap_ ## func; \ | |||||
| int __wrap_ ## func | |||||
| @@ -25,6 +25,8 @@ MMX-OBJS-$(HAVE_YASM) += x86/input.o \ | |||||
| $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) | $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) | ||||
| OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o | |||||
| TESTPROGS = colorspace swscale | TESTPROGS = colorspace swscale | ||||
| DIRS = bfin mlib ppc sparc x86 | DIRS = bfin mlib ppc sparc x86 | ||||
| @@ -30,11 +30,11 @@ and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts. | |||||
| The following calculation is used for the conversion: | The following calculation is used for the conversion: | ||||
| r = clipz((y-oy)*cy + crv*(v-128)) | |||||
| g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128)) | |||||
| b = clipz((y-oy)*cy + cbu*(u-128)) | |||||
| r = clipz((y - oy) * cy + crv * (v - 128)) | |||||
| g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) | |||||
| b = clipz((y - oy) * cy + cbu * (u - 128)) | |||||
| y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision. | |||||
| y, u, v are prescaled by a factor of 4 i.e. left-shifted to gain precision. | |||||
| New factorization to eliminate the truncation error which was | New factorization to eliminate the truncation error which was | ||||
| @@ -47,7 +47,7 @@ occurring due to the byteop3p. | |||||
| 2) Scale operands up by a factor of 4 not 8 because Blackfin | 2) Scale operands up by a factor of 4 not 8 because Blackfin | ||||
| multiplies include a shift. | multiplies include a shift. | ||||
| 3) Compute into the accumulators cy*yx0, cy*yx1. | |||||
| 3) Compute into the accumulators cy * yx0, cy * yx1. | |||||
| 4) Compute each of the linear equations: | 4) Compute each of the linear equations: | ||||
| r = clipz((y - oy) * cy + crv * (v - 128)) | r = clipz((y - oy) * cy + crv * (v - 128)) | ||||
| @@ -73,7 +73,7 @@ occurring due to the byteop3p. | |||||
| Where coeffs have the following layout in memory. | Where coeffs have the following layout in memory. | ||||
| uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv; | |||||
| uint32_t oy, oc, zero, cy, crv, rmask, cbu, bmask, cgu, cgv; | |||||
| coeffs is a pointer to oy. | coeffs is a pointer to oy. | ||||
| @@ -27,32 +27,34 @@ | |||||
| #include <assert.h> | #include <assert.h> | ||||
| #include "config.h" | #include "config.h" | ||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #include "libswscale/rgb2rgb.h" | #include "libswscale/rgb2rgb.h" | ||||
| #include "libswscale/swscale.h" | #include "libswscale/swscale.h" | ||||
| #include "libswscale/swscale_internal.h" | #include "libswscale/swscale_internal.h" | ||||
| #if defined (__FDPIC__) && CONFIG_SRAM | #if defined (__FDPIC__) && CONFIG_SRAM | ||||
| #define L1CODE __attribute__ ((l1_text)) | |||||
| #define L1CODE __attribute__((l1_text)) | |||||
| #else | #else | ||||
| #define L1CODE | #define L1CODE | ||||
| #endif | #endif | ||||
| int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||||
| int width, int height, | |||||
| int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, | |||||
| uint8_t *vdst, int width, int height, | |||||
| int lumStride, int chromStride, int srcStride) L1CODE; | int lumStride, int chromStride, int srcStride) L1CODE; | ||||
| int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, | |||||
| int width, int height, | |||||
| int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, | |||||
| uint8_t *vdst, int width, int height, | |||||
| int lumStride, int chromStride, int srcStride) L1CODE; | int lumStride, int chromStride, int srcStride) L1CODE; | ||||
| static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||||
| int srcSliceH, uint8_t* dst[], int dstStride[]) | |||||
| static int uyvytoyv12_unscaled(SwsContext *c, uint8_t *src[], int srcStride[], | |||||
| int srcSliceY, int srcSliceH, uint8_t *dst[], | |||||
| int dstStride[]) | |||||
| { | { | ||||
| uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY; | |||||
| uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2; | |||||
| uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2; | |||||
| uint8_t *ip = src[0] + srcStride[0]*srcSliceY; | |||||
| int w = dstStride[0]; | |||||
| uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; | |||||
| uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; | |||||
| uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; | |||||
| uint8_t *ip = src[0] + srcStride[0] * srcSliceY; | |||||
| int w = dstStride[0]; | |||||
| ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH, | ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH, | ||||
| dstStride[0], dstStride[1], srcStride[0]); | dstStride[0], dstStride[1], srcStride[0]); | ||||
| @@ -60,14 +62,15 @@ static int uyvytoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i | |||||
| return srcSliceH; | return srcSliceH; | ||||
| } | } | ||||
| static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, | |||||
| int srcSliceH, uint8_t* dst[], int dstStride[]) | |||||
| static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t *src[], int srcStride[], | |||||
| int srcSliceY, int srcSliceH, uint8_t *dst[], | |||||
| int dstStride[]) | |||||
| { | { | ||||
| uint8_t *dsty = dst[0] + dstStride[0]*srcSliceY; | |||||
| uint8_t *dstu = dst[1] + dstStride[1]*srcSliceY/2; | |||||
| uint8_t *dstv = dst[2] + dstStride[2]*srcSliceY/2; | |||||
| uint8_t *ip = src[0] + srcStride[0]*srcSliceY; | |||||
| int w = dstStride[0]; | |||||
| uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; | |||||
| uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; | |||||
| uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; | |||||
| uint8_t *ip = src[0] + srcStride[0] * srcSliceY; | |||||
| int w = dstStride[0]; | |||||
| ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH, | ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH, | ||||
| dstStride[0], dstStride[1], srcStride[0]); | dstStride[0], dstStride[1], srcStride[0]); | ||||
| @@ -75,15 +78,16 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i | |||||
| return srcSliceH; | return srcSliceH; | ||||
| } | } | ||||
| void ff_bfin_get_unscaled_swscale(SwsContext *c) | void ff_bfin_get_unscaled_swscale(SwsContext *c) | ||||
| { | { | ||||
| if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) { | if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) { | ||||
| av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); | |||||
| av_log(NULL, AV_LOG_VERBOSE, | |||||
| "selecting Blackfin optimized uyvytoyv12_unscaled\n"); | |||||
| c->swScale = uyvytoyv12_unscaled; | c->swScale = uyvytoyv12_unscaled; | ||||
| } | } | ||||
| if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) { | if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) { | ||||
| av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); | |||||
| av_log(NULL, AV_LOG_VERBOSE, | |||||
| "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); | |||||
| c->swScale = yuyvtoyv12_unscaled; | c->swScale = yuyvtoyv12_unscaled; | ||||
| } | } | ||||
| } | } | ||||
| @@ -26,15 +26,16 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include <inttypes.h> | #include <inttypes.h> | ||||
| #include <assert.h> | #include <assert.h> | ||||
| #include "config.h" | |||||
| #include <unistd.h> | #include <unistd.h> | ||||
| #include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
| #include "config.h" | |||||
| #include "libswscale/rgb2rgb.h" | #include "libswscale/rgb2rgb.h" | ||||
| #include "libswscale/swscale.h" | #include "libswscale/swscale.h" | ||||
| #include "libswscale/swscale_internal.h" | #include "libswscale/swscale_internal.h" | ||||
| #if defined(__FDPIC__) && CONFIG_SRAM | #if defined(__FDPIC__) && CONFIG_SRAM | ||||
| #define L1CODE __attribute__ ((l1_text)) | |||||
| #define L1CODE __attribute__((l1_text)) | |||||
| #else | #else | ||||
| #define L1CODE | #define L1CODE | ||||
| #endif | #endif | ||||
| @@ -48,21 +49,20 @@ void ff_bfin_yuv2rgb565_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, | |||||
| void ff_bfin_yuv2rgb24_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, | void ff_bfin_yuv2rgb24_line(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, | ||||
| int w, uint32_t *coeffs) L1CODE; | int w, uint32_t *coeffs) L1CODE; | ||||
| typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, | |||||
| int w, uint32_t *coeffs); | |||||
| typedef void (*ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out, | |||||
| int w, uint32_t *coeffs); | |||||
| static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) | static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) | ||||
| { | { | ||||
| int oy; | int oy; | ||||
| oy = c->yOffset&0xffff; | |||||
| oy = oy >> 3; // keep everything U8.0 for offset calculation | |||||
| oy = c->yOffset & 0xffff; | |||||
| oy = oy >> 3; // keep everything U8.0 for offset calculation | |||||
| c->oc = 128*0x01010101U; | |||||
| c->oy = oy*0x01010101U; | |||||
| c->oc = 128 * 0x01010101U; | |||||
| c->oy = oy * 0x01010101U; | |||||
| /* copy 64bit vector coeffs down to 32bit vector coeffs */ | /* copy 64bit vector coeffs down to 32bit vector coeffs */ | ||||
| c->cy = c->yCoeff; | |||||
| c->cy = c->yCoeff; | |||||
| c->zero = 0; | c->zero = 0; | ||||
| if (rgb) { | if (rgb) { | ||||
| @@ -77,7 +77,6 @@ static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) | |||||
| c->cgv = c->ugCoeff; | c->cgv = c->ugCoeff; | ||||
| } | } | ||||
| if (masks == 555) { | if (masks == 555) { | ||||
| c->rmask = 0x001f * 0x00010001U; | c->rmask = 0x001f * 0x00010001U; | ||||
| c->gmask = 0x03e0 * 0x00010001U; | c->gmask = 0x03e0 * 0x00010001U; | ||||
| @@ -89,27 +88,25 @@ static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) | |||||
| } | } | ||||
| } | } | ||||
| static int core_yuv420_rgb(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | |||||
| uint8_t **oplanes, int *outstrides, | |||||
| ltransform lcscf, int rgb, int masks) | |||||
| static int core_yuv420_rgb(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, uint8_t **oplanes, | |||||
| int *outstrides, ltransform lcscf, | |||||
| int rgb, int masks) | |||||
| { | { | ||||
| uint8_t *py,*pu,*pv,*op; | |||||
| uint8_t *py, *pu, *pv, *op; | |||||
| int w = instrides[0]; | int w = instrides[0]; | ||||
| int h2 = srcSliceH>>1; | |||||
| int h2 = srcSliceH >> 1; | |||||
| int i; | int i; | ||||
| bfin_prepare_coefficients(c, rgb, masks); | bfin_prepare_coefficients(c, rgb, masks); | ||||
| py = in[0]; | py = in[0]; | ||||
| pu = in[1+(1^rgb)]; | |||||
| pv = in[1+(0^rgb)]; | |||||
| op = oplanes[0] + srcSliceY*outstrides[0]; | |||||
| pu = in[1 + (1 ^ rgb)]; | |||||
| pv = in[1 + (0 ^ rgb)]; | |||||
| for (i=0;i<h2;i++) { | |||||
| op = oplanes[0] + srcSliceY * outstrides[0]; | |||||
| for (i = 0; i < h2; i++) { | |||||
| lcscf(py, pu, pv, op, w, &c->oy); | lcscf(py, pu, pv, op, w, &c->oy); | ||||
| py += instrides[0]; | py += instrides[0]; | ||||
| @@ -126,9 +123,7 @@ static int core_yuv420_rgb(SwsContext *c, | |||||
| return srcSliceH; | return srcSliceH; | ||||
| } | } | ||||
| static int bfin_yuv420_rgb555(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| static int bfin_yuv420_rgb555(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | int srcSliceY, int srcSliceH, | ||||
| uint8_t **oplanes, int *outstrides) | uint8_t **oplanes, int *outstrides) | ||||
| { | { | ||||
| @@ -136,8 +131,7 @@ static int bfin_yuv420_rgb555(SwsContext *c, | |||||
| outstrides, ff_bfin_yuv2rgb555_line, 1, 555); | outstrides, ff_bfin_yuv2rgb555_line, 1, 555); | ||||
| } | } | ||||
| static int bfin_yuv420_bgr555(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| static int bfin_yuv420_bgr555(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | int srcSliceY, int srcSliceH, | ||||
| uint8_t **oplanes, int *outstrides) | uint8_t **oplanes, int *outstrides) | ||||
| { | { | ||||
| @@ -145,8 +139,7 @@ static int bfin_yuv420_bgr555(SwsContext *c, | |||||
| outstrides, ff_bfin_yuv2rgb555_line, 0, 555); | outstrides, ff_bfin_yuv2rgb555_line, 0, 555); | ||||
| } | } | ||||
| static int bfin_yuv420_rgb24(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| static int bfin_yuv420_rgb24(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | int srcSliceY, int srcSliceH, | ||||
| uint8_t **oplanes, int *outstrides) | uint8_t **oplanes, int *outstrides) | ||||
| { | { | ||||
| @@ -154,8 +147,7 @@ static int bfin_yuv420_rgb24(SwsContext *c, | |||||
| outstrides, ff_bfin_yuv2rgb24_line, 1, 888); | outstrides, ff_bfin_yuv2rgb24_line, 1, 888); | ||||
| } | } | ||||
| static int bfin_yuv420_bgr24(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| static int bfin_yuv420_bgr24(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | int srcSliceY, int srcSliceH, | ||||
| uint8_t **oplanes, int *outstrides) | uint8_t **oplanes, int *outstrides) | ||||
| { | { | ||||
| @@ -163,8 +155,7 @@ static int bfin_yuv420_bgr24(SwsContext *c, | |||||
| outstrides, ff_bfin_yuv2rgb24_line, 0, 888); | outstrides, ff_bfin_yuv2rgb24_line, 0, 888); | ||||
| } | } | ||||
| static int bfin_yuv420_rgb565(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| static int bfin_yuv420_rgb565(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | int srcSliceY, int srcSliceH, | ||||
| uint8_t **oplanes, int *outstrides) | uint8_t **oplanes, int *outstrides) | ||||
| { | { | ||||
| @@ -172,8 +163,7 @@ static int bfin_yuv420_rgb565(SwsContext *c, | |||||
| outstrides, ff_bfin_yuv2rgb565_line, 1, 565); | outstrides, ff_bfin_yuv2rgb565_line, 1, 565); | ||||
| } | } | ||||
| static int bfin_yuv420_bgr565(SwsContext *c, | |||||
| uint8_t **in, int *instrides, | |||||
| static int bfin_yuv420_bgr565(SwsContext *c, uint8_t **in, int *instrides, | |||||
| int srcSliceY, int srcSliceH, | int srcSliceY, int srcSliceH, | ||||
| uint8_t **oplanes, int *outstrides) | uint8_t **oplanes, int *outstrides) | ||||
| { | { | ||||
| @@ -181,18 +171,29 @@ static int bfin_yuv420_bgr565(SwsContext *c, | |||||
| outstrides, ff_bfin_yuv2rgb565_line, 0, 565); | outstrides, ff_bfin_yuv2rgb565_line, 0, 565); | ||||
| } | } | ||||
| SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) | SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) | ||||
| { | { | ||||
| SwsFunc f; | SwsFunc f; | ||||
| switch(c->dstFormat) { | |||||
| case PIX_FMT_RGB555: f = bfin_yuv420_rgb555; break; | |||||
| case PIX_FMT_BGR555: f = bfin_yuv420_bgr555; break; | |||||
| case PIX_FMT_RGB565: f = bfin_yuv420_rgb565; break; | |||||
| case PIX_FMT_BGR565: f = bfin_yuv420_bgr565; break; | |||||
| case PIX_FMT_RGB24: f = bfin_yuv420_rgb24; break; | |||||
| case PIX_FMT_BGR24: f = bfin_yuv420_bgr24; break; | |||||
| switch (c->dstFormat) { | |||||
| case PIX_FMT_RGB555: | |||||
| f = bfin_yuv420_rgb555; | |||||
| break; | |||||
| case PIX_FMT_BGR555: | |||||
| f = bfin_yuv420_bgr555; | |||||
| break; | |||||
| case PIX_FMT_RGB565: | |||||
| f = bfin_yuv420_rgb565; | |||||
| break; | |||||
| case PIX_FMT_BGR565: | |||||
| f = bfin_yuv420_bgr565; | |||||
| break; | |||||
| case PIX_FMT_RGB24: | |||||
| f = bfin_yuv420_rgb24; | |||||
| break; | |||||
| case PIX_FMT_BGR24: | |||||
| f = bfin_yuv420_bgr24; | |||||
| break; | |||||
| default: | default: | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -51,6 +51,19 @@ bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV | |||||
| rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV | rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV | ||||
| rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV | rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV | ||||
; Coefficient tables for the RGB32 input functions. Each entry is a pair of
; words repeated 4 times, consumed by pmaddwd in RGB32_TO_Y_FN/RGB32_TO_UV_FN.
; The suffix names the two channels of the pair in order (r, g, b, or a for
; the alpha slot, which always gets weight 0).
| rgba_Ycoeff_rb: times 4 dw RY, BY | |||||
| rgba_Ycoeff_br: times 4 dw BY, RY | |||||
| rgba_Ycoeff_ga: times 4 dw GY, 0 | |||||
| rgba_Ycoeff_ag: times 4 dw 0, GY | |||||
| rgba_Ucoeff_rb: times 4 dw RU, BU | |||||
| rgba_Ucoeff_br: times 4 dw BU, RU | |||||
| rgba_Ucoeff_ga: times 4 dw GU, 0 | |||||
| rgba_Ucoeff_ag: times 4 dw 0, GU | |||||
| rgba_Vcoeff_rb: times 4 dw RV, BV | |||||
| rgba_Vcoeff_br: times 4 dw BV, RV | |||||
| rgba_Vcoeff_ga: times 4 dw GV, 0 | |||||
| rgba_Vcoeff_ag: times 4 dw 0, GV | |||||
| shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \ | shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \ | ||||
| 6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80 | 6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80 | ||||
| shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \ | shuf_rgb_3x56: db 2, 0x80, 3, 0x80, 4, 0x80, 5, 0x80, \ | ||||
| @@ -294,6 +307,150 @@ RGB24_FUNCS 11, 13 | |||||
| INIT_XMM avx | INIT_XMM avx | ||||
| RGB24_FUNCS 11, 13 | RGB24_FUNCS 11, 13 | ||||
| ; %1 = nr. of XMM registers | |||||
| ; %2-5 = rgba, bgra, argb or abgr (in individual characters) | |||||
; %6 = (optional) name of the variant whose loop body is shared; when given,
;      this function only loads its own coefficients into m5/m6 and jumps to
;      that variant's .body label instead of emitting a loop of its own
| %macro RGB32_TO_Y_FN 5-6 | |||||
| cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3 | |||||
| mova m5, [rgba_Ycoeff_%2%4] | |||||
| mova m6, [rgba_Ycoeff_%3%5] | |||||
| %if %0 == 6 | |||||
| jmp mangle(program_name %+ _ %+ %6 %+ ToY %+ SUFFIX).body | |||||
| %else ; %0 == 6 | |||||
| .body: | |||||
| %if ARCH_X86_64 | |||||
| movsxd wq, wd | |||||
| %endif | |||||
; Point src and dst past the end of the row and let w run from -2*width up
; to 0: the input is read at [src+w*2] (4 bytes per pixel) and the output is
; written at [dst+w] (2 bytes per pixel).
| lea srcq, [srcq+wq*4] | |||||
| add wq, wq | |||||
| add dstq, wq | |||||
| neg wq | |||||
| mova m4, [rgb_Yrnd] | |||||
| pcmpeqb m7, m7 | |||||
| psrlw m7, 8 ; (word) { 0x00ff } x4 | |||||
| .loop: | |||||
| ; FIXME check alignment and use mova | |||||
| movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] | |||||
| movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] | |||||
| DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] | |||||
| pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] | |||||
| pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] | |||||
| pmaddwd m3, m5 ; (dword) { Bx*BY + Rx*RY }[4-7] | |||||
| pmaddwd m2, m6 ; (dword) { Gx*GY }[4-7] | |||||
| paddd m0, m4 ; += rgb_Yrnd | |||||
| paddd m2, m4 ; += rgb_Yrnd | |||||
| paddd m0, m1 ; (dword) { Y[0-3] } | |||||
| paddd m2, m3 ; (dword) { Y[4-7] } | |||||
| psrad m0, 9 | |||||
| psrad m2, 9 | |||||
| packssdw m0, m2 ; (word) { Y[0-7] } | |||||
| mova [dstq+wq], m0 | |||||
| add wq, mmsize | |||||
| jl .loop | |||||
| REP_RET | |||||
| %endif ; %0 == 6 | |||||
| %endmacro | |||||
| ; %1 = nr. of XMM registers | |||||
| ; %2-5 = rgba, bgra, argb or abgr (in individual characters) | |||||
; %6 = (optional) name of the variant whose loop body is shared. The jump is
;      only taken on x86-64, where the coefficients live in m8-m11; on
;      x86-32 the coefficients are memory operands named inside the loop, so
;      every variant emits its own body there.
| %macro RGB32_TO_UV_FN 5-6 | |||||
| cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 | |||||
| %if ARCH_X86_64 | |||||
| mova m8, [rgba_Ucoeff_%2%4] | |||||
| mova m9, [rgba_Ucoeff_%3%5] | |||||
| mova m10, [rgba_Vcoeff_%2%4] | |||||
| mova m11, [rgba_Vcoeff_%3%5] | |||||
| %define coeffU1 m8 | |||||
| %define coeffU2 m9 | |||||
| %define coeffV1 m10 | |||||
| %define coeffV2 m11 | |||||
| %else ; x86-32 | |||||
| %define coeffU1 [rgba_Ucoeff_%2%4] | |||||
| %define coeffU2 [rgba_Ucoeff_%3%5] | |||||
| %define coeffV1 [rgba_Vcoeff_%2%4] | |||||
| %define coeffV2 [rgba_Vcoeff_%3%5] | |||||
| %endif ; x86-64/32 | |||||
| %if ARCH_X86_64 && %0 == 6 | |||||
| jmp mangle(program_name %+ _ %+ %6 %+ ToUV %+ SUFFIX).body | |||||
| %else ; ARCH_X86_64 && %0 == 6 | |||||
| .body: | |||||
| %if ARCH_X86_64 | |||||
| movsxd wq, dword r5m | |||||
| %else ; x86-32 | |||||
| mov wq, r5m | |||||
| %endif | |||||
; Point src, dstU and dstV past the end of the row and let w run from
; -2*width up to 0: the input is read at [src+w*2] (4 bytes per pixel) and
; each output plane is written at [dst+w] (2 bytes per pixel).
| add wq, wq | |||||
| add dstUq, wq | |||||
| add dstVq, wq | |||||
| lea srcq, [srcq+wq*2] | |||||
| neg wq | |||||
| pcmpeqb m7, m7 | |||||
| psrlw m7, 8 ; (word) { 0x00ff } x4 | |||||
| mova m6, [rgb_UVrnd] | |||||
| .loop: | |||||
| ; FIXME check alignment and use mova | |||||
| movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] | |||||
| movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] | |||||
| DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] | |||||
| pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] | |||||
| pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] | |||||
| pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3] | |||||
| pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3] | |||||
| paddd m3, m6 ; += rgb_UVrnd | |||||
| paddd m1, m6 ; += rgb_UVrnd | |||||
| paddd m2, m3 ; (dword) { V[0-3] } | |||||
| paddd m0, m1 ; (dword) { U[0-3] } | |||||
| pmaddwd m3, m5, coeffV1 ; (dword) { Bx*BV + Rx*RV }[4-7] | |||||
| pmaddwd m1, m4, coeffV2 ; (dword) { Gx*GV }[4-7] | |||||
| pmaddwd m5, coeffU1 ; (dword) { Bx*BU + Rx*RU }[4-7] | |||||
| pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7] | |||||
| paddd m3, m6 ; += rgb_UVrnd | |||||
| paddd m5, m6 ; += rgb_UVrnd | |||||
| psrad m0, 9 | |||||
| paddd m1, m3 ; (dword) { V[4-7] } | |||||
| paddd m4, m5 ; (dword) { U[4-7] } | |||||
| psrad m2, 9 | |||||
| psrad m4, 9 | |||||
| psrad m1, 9 | |||||
| packssdw m0, m4 ; (word) { U[0-7] } | |||||
| packssdw m2, m1 ; (word) { V[0-7] } | |||||
; NOTE(review): both branches of the %if below emit the same two stores, so
; the conditional currently has no effect -- confirm whether it can go.
| %if mmsize == 8 | |||||
| mova [dstUq+wq], m0 | |||||
| mova [dstVq+wq], m2 | |||||
| %else ; mmsize == 16 | |||||
| mova [dstUq+wq], m0 | |||||
| mova [dstVq+wq], m2 | |||||
| %endif ; mmsize == 8/16 | |||||
| add wq, mmsize | |||||
| jl .loop | |||||
| REP_RET | |||||
| %endif ; ARCH_X86_64 && %0 == 6 | |||||
| %endmacro | |||||
| ; %1 = nr. of XMM registers for rgb-to-Y func | |||||
| ; %2 = nr. of XMM registers for rgb-to-UV func | |||||
; Emits the ToY and ToUV functions for rgba, bgra, argb and abgr. The rgba
; variant is emitted first without a 6th argument and so carries the loop
; body; the other three pass "rgba" as the variant whose .body they jump to.
| %macro RGB32_FUNCS 2 | |||||
| RGB32_TO_Y_FN %1, r, g, b, a | |||||
| RGB32_TO_Y_FN %1, b, g, r, a, rgba | |||||
| RGB32_TO_Y_FN %1, a, r, g, b, rgba | |||||
| RGB32_TO_Y_FN %1, a, b, g, r, rgba | |||||
| RGB32_TO_UV_FN %2, r, g, b, a | |||||
| RGB32_TO_UV_FN %2, b, g, r, a, rgba | |||||
| RGB32_TO_UV_FN %2, a, r, g, b, rgba | |||||
| RGB32_TO_UV_FN %2, a, b, g, r, rgba | |||||
| %endmacro | |||||
; Instantiations: the MMX versions are only assembled on x86-32; the SSE2 and
; AVX versions declare 8 XMM registers for ToY and 12 for ToUV.
| %if ARCH_X86_32 | |||||
| INIT_MMX mmx | |||||
| RGB32_FUNCS 0, 0 | |||||
| %endif | |||||
| INIT_XMM sse2 | |||||
| RGB32_FUNCS 8, 12 | |||||
| INIT_XMM avx | |||||
| RGB32_FUNCS 8, 12 | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; YUYV/UYVY/NV12/NV21 packed pixel shuffling. | ; YUYV/UYVY/NV12/NV21 packed pixel shuffling. | ||||
| ; | ; | ||||
| @@ -308,6 +308,10 @@ extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \ | |||||
| INPUT_FUNC(yuyv, opt); \ | INPUT_FUNC(yuyv, opt); \ | ||||
| INPUT_UV_FUNC(nv12, opt); \ | INPUT_UV_FUNC(nv12, opt); \ | ||||
| INPUT_UV_FUNC(nv21, opt); \ | INPUT_UV_FUNC(nv21, opt); \ | ||||
| INPUT_FUNC(rgba, opt); \ | |||||
| INPUT_FUNC(bgra, opt); \ | |||||
| INPUT_FUNC(argb, opt); \ | |||||
| INPUT_FUNC(abgr, opt); \ | |||||
| INPUT_FUNC(rgb24, opt); \ | INPUT_FUNC(rgb24, opt); \ | ||||
| INPUT_FUNC(bgr24, opt) | INPUT_FUNC(bgr24, opt) | ||||
| @@ -406,6 +410,10 @@ switch(c->dstBpc){ \ | |||||
| break; | break; | ||||
| case_rgb(rgb24, RGB24, mmx); | case_rgb(rgb24, RGB24, mmx); | ||||
| case_rgb(bgr24, BGR24, mmx); | case_rgb(bgr24, BGR24, mmx); | ||||
| case_rgb(bgra, BGRA, mmx); | |||||
| case_rgb(rgba, RGBA, mmx); | |||||
| case_rgb(abgr, ABGR, mmx); | |||||
| case_rgb(argb, ARGB, mmx); | |||||
| default: | default: | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -450,6 +458,10 @@ switch(c->dstBpc){ \ | |||||
| break; | break; | ||||
| case_rgb(rgb24, RGB24, sse2); | case_rgb(rgb24, RGB24, sse2); | ||||
| case_rgb(bgr24, BGR24, sse2); | case_rgb(bgr24, BGR24, sse2); | ||||
| case_rgb(bgra, BGRA, sse2); | |||||
| case_rgb(rgba, RGBA, sse2); | |||||
| case_rgb(abgr, ABGR, sse2); | |||||
| case_rgb(argb, ARGB, sse2); | |||||
| default: | default: | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -493,6 +505,10 @@ switch(c->dstBpc){ \ | |||||
| break; | break; | ||||
| case_rgb(rgb24, RGB24, avx); | case_rgb(rgb24, RGB24, avx); | ||||
| case_rgb(bgr24, BGR24, avx); | case_rgb(bgr24, BGR24, avx); | ||||
| case_rgb(bgra, BGRA, avx); | |||||
| case_rgb(rgba, RGBA, avx); | |||||
| case_rgb(abgr, ABGR, avx); | |||||
| case_rgb(argb, ARGB, avx); | |||||
| default: | default: | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -0,0 +1,31 @@ | |||||
| /* | |||||
| * check XMM registers for clobbers on Win64 | |||||
| * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/x86/w64xmmtest.h" | |||||
| #include "libswscale/swscale.h" | |||||
/* __wrap_sws_scale(): forwards all arguments to __real_sws_scale() through
 * testxmmclobbers(), which logs the changed registers and aborts if
 * XMM6..XMM15 differ after the call, and otherwise returns the result of
 * the real function. */
| wrap(sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], | |||||
| const int srcStride[], int srcSliceY, int srcSliceH, | |||||
| uint8_t *const dst[], const int dstStride[])) | |||||
| { | |||||
| testxmmclobbers(sws_scale, c, srcSlice, srcStride, srcSliceY, | |||||
| srcSliceH, dst, dstStride); | |||||
| } | |||||
| @@ -368,7 +368,7 @@ $tiny_psnr $pcm_dst $pcm_ref 2 1924 | |||||
| fi | fi | ||||
| if [ -n "$do_ac3_fixed" ] ; then | if [ -n "$do_ac3_fixed" ] ; then | ||||
| do_audio_encoding ac3.rm "-vn -acodec ac3_fixed" | |||||
| do_audio_encoding ac3.ac3 "-vn -acodec ac3_fixed" | |||||
| # binaries configured with --disable-sse decode ac3 differently | # binaries configured with --disable-sse decode ac3 differently | ||||
| #do_audio_decoding | #do_audio_decoding | ||||
| #$tiny_psnr $pcm_dst $pcm_ref 2 1024 | #$tiny_psnr $pcm_dst $pcm_ref 2 1024 | ||||
| @@ -1,2 +1,2 @@ | |||||
| e7fa185030a56d9db8663ad9e38c6c94 *./tests/data/acodec/ac3.rm | |||||
| 98751 ./tests/data/acodec/ac3.rm | |||||
| a1d1fc116463b771abf5aef7ed37d7b1 *./tests/data/acodec/ac3.ac3 | |||||
| 96408 ./tests/data/acodec/ac3.ac3 | |||||
| @@ -117,4 +117,3 @@ | |||||
| 0, 438750, 37440, 0xf0fe8c1c | 0, 438750, 37440, 0xf0fe8c1c | ||||
| 0, 442500, 37440, 0xc0036222 | 0, 442500, 37440, 0xc0036222 | ||||
| 0, 446250, 37440, 0x3058385c | 0, 446250, 37440, 0x3058385c | ||||
| 0, 450000, 37440, 0x68141016 | |||||
| @@ -0,0 +1,49 @@ | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 | |||||
| ret: 0 st:-1 flags:0 ts:-1.000000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 | |||||
| ret: 0 st:-1 flags:1 ts: 1.894167 | |||||
| ret: 0 st: 0 flags:1 dts: 1.880400 pts: 1.880400 pos: 30092 size: 558 | |||||
| ret: 0 st: 0 flags:0 ts: 0.788333 | |||||
| ret: 0 st: 0 flags:1 dts: 0.800911 pts: 0.800911 pos: 12818 size: 556 | |||||
| ret:-1 st: 0 flags:1 ts:-0.317500 | |||||
| ret: 0 st:-1 flags:0 ts: 2.576668 | |||||
| ret: 0 st: 0 flags:1 dts: 2.576844 pts: 2.576844 pos: 41238 size: 558 | |||||
| ret: 0 st:-1 flags:1 ts: 1.470835 | |||||
| ret: 0 st: 0 flags:1 dts: 1.462533 pts: 1.462533 pos: 23406 size: 556 | |||||
| ret: 0 st: 0 flags:0 ts: 0.365000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.383044 pts: 0.383044 pos: 6130 size: 558 | |||||
| ret:-1 st: 0 flags:1 ts:-0.740833 | |||||
| ret: 0 st:-1 flags:0 ts: 2.153336 | |||||
| ret: 0 st: 0 flags:1 dts: 2.158978 pts: 2.158978 pos: 34552 size: 556 | |||||
| ret: 0 st:-1 flags:1 ts: 1.047503 | |||||
| ret: 0 st: 0 flags:1 dts: 1.044667 pts: 1.044667 pos: 16718 size: 558 | |||||
| ret: 0 st: 0 flags:0 ts:-0.058333 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 | |||||
| ret: 0 st: 0 flags:1 ts: 2.835833 | |||||
| ret: 0 st: 0 flags:1 dts: 2.820600 pts: 2.820600 pos: 45140 size: 556 | |||||
| ret: 0 st:-1 flags:0 ts: 1.730004 | |||||
| ret: 0 st: 0 flags:1 dts: 1.741111 pts: 1.741111 pos: 27864 size: 556 | |||||
| ret: 0 st:-1 flags:1 ts: 0.624171 | |||||
| ret: 0 st: 0 flags:1 dts: 0.591978 pts: 0.591978 pos: 9474 size: 556 | |||||
| ret: 0 st: 0 flags:0 ts:-0.481667 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 | |||||
| ret: 0 st: 0 flags:1 ts: 2.412500 | |||||
| ret: 0 st: 0 flags:1 dts: 2.402733 pts: 2.402733 pos: 38452 size: 558 | |||||
| ret: 0 st:-1 flags:0 ts: 1.306672 | |||||
| ret: 0 st: 0 flags:1 dts: 1.323244 pts: 1.323244 pos: 21176 size: 558 | |||||
| ret: 0 st:-1 flags:1 ts: 0.200839 | |||||
| ret: 0 st: 0 flags:1 dts: 0.174111 pts: 0.174111 pos: 2786 size: 558 | |||||
| ret: 0 st: 0 flags:0 ts:-0.904989 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 0 size: 556 | |||||
| ret: 0 st: 0 flags:1 ts: 1.989178 | |||||
| ret: 0 st: 0 flags:1 dts: 1.984867 pts: 1.984867 pos: 31764 size: 558 | |||||
| ret: 0 st:-1 flags:0 ts: 0.883340 | |||||
| ret: 0 st: 0 flags:1 dts: 0.905378 pts: 0.905378 pos: 14488 size: 558 | |||||
| ret:-1 st:-1 flags:1 ts:-0.222493 | |||||
| ret: 0 st: 0 flags:0 ts: 2.671678 | |||||
| ret: 0 st: 0 flags:1 dts: 2.681311 pts: 2.681311 pos: 42910 size: 558 | |||||
| ret: 0 st: 0 flags:1 ts: 1.565844 | |||||
| ret: 0 st: 0 flags:1 dts: 1.532178 pts: 1.532178 pos: 24520 size: 558 | |||||
| ret: 0 st:-1 flags:0 ts: 0.460008 | |||||
| ret: 0 st: 0 flags:1 dts: 0.487511 pts: 0.487511 pos: 7802 size: 556 | |||||
| ret:-1 st:-1 flags:1 ts:-0.645825 | |||||
| @@ -1,41 +0,0 @@ | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret: 0 st:-1 flags:0 ts:-1.000000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret:-1 st:-1 flags:1 ts: 1.894167 | |||||
| ret:-1 st: 0 flags:0 ts: 0.788000 | |||||
| ret: 0 st: 0 flags:1 ts:-0.317000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret: 0 st:-1 flags:0 ts: 2.576668 | |||||
| ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558 | |||||
| ret:-1 st:-1 flags:1 ts: 1.470835 | |||||
| ret:-1 st: 0 flags:0 ts: 0.365000 | |||||
| ret: 0 st: 0 flags:1 ts:-0.741000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret: 0 st:-1 flags:0 ts: 2.153336 | |||||
| ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558 | |||||
| ret:-1 st:-1 flags:1 ts: 1.047503 | |||||
| ret: 0 st: 0 flags:0 ts:-0.058000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret: 0 st: 0 flags:1 ts: 2.836000 | |||||
| ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558 | |||||
| ret:-1 st:-1 flags:0 ts: 1.730004 | |||||
| ret:-1 st:-1 flags:1 ts: 0.624171 | |||||
| ret: 0 st: 0 flags:0 ts:-0.482000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret: 0 st: 0 flags:1 ts: 2.413000 | |||||
| ret: 0 st: 0 flags:1 dts: 2.124000 pts: 2.124000 pos: 34997 size: 558 | |||||
| ret: 0 st:-1 flags:0 ts: 1.306672 | |||||
| ret: 0 st: 0 flags:1 dts:65.537000 pts:65.537000 pos: 87488 size: 6132 | |||||
| ret:-1 st:-1 flags:1 ts: 0.200839 | |||||
| ret: 0 st: 0 flags:0 ts:-0.905000 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret:-1 st: 0 flags:1 ts: 1.989000 | |||||
| ret:-1 st:-1 flags:0 ts: 0.883340 | |||||
| ret: 0 st:-1 flags:1 ts:-0.222493 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||
| ret:-1 st: 0 flags:0 ts: 2.672000 | |||||
| ret:-1 st: 0 flags:1 ts: 1.566000 | |||||
| ret: 0 st:-1 flags:0 ts: 0.460008 | |||||
| ret: 0 st: 0 flags:1 dts: 1.567000 pts: 1.567000 pos: 25889 size: 556 | |||||
| ret: 0 st:-1 flags:1 ts:-0.645825 | |||||
| ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 | |||||