|
- ;******************************************************************************
- ;* SIMD-optimized UTVideo functions
- ;* Copyright (c) 2017 Paul B Mahol
- ;*
- ;* This file is part of FFmpeg.
- ;*
- ;* FFmpeg is free software; you can redistribute it and/or
- ;* modify it under the terms of the GNU Lesser General Public
- ;* License as published by the Free Software Foundation; either
- ;* version 2.1 of the License, or (at your option) any later version.
- ;*
- ;* FFmpeg is distributed in the hope that it will be useful,
- ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;* Lesser General Public License for more details.
- ;*
- ;* You should have received a copy of the GNU Lesser General Public
- ;* License along with FFmpeg; if not, write to the Free Software
- ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ;******************************************************************************
-
- %include "libavutil/x86/x86util.asm"
-
- SECTION_RODATA
-
- pb_128: times 16 db 128
- pw_512: times 8 dw 512
- pw_1023: times 8 dw 1023
-
- SECTION .text
-
- ; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
- ; ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
- ; int width, int height)
- %macro RESTORE_RGB_PLANES 0
- cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
- movsxdifnidn wq, wd
- add src_rq, wq
- add src_gq, wq
- add src_bq, wq
- neg wq
- %if ARCH_X86_64 == 0
- mov wm, wq
- DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
- %define wq r6m
- %define hd r7mp
- %endif
- mova m3, [pb_128]
- .nextrow:
- mov xq, wq
-
- .loop:
- mova m0, [src_rq + xq]
- mova m1, [src_gq + xq]
- mova m2, [src_bq + xq]
- psubb m1, m3
- paddb m0, m1
- paddb m2, m1
- mova [src_rq+xq], m0
- mova [src_bq+xq], m2
- add xq, mmsize
- jl .loop
-
- add src_rq, linesize_rq
- add src_gq, linesize_gq
- add src_bq, linesize_bq
- sub hd, 1
- jg .nextrow
- REP_RET
- %endmacro
-
- INIT_XMM sse2
- RESTORE_RGB_PLANES
-
- %macro RESTORE_RGB_PLANES10 0
- cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
- shl wd, 1
- shl linesize_rq, 1
- shl linesize_gq, 1
- shl linesize_bq, 1
- add src_rq, wq
- add src_gq, wq
- add src_bq, wq
- mova m3, [pw_512]
- mova m4, [pw_1023]
- neg wq
- %if ARCH_X86_64 == 0
- mov wm, wq
- DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
- %define wq r6m
- %define hd r7mp
- %endif
- .nextrow:
- mov xq, wq
-
- .loop:
- mova m0, [src_rq + xq]
- mova m1, [src_gq + xq]
- mova m2, [src_bq + xq]
- psubw m1, m3
- paddw m0, m1
- paddw m2, m1
- pand m0, m4
- pand m2, m4
- mova [src_rq+xq], m0
- mova [src_bq+xq], m2
- add xq, mmsize
- jl .loop
-
- add src_rq, linesize_rq
- add src_gq, linesize_gq
- add src_bq, linesize_bq
- sub hd, 1
- jg .nextrow
- REP_RET
- %endmacro
-
- INIT_XMM sse2
- RESTORE_RGB_PLANES10
|