Signed-off-by: Martin Storsjö <martin@martin.st>
tags/n2.0
| @@ -4,4 +4,6 @@ OBJS += alpha/dsputil_alpha.o \ | |||||
| alpha/motion_est_mvi_asm.o \ | alpha/motion_est_mvi_asm.o \ | ||||
| alpha/simple_idct_alpha.o \ | alpha/simple_idct_alpha.o \ | ||||
| OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \ | |||||
| alpha/hpeldsp_alpha_asm.o | |||||
| OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o | OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o | ||||
| @@ -119,196 +119,11 @@ static void clear_blocks_axp(int16_t *blocks) { | |||||
| } while (n); | } while (n); | ||||
| } | } | ||||
| static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) | |||||
| { /* Average of a and b without rounding up, computed as | |||||
| * (a & b) + ((a ^ b) >> 1): the bits common to both plus half of the | |||||
| * bits that differ. The 0xfe mask is applied before the shift; BYTE_VEC | |||||
| * is defined outside this view and presumably replicates its argument | |||||
| * into all eight bytes of the word, so that the shift cannot move a bit | |||||
| * from one byte into its neighbour -- TODO confirm against asm.h. */ | |||||
| return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||||
| } | |||||
| static inline uint64_t avg2(uint64_t a, uint64_t b) | |||||
| { /* Rounding counterpart of avg2_no_rnd(): (a | b) - ((a ^ b) >> 1). | |||||
| * Subtracting half of the differing bits from the union rounds the | |||||
| * average up where avg2_no_rnd() rounds it down. The 0xfe mask before | |||||
| * the shift is presumably there to keep the operation per byte (BYTE_VEC | |||||
| * is defined outside this view) -- TODO confirm against asm.h. */ | |||||
| return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||||
| } | |||||
| #if 0 | |||||
| /* Reference four-way average, compiled out by the surrounding #if 0. | |||||
| The XY2 routines basically utilize this scheme, but reuse parts in | |||||
| each iteration. | |||||
| r1 adds the four inputs after masking off the low two bits of every | |||||
| byte and shifting right by 2; r2 adds the low two bits of all four | |||||
| inputs plus BYTE_VEC(0x02), shifts right by 2 and keeps two bits. */ | |||||
| static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) | |||||
| { | |||||
| uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) | |||||
| + ((l2 & ~BYTE_VEC(0x03)) >> 2) | |||||
| + ((l3 & ~BYTE_VEC(0x03)) >> 2) | |||||
| + ((l4 & ~BYTE_VEC(0x03)) >> 2); | |||||
| uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) | |||||
| + (l2 & BYTE_VEC(0x03)) | |||||
| + (l3 & BYTE_VEC(0x03)) | |||||
| + (l4 & BYTE_VEC(0x03)) | |||||
| + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); | |||||
| return r1 + r2; | |||||
| } | |||||
| #endif | |||||
| #define OP(LOAD, STORE) /* plain kernel: one LOAD and one STORE per row; reads pixels, block, line_size and h from the function it is expanded in */ \ | |||||
| do { \ | |||||
| STORE(LOAD(pixels), block); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } while (--h) /* the body runs before h is tested, so h == 0 on entry is not treated as "no rows" */ | |||||
| #define OP_X2(LOAD, STORE) /* X2 kernel: averages each loaded quadword with the same row shifted by one byte */ \ | |||||
| do { \ | |||||
| uint64_t pix1, pix2; \ | |||||
| \ | |||||
| pix1 = LOAD(pixels); \ | |||||
| pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); /* drop the lowest byte and insert pixels[8] as the highest; with little-endian loads this would be the eight bytes at pixels + 1 -- TODO confirm */ \ | |||||
| STORE(AVG2(pix1, pix2), block); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } while (--h) /* the body runs before h is tested, so h must be at least 1 on entry */ | |||||
| #define OP_Y2(LOAD, STORE) /* Y2 kernel: averages each row with the row line_size bytes after it */ \ | |||||
| do { \ | |||||
| uint64_t pix = LOAD(pixels); /* the first row is loaded once, before the loop */ \ | |||||
| do { \ | |||||
| uint64_t next_pix; \ | |||||
| \ | |||||
| pixels += line_size; \ | |||||
| next_pix = LOAD(pixels); \ | |||||
| STORE(AVG2(pix, next_pix), block); \ | |||||
| block += line_size; \ | |||||
| pix = next_pix; /* this load becomes the upper row of the next iteration */ \ | |||||
| } while (--h); /* h rows are stored and h + 1 source rows are read in total */ \ | |||||
| } while (0) | |||||
| #define OP_XY2(LOAD, STORE) /* XY2 kernel: four-way average of a row, the same row shifted by one byte, and both of those from the following row */ \ | |||||
| do { \ | |||||
| uint64_t pix1 = LOAD(pixels); \ | |||||
| uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); /* pix1 shifted down one byte, pixels[8] inserted as the top byte */ \ | |||||
| uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) /* low two bits of every byte, summed over pix1 and pix2 */ \ | |||||
| + (pix2 & BYTE_VEC(0x03)); \ | |||||
| uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) /* remaining upper bits, shifted right by 2 before summing */ \ | |||||
| + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||||
| \ | |||||
| do { \ | |||||
| uint64_t npix1, npix2; \ | |||||
| uint64_t npix_l, npix_h; \ | |||||
| uint64_t avg; \ | |||||
| \ | |||||
| pixels += line_size; \ | |||||
| npix1 = LOAD(pixels); /* same split as above, for the next source row */ \ | |||||
| npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||||
| npix_l = (npix1 & BYTE_VEC(0x03)) \ | |||||
| + (npix2 & BYTE_VEC(0x03)); \ | |||||
| npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ | |||||
| + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||||
| avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) /* low-bit sums of both rows plus the rounder, shifted right by 2 and masked to two bits */ \ | |||||
| + pix_h + npix_h; \ | |||||
| STORE(avg, block); \ | |||||
| \ | |||||
| block += line_size; \ | |||||
| pix_l = npix_l; /* the next row's partial sums become the current row's for the following iteration */ \ | |||||
| pix_h = npix_h; \ | |||||
| } while (--h); /* h rows are stored and h + 1 source rows are read in total */ \ | |||||
| } while (0) | |||||
| #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) /* defines two static functions, OPNAME_pixels<SUFF>_axp and OPNAME_pixels16<SUFF>_axp, around the kernel macro OPKIND */ \ | |||||
| static void OPNAME ## _pixels ## SUFF ## _axp \ | |||||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||||
| ptrdiff_t line_size, int h) \ | |||||
| { \ | |||||
| if ((size_t) pixels & 0x7) { /* low three address bits set: source is not 8-byte aligned, expand the kernel with uldq */ \ | |||||
| OPKIND(uldq, STORE); \ | |||||
| } else { /* aligned source: expand the kernel with ldq */ \ | |||||
| OPKIND(ldq, STORE); \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static void OPNAME ## _pixels16 ## SUFF ## _axp \ | |||||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||||
| ptrdiff_t line_size, int h) \ | |||||
| { /* 16-byte-wide variant: the function above on bytes 0-7, then again on bytes 8-15 */ \ | |||||
| OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ | |||||
| OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ | |||||
| } | |||||
| #define PIXOP(OPNAME, STORE) /* expands MAKE_OP four times: for the plain, _x2, _y2 and _xy2 kernels */ \ | |||||
| MAKE_OP(OPNAME, , OP, STORE) \ | |||||
| MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ | |||||
| MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ | |||||
| MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) | |||||
| /* Rounding primitives. AVG2 and AVG4_ROUNDER are picked up by the OP_* kernels when PIXOP expands. */ | |||||
| #define AVG2 avg2 | |||||
| #define AVG4 avg4 /* NOTE(review): no use of AVG4 is visible in this view, and avg4 itself sits inside #if 0 */ | |||||
| #define AVG4_ROUNDER BYTE_VEC(0x02) | |||||
| #define STORE(l, b) stq(l, b) | |||||
| PIXOP(put, STORE); /* put_*: the computed value is stored as is */ | |||||
| #undef STORE | |||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); /* avg_*: the computed value is averaged with what block already holds; NOTE(review): the definition ends in ';', so every use expands to an extra empty statement */ | |||||
| PIXOP(avg, STORE); | |||||
| /* Not rounding primitives. Same instantiations again with avg2_no_rnd and a rounder of 0x01. */ | |||||
| #undef AVG2 | |||||
| #undef AVG4 | |||||
| #undef AVG4_ROUNDER | |||||
| #undef STORE | |||||
| #define AVG2 avg2_no_rnd | |||||
| #define AVG4 avg4_no_rnd /* NOTE(review): avg4_no_rnd has no definition in this view; harmless only while AVG4 stays unused */ | |||||
| #define AVG4_ROUNDER BYTE_VEC(0x01) | |||||
| #define STORE(l, b) stq(l, b) | |||||
| PIXOP(put_no_rnd, STORE); | |||||
| #undef STORE | |||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); /* here AVG2 is avg2_no_rnd, so the merge with the destination does not round up either */ | |||||
| PIXOP(avg_no_rnd, STORE); | |||||
| static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, | |||||
| ptrdiff_t line_size, int h) | |||||
| { /* 16-byte-wide copy built from the assembly routine put_pixels_axp_asm: | |||||
| * one call for bytes 0-7 of every row, a second call for bytes 8-15. | |||||
| * Both calls receive the same line_size and h. */ | |||||
| put_pixels_axp_asm(block, pixels, line_size, h); | |||||
| put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); | |||||
| } | |||||
| av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx) | av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx) | ||||
| { | { | ||||
| const int high_bit_depth = avctx->bits_per_raw_sample > 8; | const int high_bit_depth = avctx->bits_per_raw_sample > 8; | ||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| c->put_pixels_tab[0][0] = put_pixels16_axp_asm; | |||||
| c->put_pixels_tab[0][1] = put_pixels16_x2_axp; | |||||
| c->put_pixels_tab[0][2] = put_pixels16_y2_axp; | |||||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; | |||||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; | |||||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; | |||||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; | |||||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; | |||||
| c->avg_pixels_tab[0][0] = avg_pixels16_axp; | |||||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; | |||||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; | |||||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; | |||||
| c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; | |||||
| c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp; | |||||
| c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp; | |||||
| c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp; | |||||
| c->put_pixels_tab[1][0] = put_pixels_axp_asm; | |||||
| c->put_pixels_tab[1][1] = put_pixels_x2_axp; | |||||
| c->put_pixels_tab[1][2] = put_pixels_y2_axp; | |||||
| c->put_pixels_tab[1][3] = put_pixels_xy2_axp; | |||||
| c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; | |||||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; | |||||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; | |||||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; | |||||
| c->avg_pixels_tab[1][0] = avg_pixels_axp; | |||||
| c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; | |||||
| c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; | |||||
| c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; | |||||
| c->clear_blocks = clear_blocks_axp; | c->clear_blocks = clear_blocks_axp; | ||||
| } | } | ||||
| @@ -26,8 +26,6 @@ void ff_simple_idct_axp(int16_t *block); | |||||
| void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); | void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); | ||||
| void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); | void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); | ||||
| void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||||
| ptrdiff_t line_size, int h); /* implemented in assembly under the label put_pixels_axp_asm; copies one 8-byte quadword per row and handles four rows per loop iteration */ | |||||
| void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
| int line_size); | int line_size); | ||||
| void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
| @@ -26,114 +26,11 @@ | |||||
| #include "regdef.h" | #include "regdef.h" | ||||
| /* Some nicer register names: ta..td alias t10, t11, t12 and AT. */ | |||||
| #define ta t10 | |||||
| #define tb t11 | |||||
| #define tc t12 | |||||
| #define td AT /* NOTE(review): presumably only usable while ".set noat" is in effect -- confirm */ | |||||
| /* Danger: these overlap with the argument list and the return value: | |||||
| te, tf and tg are a5, a4 and a3, and th is v0. */ | |||||
| #define te a5 | |||||
| #define tf a4 | |||||
| #define tg a3 | |||||
| #define th v0 | |||||
| .set noat | .set noat | ||||
| .set noreorder | .set noreorder | ||||
| .arch pca56 | .arch pca56 | ||||
| .text | .text | ||||
| /************************************************************************ | |||||
| * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||||
| * ptrdiff_t line_size, int h) | |||||
| * | |||||
| * Copies one quadword (8 bytes) per row from pixels to block, four rows | |||||
| * per loop iteration. Registers as used below: a0 = block, a1 = pixels, | |||||
| * a2 = line_size, a3 = h. | |||||
| * | |||||
| * h is decremented by 4 and both loops run until it is exactly zero, so | |||||
| * h has to be a positive multiple of 4. | |||||
| * NOTE(review): in the unaligned path the extql/extqh byte offsets are | |||||
| * taken from a1 after it has been advanced by whole rows; this presumably | |||||
| * relies on line_size being a multiple of 8, so that every row has the | |||||
| * same misalignment -- confirm with callers. | |||||
| */ | |||||
| .align 6 | |||||
| .globl put_pixels_axp_asm | |||||
| .ent put_pixels_axp_asm | |||||
| put_pixels_axp_asm: | |||||
| .frame sp, 0, ra | |||||
| .prologue 0 | |||||
| and a1, 7, t0 /* t0 = low three bits of the source address */ | |||||
| beq t0, $aligned /* zero: source is 8-byte aligned, take the ldq loop */ | |||||
| .align 4 | |||||
| $unaligned: | |||||
| ldq_u t0, 0(a1) /* per row: fetch the quadword holding the first source byte ... */ | |||||
| ldq_u t1, 8(a1) /* ... and the quadword eight bytes further on */ | |||||
| addq a1, a2, a1 /* next source row */ | |||||
| nop | |||||
| ldq_u t2, 0(a1) | |||||
| ldq_u t3, 8(a1) | |||||
| addq a1, a2, a1 | |||||
| nop | |||||
| ldq_u t4, 0(a1) | |||||
| ldq_u t5, 8(a1) | |||||
| addq a1, a2, a1 | |||||
| nop | |||||
| ldq_u t6, 0(a1) | |||||
| ldq_u t7, 8(a1) | |||||
| extql t0, a1, t0 /* extql/extqh take their byte offset from a1, which has already moved on by whole rows */ | |||||
| addq a1, a2, a1 | |||||
| extqh t1, a1, t1 | |||||
| addq a0, a2, t8 /* t8, t9, ta = destination rows 1, 2 and 3 (ta is the alias for t10) */ | |||||
| extql t2, a1, t2 | |||||
| addq t8, a2, t9 | |||||
| extqh t3, a1, t3 | |||||
| addq t9, a2, ta | |||||
| extql t4, a1, t4 | |||||
| or t0, t1, t0 /* merge the two extracted parts of row 0 */ | |||||
| extqh t5, a1, t5 | |||||
| or t2, t3, t2 | |||||
| extql t6, a1, t6 | |||||
| or t4, t5, t4 | |||||
| extqh t7, a1, t7 | |||||
| or t6, t7, t6 | |||||
| stq t0, 0(a0) /* the destination is written with plain stq in both paths */ | |||||
| stq t2, 0(t8) | |||||
| stq t4, 0(t9) | |||||
| subq a3, 4, a3 /* four rows done */ | |||||
| stq t6, 0(ta) | |||||
| addq ta, a2, a0 /* block moves on to the next group of four rows */ | |||||
| bne a3, $unaligned | |||||
| ret | |||||
| .align 4 | |||||
| $aligned: | |||||
| ldq t0, 0(a1) /* four source rows into t0-t3 */ | |||||
| addq a1, a2, a1 | |||||
| ldq t1, 0(a1) | |||||
| addq a1, a2, a1 | |||||
| ldq t2, 0(a1) | |||||
| addq a1, a2, a1 | |||||
| ldq t3, 0(a1) | |||||
| addq a0, a2, t4 /* t4, t5, t6 = destination rows 1, 2 and 3 */ | |||||
| addq a1, a2, a1 | |||||
| addq t4, a2, t5 | |||||
| subq a3, 4, a3 /* four rows per iteration */ | |||||
| stq t0, 0(a0) | |||||
| addq t5, a2, t6 | |||||
| stq t1, 0(t4) | |||||
| addq t6, a2, a0 /* block moves on to the next group of four rows */ | |||||
| stq t2, 0(t5) | |||||
| stq t3, 0(t6) | |||||
| bne a3, $aligned | |||||
| ret | |||||
| .end put_pixels_axp_asm | |||||
| /************************************************************************ | /************************************************************************ | ||||
| * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | ||||
| * int line_size) | * int line_size) | ||||
| @@ -0,0 +1,213 @@ | |||||
| /* | |||||
| * Alpha optimized DSP utils | |||||
| * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavcodec/hpeldsp.h" | |||||
| #include "hpeldsp_alpha.h" | |||||
| #include "asm.h" | |||||
| static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) | |||||
| { /* Average of a and b without rounding up, computed as | |||||
| * (a & b) + ((a ^ b) >> 1): the bits common to both plus half of the | |||||
| * bits that differ. The 0xfe mask is applied before the shift; BYTE_VEC | |||||
| * is defined outside this view and presumably replicates its argument | |||||
| * into all eight bytes of the word, so that the shift cannot move a bit | |||||
| * from one byte into its neighbour -- TODO confirm against asm.h. */ | |||||
| return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||||
| } | |||||
| static inline uint64_t avg2(uint64_t a, uint64_t b) | |||||
| { /* Rounding counterpart of avg2_no_rnd(): (a | b) - ((a ^ b) >> 1). | |||||
| * Subtracting half of the differing bits from the union rounds the | |||||
| * average up where avg2_no_rnd() rounds it down. The 0xfe mask before | |||||
| * the shift is presumably there to keep the operation per byte (BYTE_VEC | |||||
| * is defined outside this view) -- TODO confirm against asm.h. */ | |||||
| return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||||
| } | |||||
| #if 0 | |||||
| /* Reference four-way average, compiled out by the surrounding #if 0. | |||||
| The XY2 routines basically utilize this scheme, but reuse parts in | |||||
| each iteration. | |||||
| r1 adds the four inputs after masking off the low two bits of every | |||||
| byte and shifting right by 2; r2 adds the low two bits of all four | |||||
| inputs plus BYTE_VEC(0x02), shifts right by 2 and keeps two bits. */ | |||||
| static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) | |||||
| { | |||||
| uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) | |||||
| + ((l2 & ~BYTE_VEC(0x03)) >> 2) | |||||
| + ((l3 & ~BYTE_VEC(0x03)) >> 2) | |||||
| + ((l4 & ~BYTE_VEC(0x03)) >> 2); | |||||
| uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) | |||||
| + (l2 & BYTE_VEC(0x03)) | |||||
| + (l3 & BYTE_VEC(0x03)) | |||||
| + (l4 & BYTE_VEC(0x03)) | |||||
| + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); | |||||
| return r1 + r2; | |||||
| } | |||||
| #endif | |||||
| #define OP(LOAD, STORE) /* plain kernel: one LOAD and one STORE per row; reads pixels, block, line_size and h from the function it is expanded in */ \ | |||||
| do { \ | |||||
| STORE(LOAD(pixels), block); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } while (--h) /* the body runs before h is tested, so h == 0 on entry is not treated as "no rows" */ | |||||
| #define OP_X2(LOAD, STORE) /* X2 kernel: averages each loaded quadword with the same row shifted by one byte */ \ | |||||
| do { \ | |||||
| uint64_t pix1, pix2; \ | |||||
| \ | |||||
| pix1 = LOAD(pixels); \ | |||||
| pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); /* drop the lowest byte and insert pixels[8] as the highest; with little-endian loads this would be the eight bytes at pixels + 1 -- TODO confirm */ \ | |||||
| STORE(AVG2(pix1, pix2), block); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } while (--h) /* the body runs before h is tested, so h must be at least 1 on entry */ | |||||
| #define OP_Y2(LOAD, STORE) /* Y2 kernel: averages each row with the row line_size bytes after it */ \ | |||||
| do { \ | |||||
| uint64_t pix = LOAD(pixels); /* the first row is loaded once, before the loop */ \ | |||||
| do { \ | |||||
| uint64_t next_pix; \ | |||||
| \ | |||||
| pixels += line_size; \ | |||||
| next_pix = LOAD(pixels); \ | |||||
| STORE(AVG2(pix, next_pix), block); \ | |||||
| block += line_size; \ | |||||
| pix = next_pix; /* this load becomes the upper row of the next iteration */ \ | |||||
| } while (--h); /* h rows are stored and h + 1 source rows are read in total */ \ | |||||
| } while (0) | |||||
| #define OP_XY2(LOAD, STORE) /* XY2 kernel: four-way average of a row, the same row shifted by one byte, and both of those from the following row */ \ | |||||
| do { \ | |||||
| uint64_t pix1 = LOAD(pixels); \ | |||||
| uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); /* pix1 shifted down one byte, pixels[8] inserted as the top byte */ \ | |||||
| uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) /* low two bits of every byte, summed over pix1 and pix2 */ \ | |||||
| + (pix2 & BYTE_VEC(0x03)); \ | |||||
| uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) /* remaining upper bits, shifted right by 2 before summing */ \ | |||||
| + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||||
| \ | |||||
| do { \ | |||||
| uint64_t npix1, npix2; \ | |||||
| uint64_t npix_l, npix_h; \ | |||||
| uint64_t avg; \ | |||||
| \ | |||||
| pixels += line_size; \ | |||||
| npix1 = LOAD(pixels); /* same split as above, for the next source row */ \ | |||||
| npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||||
| npix_l = (npix1 & BYTE_VEC(0x03)) \ | |||||
| + (npix2 & BYTE_VEC(0x03)); \ | |||||
| npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ | |||||
| + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||||
| avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) /* low-bit sums of both rows plus the rounder, shifted right by 2 and masked to two bits */ \ | |||||
| + pix_h + npix_h; \ | |||||
| STORE(avg, block); \ | |||||
| \ | |||||
| block += line_size; \ | |||||
| pix_l = npix_l; /* the next row's partial sums become the current row's for the following iteration */ \ | |||||
| pix_h = npix_h; \ | |||||
| } while (--h); /* h rows are stored and h + 1 source rows are read in total */ \ | |||||
| } while (0) | |||||
| #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) /* defines two static functions, OPNAME_pixels<SUFF>_axp and OPNAME_pixels16<SUFF>_axp, around the kernel macro OPKIND */ \ | |||||
| static void OPNAME ## _pixels ## SUFF ## _axp \ | |||||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||||
| ptrdiff_t line_size, int h) \ | |||||
| { \ | |||||
| if ((size_t) pixels & 0x7) { /* low three address bits set: source is not 8-byte aligned, expand the kernel with uldq */ \ | |||||
| OPKIND(uldq, STORE); \ | |||||
| } else { /* aligned source: expand the kernel with ldq */ \ | |||||
| OPKIND(ldq, STORE); \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static void OPNAME ## _pixels16 ## SUFF ## _axp \ | |||||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||||
| ptrdiff_t line_size, int h) \ | |||||
| { /* 16-byte-wide variant: the function above on bytes 0-7, then again on bytes 8-15 */ \ | |||||
| OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ | |||||
| OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ | |||||
| } | |||||
| #define PIXOP(OPNAME, STORE) /* expands MAKE_OP four times: for the plain, _x2, _y2 and _xy2 kernels */ \ | |||||
| MAKE_OP(OPNAME, , OP, STORE) \ | |||||
| MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ | |||||
| MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ | |||||
| MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) | |||||
| /* Rounding primitives. AVG2 and AVG4_ROUNDER are picked up by the OP_* kernels when PIXOP expands. */ | |||||
| #define AVG2 avg2 | |||||
| #define AVG4 avg4 /* NOTE(review): no use of AVG4 is visible in this view, and avg4 itself sits inside #if 0 */ | |||||
| #define AVG4_ROUNDER BYTE_VEC(0x02) | |||||
| #define STORE(l, b) stq(l, b) | |||||
| PIXOP(put, STORE); /* put_*: the computed value is stored as is */ | |||||
| #undef STORE | |||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); /* avg_*: the computed value is averaged with what block already holds; NOTE(review): the definition ends in ';', so every use expands to an extra empty statement */ | |||||
| PIXOP(avg, STORE); | |||||
| /* Not rounding primitives. Same instantiations again with avg2_no_rnd and a rounder of 0x01. */ | |||||
| #undef AVG2 | |||||
| #undef AVG4 | |||||
| #undef AVG4_ROUNDER | |||||
| #undef STORE | |||||
| #define AVG2 avg2_no_rnd | |||||
| #define AVG4 avg4_no_rnd /* NOTE(review): avg4_no_rnd has no definition in this view; harmless only while AVG4 stays unused */ | |||||
| #define AVG4_ROUNDER BYTE_VEC(0x01) | |||||
| #define STORE(l, b) stq(l, b) | |||||
| PIXOP(put_no_rnd, STORE); | |||||
| #undef STORE | |||||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); /* here AVG2 is avg2_no_rnd, so the merge with the destination does not round up either */ | |||||
| PIXOP(avg_no_rnd, STORE); | |||||
| static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, | |||||
| ptrdiff_t line_size, int h) | |||||
| { /* 16-byte-wide copy built from the assembly routine put_pixels_axp_asm: | |||||
| * one call for bytes 0-7 of every row, a second call for bytes 8-15. | |||||
| * Both calls receive the same line_size and h. */ | |||||
| put_pixels_axp_asm(block, pixels, line_size, h); | |||||
| put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); | |||||
| } | |||||
| av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags) | |||||
| { /* Fill the function tables of *c with the Alpha routines of this file. | |||||
| * Going by the routine names, row [0] receives the 16-byte-wide functions | |||||
| * and row [1] the 8-byte-wide ones; column 0 is the plain variant, 1 the | |||||
| * _x2, 2 the _y2 and 3 the _xy2 variant. flags is not read here. */ | |||||
| c->put_pixels_tab[0][0] = put_pixels16_axp_asm; | |||||
| c->put_pixels_tab[0][1] = put_pixels16_x2_axp; | |||||
| c->put_pixels_tab[0][2] = put_pixels16_y2_axp; | |||||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; | |||||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; /* same routine as put_pixels_tab[0][0] */ | |||||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; | |||||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; | |||||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; | |||||
| c->avg_pixels_tab[0][0] = avg_pixels16_axp; | |||||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; | |||||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; | |||||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; | |||||
| c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; /* this table takes a single subscript here; only 16-wide routines are installed */ | |||||
| c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp; | |||||
| c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp; | |||||
| c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp; | |||||
| c->put_pixels_tab[1][0] = put_pixels_axp_asm; /* row [1]: the 8-byte-wide routines */ | |||||
| c->put_pixels_tab[1][1] = put_pixels_x2_axp; | |||||
| c->put_pixels_tab[1][2] = put_pixels_y2_axp; | |||||
| c->put_pixels_tab[1][3] = put_pixels_xy2_axp; | |||||
| c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; /* same routine as put_pixels_tab[1][0] */ | |||||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; | |||||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; | |||||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; | |||||
| c->avg_pixels_tab[1][0] = avg_pixels_axp; | |||||
| c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; | |||||
| c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; | |||||
| c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; | |||||
| } | |||||
| @@ -0,0 +1,28 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H | |||||
| #define AVCODEC_ALPHA_HPELDSP_ALPHA_H | |||||
| #include <stdint.h> | |||||
| #include <stddef.h> | |||||
| void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||||
| ptrdiff_t line_size, int h); /* implemented in assembly under the label put_pixels_axp_asm; copies one 8-byte quadword per row and handles four rows per loop iteration */ | |||||
| #endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */ | |||||
| @@ -0,0 +1,124 @@ | |||||
| /* | |||||
| * Alpha optimized DSP utils | |||||
| * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| /* | |||||
| * These functions are scheduled for pca56. They should work | |||||
| * reasonably on ev6, though. | |||||
| */ | |||||
| #include "regdef.h" | |||||
| .set noat | |||||
| .set noreorder | |||||
| .arch pca56 | |||||
| .text | |||||
| /************************************************************************ | |||||
| * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||||
| * ptrdiff_t line_size, int h) | |||||
| * | |||||
| * Copies one quadword (8 bytes) per row from pixels to block, four rows | |||||
| * per loop iteration. Registers as used below: a0 = block, a1 = pixels, | |||||
| * a2 = line_size, a3 = h. | |||||
| * | |||||
| * h is decremented by 4 and both loops run until it is exactly zero, so | |||||
| * h has to be a positive multiple of 4. | |||||
| * NOTE(review): in the unaligned path the extql/extqh byte offsets are | |||||
| * taken from a1 after it has been advanced by whole rows; this presumably | |||||
| * relies on line_size being a multiple of 8, so that every row has the | |||||
| * same misalignment -- confirm with callers. | |||||
| */ | |||||
| .align 6 | |||||
| .globl put_pixels_axp_asm | |||||
| .ent put_pixels_axp_asm | |||||
| put_pixels_axp_asm: | |||||
| .frame sp, 0, ra | |||||
| .prologue 0 | |||||
| and a1, 7, t0 /* t0 = low three bits of the source address */ | |||||
| beq t0, $aligned /* zero: source is 8-byte aligned, take the ldq loop */ | |||||
| .align 4 | |||||
| $unaligned: | |||||
| ldq_u t0, 0(a1) /* per row: fetch the quadword holding the first source byte ... */ | |||||
| ldq_u t1, 8(a1) /* ... and the quadword eight bytes further on */ | |||||
| addq a1, a2, a1 /* next source row */ | |||||
| nop | |||||
| ldq_u t2, 0(a1) | |||||
| ldq_u t3, 8(a1) | |||||
| addq a1, a2, a1 | |||||
| nop | |||||
| ldq_u t4, 0(a1) | |||||
| ldq_u t5, 8(a1) | |||||
| addq a1, a2, a1 | |||||
| nop | |||||
| ldq_u t6, 0(a1) | |||||
| ldq_u t7, 8(a1) | |||||
| extql t0, a1, t0 /* extql/extqh take their byte offset from a1, which has already moved on by whole rows */ | |||||
| addq a1, a2, a1 | |||||
| extqh t1, a1, t1 | |||||
| addq a0, a2, t8 /* t8, t9, ta = destination rows 1, 2 and 3 (ta is the alias for t10) */ | |||||
| extql t2, a1, t2 | |||||
| addq t8, a2, t9 | |||||
| extqh t3, a1, t3 | |||||
| addq t9, a2, ta | |||||
| extql t4, a1, t4 | |||||
| or t0, t1, t0 /* merge the two extracted parts of row 0 */ | |||||
| extqh t5, a1, t5 | |||||
| or t2, t3, t2 | |||||
| extql t6, a1, t6 | |||||
| or t4, t5, t4 | |||||
| extqh t7, a1, t7 | |||||
| or t6, t7, t6 | |||||
| stq t0, 0(a0) /* the destination is written with plain stq in both paths */ | |||||
| stq t2, 0(t8) | |||||
| stq t4, 0(t9) | |||||
| subq a3, 4, a3 /* four rows done */ | |||||
| stq t6, 0(ta) | |||||
| addq ta, a2, a0 /* block moves on to the next group of four rows */ | |||||
| bne a3, $unaligned | |||||
| ret | |||||
| .align 4 | |||||
| $aligned: | |||||
| ldq t0, 0(a1) /* four source rows into t0-t3 */ | |||||
| addq a1, a2, a1 | |||||
| ldq t1, 0(a1) | |||||
| addq a1, a2, a1 | |||||
| ldq t2, 0(a1) | |||||
| addq a1, a2, a1 | |||||
| ldq t3, 0(a1) | |||||
| addq a0, a2, t4 /* t4, t5, t6 = destination rows 1, 2 and 3 */ | |||||
| addq a1, a2, a1 | |||||
| addq t4, a2, t5 | |||||
| subq a3, 4, a3 /* four rows per iteration */ | |||||
| stq t0, 0(a0) | |||||
| addq t5, a2, t6 | |||||
| stq t1, 0(t4) | |||||
| addq t6, a2, a0 /* block moves on to the next group of four rows */ | |||||
| stq t2, 0(t5) | |||||
| stq t3, 0(t6) | |||||
| bne a3, $aligned | |||||
| ret | |||||
| .end put_pixels_axp_asm | |||||
| @@ -63,4 +63,15 @@ | |||||
| #define sp $30 /* stack pointer */ | #define sp $30 /* stack pointer */ | ||||
| #define zero $31 /* reads as zero, writes are noops */ | #define zero $31 /* reads as zero, writes are noops */ | ||||
| /* Some nicer register names: ta..td alias t10, t11, t12 and AT. */ | |||||
| #define ta t10 | |||||
| #define tb t11 | |||||
| #define tc t12 | |||||
| #define td AT /* NOTE(review): presumably only usable while ".set noat" is in effect -- confirm */ | |||||
| /* Danger: these overlap with the argument list and the return value: | |||||
| te, tf and tg are a5, a4 and a3, and th is v0. */ | |||||
| #define te a5 | |||||
| #define tf a4 | |||||
| #define tg a3 | |||||
| #define th v0 | |||||
| #endif /* AVCODEC_ALPHA_REGDEF_H */ | #endif /* AVCODEC_ALPHA_REGDEF_H */ | ||||
| @@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) | |||||
| hpel_funcs(avg, [3], 2); | hpel_funcs(avg, [3], 2); | ||||
| hpel_funcs(avg_no_rnd,, 16); | hpel_funcs(avg_no_rnd,, 16); | ||||
| if (ARCH_ALPHA) | |||||
| ff_hpeldsp_init_alpha(c, flags); | |||||
| if (ARCH_ARM) | if (ARCH_ARM) | ||||
| ff_hpeldsp_init_arm(c, flags); | ff_hpeldsp_init_arm(c, flags); | ||||
| if (ARCH_BFIN) | if (ARCH_BFIN) | ||||
| @@ -94,6 +94,7 @@ typedef struct HpelDSPContext { | |||||
| void ff_hpeldsp_init(HpelDSPContext *c, int flags); | void ff_hpeldsp_init(HpelDSPContext *c, int flags); | ||||
| void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); | |||||
| void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); | void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); | ||||
| void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags); | void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags); | ||||
| void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); | void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); | ||||