Signed-off-by: Martin Storsjö <martin@martin.st>
tags/n2.0
| @@ -4,4 +4,6 @@ OBJS += alpha/dsputil_alpha.o \ | |||
| alpha/motion_est_mvi_asm.o \ | |||
| alpha/simple_idct_alpha.o \ | |||
| OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \ | |||
| alpha/hpeldsp_alpha_asm.o | |||
| OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o | |||
| @@ -119,196 +119,11 @@ static void clear_blocks_axp(int16_t *blocks) { | |||
| } while (n); | |||
| } | |||
| static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) | |||
| { | |||
| return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||
| } | |||
| static inline uint64_t avg2(uint64_t a, uint64_t b) | |||
| { | |||
| return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||
| } | |||
| #if 0 | |||
| /* The XY2 routines basically utilize this scheme, but reuse parts in | |||
| each iteration. */ | |||
| static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) | |||
| { | |||
| uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) | |||
| + ((l2 & ~BYTE_VEC(0x03)) >> 2) | |||
| + ((l3 & ~BYTE_VEC(0x03)) >> 2) | |||
| + ((l4 & ~BYTE_VEC(0x03)) >> 2); | |||
| uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) | |||
| + (l2 & BYTE_VEC(0x03)) | |||
| + (l3 & BYTE_VEC(0x03)) | |||
| + (l4 & BYTE_VEC(0x03)) | |||
| + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); | |||
| return r1 + r2; | |||
| } | |||
| #endif | |||
| #define OP(LOAD, STORE) \ | |||
| do { \ | |||
| STORE(LOAD(pixels), block); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } while (--h) | |||
| #define OP_X2(LOAD, STORE) \ | |||
| do { \ | |||
| uint64_t pix1, pix2; \ | |||
| \ | |||
| pix1 = LOAD(pixels); \ | |||
| pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||
| STORE(AVG2(pix1, pix2), block); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } while (--h) | |||
| #define OP_Y2(LOAD, STORE) \ | |||
| do { \ | |||
| uint64_t pix = LOAD(pixels); \ | |||
| do { \ | |||
| uint64_t next_pix; \ | |||
| \ | |||
| pixels += line_size; \ | |||
| next_pix = LOAD(pixels); \ | |||
| STORE(AVG2(pix, next_pix), block); \ | |||
| block += line_size; \ | |||
| pix = next_pix; \ | |||
| } while (--h); \ | |||
| } while (0) | |||
| #define OP_XY2(LOAD, STORE) \ | |||
| do { \ | |||
| uint64_t pix1 = LOAD(pixels); \ | |||
| uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||
| uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \ | |||
| + (pix2 & BYTE_VEC(0x03)); \ | |||
| uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \ | |||
| + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||
| \ | |||
| do { \ | |||
| uint64_t npix1, npix2; \ | |||
| uint64_t npix_l, npix_h; \ | |||
| uint64_t avg; \ | |||
| \ | |||
| pixels += line_size; \ | |||
| npix1 = LOAD(pixels); \ | |||
| npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||
| npix_l = (npix1 & BYTE_VEC(0x03)) \ | |||
| + (npix2 & BYTE_VEC(0x03)); \ | |||
| npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ | |||
| + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||
| avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \ | |||
| + pix_h + npix_h; \ | |||
| STORE(avg, block); \ | |||
| \ | |||
| block += line_size; \ | |||
| pix_l = npix_l; \ | |||
| pix_h = npix_h; \ | |||
| } while (--h); \ | |||
| } while (0) | |||
| #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ | |||
| static void OPNAME ## _pixels ## SUFF ## _axp \ | |||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||
| ptrdiff_t line_size, int h) \ | |||
| { \ | |||
| if ((size_t) pixels & 0x7) { \ | |||
| OPKIND(uldq, STORE); \ | |||
| } else { \ | |||
| OPKIND(ldq, STORE); \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static void OPNAME ## _pixels16 ## SUFF ## _axp \ | |||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||
| ptrdiff_t line_size, int h) \ | |||
| { \ | |||
| OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ | |||
| OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ | |||
| } | |||
| #define PIXOP(OPNAME, STORE) \ | |||
| MAKE_OP(OPNAME, , OP, STORE) \ | |||
| MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ | |||
| MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ | |||
| MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) | |||
| /* Rounding primitives. */ | |||
| #define AVG2 avg2 | |||
| #define AVG4 avg4 | |||
| #define AVG4_ROUNDER BYTE_VEC(0x02) | |||
| #define STORE(l, b) stq(l, b) | |||
| PIXOP(put, STORE); | |||
| #undef STORE | |||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |||
| PIXOP(avg, STORE); | |||
| /* Non-rounding primitives. */ | |||
| #undef AVG2 | |||
| #undef AVG4 | |||
| #undef AVG4_ROUNDER | |||
| #undef STORE | |||
| #define AVG2 avg2_no_rnd | |||
| #define AVG4 avg4_no_rnd | |||
| #define AVG4_ROUNDER BYTE_VEC(0x01) | |||
| #define STORE(l, b) stq(l, b) | |||
| PIXOP(put_no_rnd, STORE); | |||
| #undef STORE | |||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |||
| PIXOP(avg_no_rnd, STORE); | |||
| static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h) | |||
| { | |||
| put_pixels_axp_asm(block, pixels, line_size, h); | |||
| put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); | |||
| } | |||
| av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx) | |||
| { | |||
| const int high_bit_depth = avctx->bits_per_raw_sample > 8; | |||
| if (!high_bit_depth) { | |||
| c->put_pixels_tab[0][0] = put_pixels16_axp_asm; | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2_axp; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2_axp; | |||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; | |||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; | |||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_axp; | |||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; | |||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; | |||
| c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; | |||
| c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp; | |||
| c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp; | |||
| c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp; | |||
| c->put_pixels_tab[1][0] = put_pixels_axp_asm; | |||
| c->put_pixels_tab[1][1] = put_pixels_x2_axp; | |||
| c->put_pixels_tab[1][2] = put_pixels_y2_axp; | |||
| c->put_pixels_tab[1][3] = put_pixels_xy2_axp; | |||
| c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; | |||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; | |||
| c->avg_pixels_tab[1][0] = avg_pixels_axp; | |||
| c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; | |||
| c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; | |||
| c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; | |||
| c->clear_blocks = clear_blocks_axp; | |||
| } | |||
| @@ -26,8 +26,6 @@ void ff_simple_idct_axp(int16_t *block); | |||
| void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block); | |||
| void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block); | |||
| void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | |||
| int line_size); | |||
| void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | |||
| @@ -26,114 +26,11 @@ | |||
| #include "regdef.h" | |||
| /* Some nicer register names. */ | |||
| #define ta t10 | |||
| #define tb t11 | |||
| #define tc t12 | |||
| #define td AT | |||
| /* Danger: these overlap with the argument list and the return value */ | |||
| #define te a5 | |||
| #define tf a4 | |||
| #define tg a3 | |||
| #define th v0 | |||
| .set noat | |||
| .set noreorder | |||
| .arch pca56 | |||
| .text | |||
| /************************************************************************ | |||
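| * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||
| * ptrdiff_t line_size, int h) | |||
| * | |||
| * Copies h rows of 8 bytes from pixels to block. Each loop iteration | |||
| * handles four rows, so h must be a positive multiple of 4. | |||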
| */ | |||
| .align 6 | |||
| .globl put_pixels_axp_asm | |||
| .ent put_pixels_axp_asm | |||
| put_pixels_axp_asm: | |||
| .frame sp, 0, ra | |||
| .prologue 0 | |||
| and a1, 7, t0 | |||
| beq t0, $aligned | |||
| .align 4 | |||
| $unaligned: | |||
| ldq_u t0, 0(a1) | |||
| ldq_u t1, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t2, 0(a1) | |||
| ldq_u t3, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t4, 0(a1) | |||
| ldq_u t5, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t6, 0(a1) | |||
| ldq_u t7, 8(a1) | |||
| extql t0, a1, t0 | |||
| addq a1, a2, a1 | |||
| extqh t1, a1, t1 | |||
| addq a0, a2, t8 | |||
| extql t2, a1, t2 | |||
| addq t8, a2, t9 | |||
| extqh t3, a1, t3 | |||
| addq t9, a2, ta | |||
| extql t4, a1, t4 | |||
| or t0, t1, t0 | |||
| extqh t5, a1, t5 | |||
| or t2, t3, t2 | |||
| extql t6, a1, t6 | |||
| or t4, t5, t4 | |||
| extqh t7, a1, t7 | |||
| or t6, t7, t6 | |||
| stq t0, 0(a0) | |||
| stq t2, 0(t8) | |||
| stq t4, 0(t9) | |||
| subq a3, 4, a3 | |||
| stq t6, 0(ta) | |||
| addq ta, a2, a0 | |||
| bne a3, $unaligned | |||
| ret | |||
| .align 4 | |||
| $aligned: | |||
| ldq t0, 0(a1) | |||
| addq a1, a2, a1 | |||
| ldq t1, 0(a1) | |||
| addq a1, a2, a1 | |||
| ldq t2, 0(a1) | |||
| addq a1, a2, a1 | |||
| ldq t3, 0(a1) | |||
| addq a0, a2, t4 | |||
| addq a1, a2, a1 | |||
| addq t4, a2, t5 | |||
| subq a3, 4, a3 | |||
| stq t0, 0(a0) | |||
| addq t5, a2, t6 | |||
| stq t1, 0(t4) | |||
| addq t6, a2, a0 | |||
| stq t2, 0(t5) | |||
| stq t3, 0(t6) | |||
| bne a3, $aligned | |||
| ret | |||
| .end put_pixels_axp_asm | |||
| /************************************************************************ | |||
| * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, | |||
| * int line_size) | |||
| @@ -0,0 +1,213 @@ | |||
| /* | |||
| * Alpha optimized DSP utils | |||
| * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavcodec/hpeldsp.h" | |||
| #include "hpeldsp_alpha.h" | |||
| #include "asm.h" | |||
| static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) | |||
| { | |||
| return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||
| } | |||
| static inline uint64_t avg2(uint64_t a, uint64_t b) | |||
| { | |||
| return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); | |||
| } | |||
| #if 0 | |||
| /* The XY2 routines basically utilize this scheme, but reuse parts in | |||
| each iteration. */ | |||
| static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) | |||
| { | |||
| uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) | |||
| + ((l2 & ~BYTE_VEC(0x03)) >> 2) | |||
| + ((l3 & ~BYTE_VEC(0x03)) >> 2) | |||
| + ((l4 & ~BYTE_VEC(0x03)) >> 2); | |||
| uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) | |||
| + (l2 & BYTE_VEC(0x03)) | |||
| + (l3 & BYTE_VEC(0x03)) | |||
| + (l4 & BYTE_VEC(0x03)) | |||
| + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); | |||
| return r1 + r2; | |||
| } | |||
| #endif | |||
| #define OP(LOAD, STORE) \ | |||
| do { \ | |||
| STORE(LOAD(pixels), block); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } while (--h) | |||
| #define OP_X2(LOAD, STORE) \ | |||
| do { \ | |||
| uint64_t pix1, pix2; \ | |||
| \ | |||
| pix1 = LOAD(pixels); \ | |||
| pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||
| STORE(AVG2(pix1, pix2), block); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } while (--h) | |||
| #define OP_Y2(LOAD, STORE) \ | |||
| do { \ | |||
| uint64_t pix = LOAD(pixels); \ | |||
| do { \ | |||
| uint64_t next_pix; \ | |||
| \ | |||
| pixels += line_size; \ | |||
| next_pix = LOAD(pixels); \ | |||
| STORE(AVG2(pix, next_pix), block); \ | |||
| block += line_size; \ | |||
| pix = next_pix; \ | |||
| } while (--h); \ | |||
| } while (0) | |||
| #define OP_XY2(LOAD, STORE) \ | |||
| do { \ | |||
| uint64_t pix1 = LOAD(pixels); \ | |||
| uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||
| uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \ | |||
| + (pix2 & BYTE_VEC(0x03)); \ | |||
| uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \ | |||
| + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||
| \ | |||
| do { \ | |||
| uint64_t npix1, npix2; \ | |||
| uint64_t npix_l, npix_h; \ | |||
| uint64_t avg; \ | |||
| \ | |||
| pixels += line_size; \ | |||
| npix1 = LOAD(pixels); \ | |||
| npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ | |||
| npix_l = (npix1 & BYTE_VEC(0x03)) \ | |||
| + (npix2 & BYTE_VEC(0x03)); \ | |||
| npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ | |||
| + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ | |||
| avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \ | |||
| + pix_h + npix_h; \ | |||
| STORE(avg, block); \ | |||
| \ | |||
| block += line_size; \ | |||
| pix_l = npix_l; \ | |||
| pix_h = npix_h; \ | |||
| } while (--h); \ | |||
| } while (0) | |||
| #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ | |||
| static void OPNAME ## _pixels ## SUFF ## _axp \ | |||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||
| ptrdiff_t line_size, int h) \ | |||
| { \ | |||
| if ((size_t) pixels & 0x7) { \ | |||
| OPKIND(uldq, STORE); \ | |||
| } else { \ | |||
| OPKIND(ldq, STORE); \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static void OPNAME ## _pixels16 ## SUFF ## _axp \ | |||
| (uint8_t *restrict block, const uint8_t *restrict pixels, \ | |||
| ptrdiff_t line_size, int h) \ | |||
| { \ | |||
| OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ | |||
| OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ | |||
| } | |||
| #define PIXOP(OPNAME, STORE) \ | |||
| MAKE_OP(OPNAME, , OP, STORE) \ | |||
| MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ | |||
| MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ | |||
| MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) | |||
| /* Rounding primitives. */ | |||
| #define AVG2 avg2 | |||
| #define AVG4 avg4 | |||
| #define AVG4_ROUNDER BYTE_VEC(0x02) | |||
| #define STORE(l, b) stq(l, b) | |||
| PIXOP(put, STORE); | |||
| #undef STORE | |||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |||
| PIXOP(avg, STORE); | |||
| /* Non-rounding primitives. */ | |||
| #undef AVG2 | |||
| #undef AVG4 | |||
| #undef AVG4_ROUNDER | |||
| #undef STORE | |||
| #define AVG2 avg2_no_rnd | |||
| #define AVG4 avg4_no_rnd | |||
| #define AVG4_ROUNDER BYTE_VEC(0x01) | |||
| #define STORE(l, b) stq(l, b) | |||
| PIXOP(put_no_rnd, STORE); | |||
| #undef STORE | |||
| #define STORE(l, b) stq(AVG2(l, ldq(b)), b); | |||
| PIXOP(avg_no_rnd, STORE); | |||
| static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h) | |||
| { | |||
| put_pixels_axp_asm(block, pixels, line_size, h); | |||
| put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); | |||
| } | |||
| av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags) | |||
| { | |||
| c->put_pixels_tab[0][0] = put_pixels16_axp_asm; | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2_axp; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2_axp; | |||
| c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; | |||
| c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; | |||
| c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; | |||
| c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; | |||
| c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; | |||
| c->avg_pixels_tab[0][0] = avg_pixels16_axp; | |||
| c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; | |||
| c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; | |||
| c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; | |||
| c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp; | |||
| c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp; | |||
| c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp; | |||
| c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp; | |||
| c->put_pixels_tab[1][0] = put_pixels_axp_asm; | |||
| c->put_pixels_tab[1][1] = put_pixels_x2_axp; | |||
| c->put_pixels_tab[1][2] = put_pixels_y2_axp; | |||
| c->put_pixels_tab[1][3] = put_pixels_xy2_axp; | |||
| c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; | |||
| c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; | |||
| c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; | |||
| c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp; | |||
| c->avg_pixels_tab[1][0] = avg_pixels_axp; | |||
| c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; | |||
| c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; | |||
| c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; | |||
| } | |||
| @@ -0,0 +1,28 @@ | |||
| /* | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H | |||
| #define AVCODEC_ALPHA_HPELDSP_ALPHA_H | |||
| #include <stdint.h> | |||
| #include <stddef.h> | |||
| void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||
| ptrdiff_t line_size, int h); | |||
| #endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */ | |||
| @@ -0,0 +1,124 @@ | |||
| /* | |||
| * Alpha optimized DSP utils | |||
| * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | |||
| * | |||
| * This file is part of Libav. | |||
| * | |||
| * Libav is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * Libav is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with Libav; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| /* | |||
| * These functions are scheduled for pca56. They should work | |||
| * reasonably on ev6, though. | |||
| */ | |||
| #include "regdef.h" | |||
| .set noat | |||
| .set noreorder | |||
| .arch pca56 | |||
| .text | |||
| /************************************************************************ | |||
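| * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, | |||
| * ptrdiff_t line_size, int h) | |||
| * | |||
| * Copies h rows of 8 bytes from pixels to block. Each loop iteration | |||
| * handles four rows, so h must be a positive multiple of 4. | |||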
| */ | |||
| .align 6 | |||
| .globl put_pixels_axp_asm | |||
| .ent put_pixels_axp_asm | |||
| put_pixels_axp_asm: | |||
| .frame sp, 0, ra | |||
| .prologue 0 | |||
| and a1, 7, t0 | |||
| beq t0, $aligned | |||
| .align 4 | |||
| $unaligned: | |||
| ldq_u t0, 0(a1) | |||
| ldq_u t1, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t2, 0(a1) | |||
| ldq_u t3, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t4, 0(a1) | |||
| ldq_u t5, 8(a1) | |||
| addq a1, a2, a1 | |||
| nop | |||
| ldq_u t6, 0(a1) | |||
| ldq_u t7, 8(a1) | |||
| extql t0, a1, t0 | |||
| addq a1, a2, a1 | |||
| extqh t1, a1, t1 | |||
| addq a0, a2, t8 | |||
| extql t2, a1, t2 | |||
| addq t8, a2, t9 | |||
| extqh t3, a1, t3 | |||
| addq t9, a2, ta | |||
| extql t4, a1, t4 | |||
| or t0, t1, t0 | |||
| extqh t5, a1, t5 | |||
| or t2, t3, t2 | |||
| extql t6, a1, t6 | |||
| or t4, t5, t4 | |||
| extqh t7, a1, t7 | |||
| or t6, t7, t6 | |||
| stq t0, 0(a0) | |||
| stq t2, 0(t8) | |||
| stq t4, 0(t9) | |||
| subq a3, 4, a3 | |||
| stq t6, 0(ta) | |||
| addq ta, a2, a0 | |||
| bne a3, $unaligned | |||
| ret | |||
| .align 4 | |||
| $aligned: | |||
| ldq t0, 0(a1) | |||
| addq a1, a2, a1 | |||
| ldq t1, 0(a1) | |||
| addq a1, a2, a1 | |||
| ldq t2, 0(a1) | |||
| addq a1, a2, a1 | |||
| ldq t3, 0(a1) | |||
| addq a0, a2, t4 | |||
| addq a1, a2, a1 | |||
| addq t4, a2, t5 | |||
| subq a3, 4, a3 | |||
| stq t0, 0(a0) | |||
| addq t5, a2, t6 | |||
| stq t1, 0(t4) | |||
| addq t6, a2, a0 | |||
| stq t2, 0(t5) | |||
| stq t3, 0(t6) | |||
| bne a3, $aligned | |||
| ret | |||
| .end put_pixels_axp_asm | |||
| @@ -63,4 +63,15 @@ | |||
| #define sp $30 /* stack pointer */ | |||
| #define zero $31 /* reads as zero, writes are noops */ | |||
| /* Some nicer register names. */ | |||
| #define ta t10 | |||
| #define tb t11 | |||
| #define tc t12 | |||
| #define td AT | |||
| /* Danger: these overlap with the argument list and the return value */ | |||
| #define te a5 | |||
| #define tf a4 | |||
| #define tg a3 | |||
| #define th v0 | |||
| #endif /* AVCODEC_ALPHA_REGDEF_H */ | |||
| @@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags) | |||
| hpel_funcs(avg, [3], 2); | |||
| hpel_funcs(avg_no_rnd,, 16); | |||
| if (ARCH_ALPHA) | |||
| ff_hpeldsp_init_alpha(c, flags); | |||
| if (ARCH_ARM) | |||
| ff_hpeldsp_init_arm(c, flags); | |||
| if (ARCH_BFIN) | |||
| @@ -94,6 +94,7 @@ typedef struct HpelDSPContext { | |||
| void ff_hpeldsp_init(HpelDSPContext *c, int flags); | |||
| void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags); | |||
| void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags); | |||
| void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags); | |||
| void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags); | |||