|  | @@ -0,0 +1,186 @@ | 
														
													
														
															
																|  |  |  |  |  | /* | 
														
													
														
															
																|  |  |  |  |  | * Alpha optimized DSP utils | 
														
													
														
															
																|  |  |  |  |  | * Copyright (c) 2002 Falk Hueffner <falk@debian.org> | 
														
													
														
															
																|  |  |  |  |  | * | 
														
													
														
															
																|  |  |  |  |  | * This program is free software; you can redistribute it and/or modify | 
														
													
														
															
																|  |  |  |  |  | * it under the terms of the GNU General Public License as published by | 
														
													
														
															
																|  |  |  |  |  | * the Free Software Foundation; either version 2 of the License, or | 
														
													
														
															
																|  |  |  |  |  | * (at your option) any later version. | 
														
													
														
															
																|  |  |  |  |  | * | 
														
													
														
															
																|  |  |  |  |  | * This program is distributed in the hope that it will be useful, | 
														
													
														
															
																|  |  |  |  |  | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
														
													
														
															
																|  |  |  |  |  | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
														
													
														
															
																|  |  |  |  |  | * GNU General Public License for more details. | 
														
													
														
															
																|  |  |  |  |  | * | 
														
													
														
															
																|  |  |  |  |  | * You should have received a copy of the GNU General Public License | 
														
													
														
															
																|  |  |  |  |  | * along with this program; if not, write to the Free Software | 
														
													
														
															
																|  |  |  |  |  | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
														
													
														
															
																|  |  |  |  |  | */ | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																|  |  |  |  |  | #include "regdef.h" | 
														
													
														
															
																|  |  |  |  |  | #ifdef HAVE_AV_CONFIG_H | 
														
													
														
															
																|  |  |  |  |  | #include "config.h" | 
														
													
														
															
																|  |  |  |  |  | #endif | 
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																/* Some nicer register names.  */
/*
 * Extra scratch-register aliases used by the routines in this file.
 * The right-hand names (t10..t12, AT, a3..a5, v0) are expected to come
 * from regdef.h.
 */
#define ta t10
#define tb t11
#define tc t12
#define td AT           /* the assembler temporary; see ".set noat" below */
/*
 * Danger: these overlap with the argument list and the return value.
 * In pix_abs16x16_mvi_asm below, a3 holds the remaining line count and
 * v0 the running sum, so tg and th must NOT be used as temporaries
 * there; only te (a5) and tf (a4) are free.
 */
#define te a5
#define tf a4
#define tg a3
#define th v0

        /* AT is named explicitly (as td), so the assembler must not
           treat it as its own scratch register.  */
        .set noat
        /* Request that the hand-scheduled instruction order be kept.  */
        .set noreorder
        /* Select the pca56 instruction set; the code below uses the
           MVI instruction perr.  */
        .arch pca56
        .text
														
													
														
															
																|  |  |  |  |  | 
 | 
														
													
														
															
																/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * Sum of absolute differences between two blocks of 16 lines by
 * 16 bytes.  Each line is processed as two quadwords ("left" and
 * "right"), and perr yields the summed byte-wise absolute difference
 * of one quadword pair.
 *
 * In:   a0 = pix1       read with ldq, so it is assumed to be 8-byte
 *                       aligned on every line
 *       a1 = pix2       may be unaligned; its alignment is tested once
 *                       on entry and selects one of the two loops
 *       a2 = line_size  byte distance between lines; assumed to be a
 *                       multiple of 8 so that the alignment of both
 *                       pointers is the same on every line
 * Out:  v0 = accumulated error
 *
 * Clobbers t0-t9, ta-td (t10-t12, AT), te/tf (a5, a4), a0, a1 and a3.
 * The height is fixed at 16 lines and is kept in a3.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.  For that reason
 * the instruction order below is deliberate and should not be
 * "tidied up".
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        /* Leaf routine: no stack frame is allocated.  */
        .frame sp, 0, ra, 0
        .prologue 0

        /* Optional profiling hook; AT receives the return address.  */
#ifdef HAVE_GPROF
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

        and     a1, 7, t0       # low bits of pix2: 0 if quadword aligned
        clr     v0              # running error sum
        lda     a3, 16          # h = 16 lines
        beq     t0, $aligned
        .align 4
        /* pix2 is not quadword aligned: two lines per iteration.  Each
           line needs three ldq_u loads which are merged into two
           aligned quadwords with extql/extqh.  */
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a1)       # left_u
        ldq_u   t1, 8(a1)       # mid
        ldq_u   t2, 16(a1)      # right_u
        ldq     t3, 0(a0)       # ref left
        ldq     t4, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        addq    a1, a2, a1      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a1)       # left_u
        ldq_u   t6, 8(a1)       # mid
        ldq_u   t7, 16(a1)      # right_u
        ldq     t8, 0(a0)       # ref left
        ldq     t9, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        addq    a1, a2, a1      # pix2
        /* calc line 0 */
        /* a1 has already been advanced past both lines here.  Only its
           low three bits are used as the byte shift, and they match
           the line being processed as long as line_size is a multiple
           of 8.  */
        extql   t0, a1, t0      # left lo
        extqh   t1, a1, ta      # left hi
        extql   t1, a1, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a1, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a1, t5      # left lo
        extqh   t6, a1, ta      # left hi
        extql   t6, a1, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a1, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a3,  2, a3      # h -= 2
        bne     a3, $unaligned
        ret

        .align 4
        /* pix2 is quadword aligned: four lines per iteration, all loads
           issued before the first perr.  te and tf (a5, a4) serve as
           extra temporaries here.  */
$aligned:
        /* load line 0 */
        ldq     t0, 0(a1)       # left
        ldq     t1, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     t2, 0(a0)       # ref left
        ldq     t3, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        /* load line 1 */
        ldq     t4, 0(a1)       # left
        ldq     t5, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     t6, 0(a0)       # ref left
        ldq     t7, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        /* load line 2 */
        ldq     t8, 0(a1)       # left
        ldq     t9, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     ta, 0(a0)       # ref left
        ldq     tb, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        /* load line 3 */
        ldq     tc, 0(a1)       # left
        ldq     td, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     te, 0(a0)       # ref left
        ldq     tf, 8(a0)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a0, a2, a0      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, tf, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a3,  4, a3      # h -= 4
        bne     a3, $aligned
        ret
        .end pix_abs16x16_mvi_asm