|
|
@@ -5,6 +5,11 @@ |
|
|
|
// current version mostly by Michael Niedermayer (michaelni@gmx.at) |
|
|
|
// the parts written by michael are under GNU GPL |
|
|
|
|
|
|
|
/* |
|
|
|
supported input formats: YV12 (grayscale soon too)
|
|
|
supported output formats: YV12, BGR15, BGR16, BGR24, BGR32 (grayscale soon too) |
|
|
|
*/ |
|
|
|
|
|
|
|
#include <inttypes.h> |
|
|
|
#include <string.h> |
|
|
|
#include <math.h> |
|
|
@@ -16,6 +21,7 @@ |
|
|
|
#endif |
|
|
|
#include "swscale.h" |
|
|
|
#include "../cpudetect.h" |
|
|
|
#include "../libvo/img_format.h" |
|
|
|
#undef MOVNTQ |
|
|
|
#undef PAVGB |
|
|
|
|
|
|
@@ -23,14 +29,20 @@ |
|
|
|
//#undef HAVE_MMX |
|
|
|
//#undef ARCH_X86 |
|
|
|
#define DITHER1XBPP |
|
|
|
int fullUVIpol=0; |
|
|
|
//disables the unscaled height version |
|
|
|
int allwaysIpol=0; |
|
|
|
|
|
|
|
#define RET 0xC3 //near return opcode |
|
|
|
|
|
|
|
//#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } |
|
|
|
#ifdef MP_DEBUG |
|
|
|
#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } |
|
|
|
#else |
|
|
|
#define ASSERT(x) ; |
|
|
|
#endif |
|
|
|
|
|
|
|
#ifdef M_PI |
|
|
|
#define PI M_PI |
|
|
|
#else |
|
|
|
#define PI 3.14159265358979323846 |
|
|
|
#endif |
|
|
|
|
|
|
|
extern int verbose; // defined in mplayer.c |
|
|
|
/* |
|
|
@@ -50,7 +62,6 @@ change the distance of the u & v buffer |
|
|
|
Move static / global vars into a struct so multiple scalers can be used |
|
|
|
write special vertical cubic upscale version |
|
|
|
Optimize C code (yv12 / minmax) |
|
|
|
dstStride[3] |
|
|
|
*/ |
|
|
|
|
|
|
|
#define ABS(a) ((a) > 0 ? (a) : (-(a))) |
|
|
@@ -101,39 +112,9 @@ static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; |
|
|
|
static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; |
|
|
|
static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; |
|
|
|
|
|
|
|
static uint64_t __attribute__((aligned(8))) temp0; |
|
|
|
// FIXME remove |
|
|
|
static uint64_t __attribute__((aligned(8))) asm_yalpha1; |
|
|
|
static uint64_t __attribute__((aligned(8))) asm_uvalpha1; |
|
|
|
|
|
|
|
static int16_t __attribute__((aligned(8))) *lumPixBuf[2000]; |
|
|
|
static int16_t __attribute__((aligned(8))) *chrPixBuf[2000]; |
|
|
|
static int16_t __attribute__((aligned(8))) hLumFilter[8000]; |
|
|
|
static int16_t __attribute__((aligned(8))) hLumFilterPos[2000]; |
|
|
|
static int16_t __attribute__((aligned(8))) hChrFilter[8000]; |
|
|
|
static int16_t __attribute__((aligned(8))) hChrFilterPos[2000]; |
|
|
|
static int16_t __attribute__((aligned(8))) vLumFilter[8000]; |
|
|
|
static int16_t __attribute__((aligned(8))) vLumFilterPos[2000]; |
|
|
|
static int16_t __attribute__((aligned(8))) vChrFilter[8000]; |
|
|
|
static int16_t __attribute__((aligned(8))) vChrFilterPos[2000]; |
|
|
|
|
|
|
|
// These simply contain the values from v(Lum|Chr)Filter, just nicely packed for MMX
|
|
|
//FIXME these are very likely too small / 8000 caused problems with 480x480 |
|
|
|
static int16_t __attribute__((aligned(8))) lumMmxFilter[16000]; |
|
|
|
static int16_t __attribute__((aligned(8))) chrMmxFilter[16000]; |
|
|
|
#else |
|
|
|
static int16_t *lumPixBuf[2000]; |
|
|
|
static int16_t *chrPixBuf[2000]; |
|
|
|
static int16_t hLumFilter[8000]; |
|
|
|
static int16_t hLumFilterPos[2000]; |
|
|
|
static int16_t hChrFilter[8000]; |
|
|
|
static int16_t hChrFilterPos[2000]; |
|
|
|
static int16_t vLumFilter[8000]; |
|
|
|
static int16_t vLumFilterPos[2000]; |
|
|
|
static int16_t vChrFilter[8000]; |
|
|
|
static int16_t vChrFilterPos[2000]; |
|
|
|
//FIXME just dummy vars |
|
|
|
static int16_t lumMmxFilter[1]; |
|
|
|
static int16_t chrMmxFilter[1]; |
|
|
|
#endif |
|
|
|
|
|
|
|
// clipping helper table for C implementations: |
|
|
@@ -159,28 +140,22 @@ static int clip_yuvtab_0c92[768]; |
|
|
|
static int clip_yuvtab_1a1e[768]; |
|
|
|
static int clip_yuvtab_40cf[768]; |
|
|
|
|
|
|
|
static int hLumFilterSize=0; |
|
|
|
static int hChrFilterSize=0; |
|
|
|
static int vLumFilterSize=0; |
|
|
|
static int vChrFilterSize=0; |
|
|
|
static int vLumBufSize=0; |
|
|
|
static int vChrBufSize=0; |
|
|
|
|
|
|
|
//global sws_flags from the command line |
|
|
|
int sws_flags=0; |
|
|
|
|
|
|
|
#ifdef CAN_COMPILE_X86_ASM |
|
|
|
static uint8_t funnyYCode[10000]; |
|
|
|
static uint8_t funnyUVCode[10000]; |
|
|
|
#endif |
|
|
|
/* cpuCaps combined from cpudetect and what is actually compiled in
|
|
|
(if support for something isn't compiled in, it won't appear here) */
|
|
|
static CpuCaps cpuCaps; |
|
|
|
|
|
|
|
static int canMMX2BeUsed=0; |
|
|
|
void (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, |
|
|
|
int srcSliceH, uint8_t* dst[], int dstStride[])=NULL; |
|
|
|
|
|
|
|
#ifdef CAN_COMPILE_X86_ASM |
|
|
|
void in_asm_used_var_warning_killer() |
|
|
|
{ |
|
|
|
volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ |
|
|
|
bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ |
|
|
|
M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; |
|
|
|
bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+ |
|
|
|
M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; |
|
|
|
if(i) i=0; |
|
|
|
} |
|
|
|
#endif |
|
|
@@ -220,9 +195,9 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt |
|
|
|
|
|
|
|
static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
|
|
|
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
|
|
|
uint8_t *dest, int dstW, int dstbpp) |
|
|
|
uint8_t *dest, int dstW, int dstFormat) |
|
|
|
{ |
|
|
|
if(dstbpp==32) |
|
|
|
if(dstFormat==IMGFMT_BGR32) |
|
|
|
{ |
|
|
|
int i; |
|
|
|
for(i=0; i<(dstW>>1); i++){ |
|
|
@@ -260,7 +235,7 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt |
|
|
|
dest[8*i+6]=clip_table[((Y2 + Cr) >>13)]; |
|
|
|
} |
|
|
|
} |
|
|
|
else if(dstbpp==24) |
|
|
|
else if(dstFormat==IMGFMT_BGR24) |
|
|
|
{ |
|
|
|
int i; |
|
|
|
for(i=0; i<(dstW>>1); i++){ |
|
|
@@ -299,7 +274,7 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt |
|
|
|
dest+=6; |
|
|
|
} |
|
|
|
} |
|
|
|
else if(dstbpp==16) |
|
|
|
else if(dstFormat==IMGFMT_BGR16) |
|
|
|
{ |
|
|
|
int i; |
|
|
|
for(i=0; i<(dstW>>1); i++){ |
|
|
@@ -339,7 +314,7 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt |
|
|
|
clip_table16r[(Y2 + Cr) >>13]; |
|
|
|
} |
|
|
|
} |
|
|
|
else if(dstbpp==15) |
|
|
|
else if(dstFormat==IMGFMT_BGR15) |
|
|
|
{ |
|
|
|
int i; |
|
|
|
for(i=0; i<(dstW>>1); i++){ |
|
|
@@ -467,42 +442,320 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt |
|
|
|
// minor note: the HAVE_xyz defines are messed up after that line, so don't use them
|
|
|
|
|
|
|
|
|
|
|
// *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: |
|
|
|
// *** Note: it's called multiple times while decoding a frame, first time y==0 |
|
|
|
// switching the CPU type during sliced drawing can have bad effects, like sig11
|
|
|
void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY , |
|
|
|
int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp, |
|
|
|
// old global scaler, don't use it in new code (see the illustrative call pattern after this function)
|
|
|
// will use sws_flags from the command line |
|
|
|
void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY , |
|
|
|
int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp, |
|
|
|
int srcW, int srcH, int dstW, int dstH){ |
|
|
|
|
|
|
|
#ifdef RUNTIME_CPUDETECT |
|
|
|
#ifdef CAN_COMPILE_X86_ASM |
|
|
|
// ordered by speed, fastest first
|
|
|
if(gCpuCaps.hasMMX2) |
|
|
|
SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
else if(gCpuCaps.has3DNow) |
|
|
|
SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
else if(gCpuCaps.hasMMX) |
|
|
|
SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
else |
|
|
|
SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
#else |
|
|
|
SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
#endif |
|
|
|
#else //RUNTIME_CPUDETECT |
|
|
|
#ifdef HAVE_MMX2 |
|
|
|
SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
#elif defined (HAVE_3DNOW) |
|
|
|
SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
#elif defined (HAVE_MMX) |
|
|
|
SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
#else |
|
|
|
SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); |
|
|
|
static SwsContext *context=NULL; |
|
|
|
int dstFormat; |
|
|
|
int flags=0; |
|
|
|
static int firstTime=1; |
|
|
|
int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1}; |
|
|
|
|
|
|
|
if(firstTime) |
|
|
|
{ |
|
|
|
flags= SWS_PRINT_INFO; |
|
|
|
firstTime=0; |
|
|
|
} |
|
|
|
|
|
|
|
switch(dstbpp) |
|
|
|
{ |
|
|
|
case 8 : dstFormat= IMGFMT_Y8; break; |
|
|
|
case 12: dstFormat= IMGFMT_YV12; break; |
|
|
|
case 15: dstFormat= IMGFMT_BGR15; break; |
|
|
|
case 16: dstFormat= IMGFMT_BGR16; break; |
|
|
|
case 24: dstFormat= IMGFMT_BGR24; break; |
|
|
|
case 32: dstFormat= IMGFMT_BGR32; break; |
|
|
|
default: return; |
|
|
|
} |
|
|
|
|
|
|
|
switch(sws_flags) |
|
|
|
{ |
|
|
|
case 0: flags|= SWS_FAST_BILINEAR; break; |
|
|
|
case 1: flags|= SWS_BILINEAR; break; |
|
|
|
case 2: flags|= SWS_BICUBIC; break; |
|
|
|
case 3: flags|= SWS_X; break; |
|
|
|
default:flags|= SWS_BILINEAR; break; |
|
|
|
} |
|
|
|
|
|
|
|
if(!context) context=getSwsContext(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat, flags, NULL, NULL); |
|
|
|
|
|
|
|
|
|
|
|
swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3); |
|
|
|
} |
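/* Illustrative call pattern for the slice interface (a sketch, not part of the
   build; the frame size and the 16-line slice height are assumptions):

	for(y=0; y<srcH; y+=16)
		SwScale_YV12slice(src, srcStride, y, 16, dst, dstStride, 32,
			srcW, srcH, dstW, dstH);

   i.e. the first call has srcSliceY==0 and the slices together cover the whole
   frame; src[] and srcStride[] describe the Y, U and V planes as delivered by
   the decoder. */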
|
|
|
|
|
|
|
static inline void initFilter(int16_t *dstFilter, int16_t *filterPos, int *filterSize, int xInc, |
|
|
|
int srcW, int dstW, int filterAlign, int one, int flags) |
|
|
|
{ |
|
|
|
int i; |
|
|
|
double filter[10000]; |
|
|
|
#ifdef ARCH_X86 |
|
|
|
if(gCpuCaps.hasMMX) |
|
|
|
asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) |
|
|
|
#endif |
|
|
|
#endif //!RUNTIME_CPUDETECT |
|
|
|
|
|
|
|
if(ABS(xInc - 0x10000) <10) // unscaled |
|
|
|
{ |
|
|
|
int i; |
|
|
|
*filterSize= (1 +(filterAlign-1)) & (~(filterAlign-1)); // 1 or 4 normally
|
|
|
for(i=0; i<dstW*(*filterSize); i++) filter[i]=0; |
|
|
|
|
|
|
|
for(i=0; i<dstW; i++) |
|
|
|
{ |
|
|
|
filter[i*(*filterSize)]=1; |
|
|
|
filterPos[i]=i; |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
else if(xInc <= (1<<16) || (flags&SWS_FAST_BILINEAR)) // upscale |
|
|
|
{ |
|
|
|
int i; |
|
|
|
int xDstInSrc; |
|
|
|
if (flags&SWS_BICUBIC) *filterSize= 4; |
|
|
|
else if(flags&SWS_X ) *filterSize= 4; |
|
|
|
else *filterSize= 2; |
|
|
|
// printf("%d %d %d\n", filterSize, srcW, dstW); |
|
|
|
*filterSize= (*filterSize +(filterAlign-1)) & (~(filterAlign-1)); |
|
|
|
|
|
|
|
xDstInSrc= xInc/2 - 0x8000; |
|
|
|
for(i=0; i<dstW; i++) |
|
|
|
{ |
|
|
|
int xx= (xDstInSrc>>16) - (*filterSize>>1) + 1; |
|
|
|
int j; |
|
|
|
|
|
|
|
filterPos[i]= xx; |
|
|
|
if((flags & SWS_BICUBIC) || (flags & SWS_X)) |
|
|
|
{ |
|
|
|
double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16); |
|
|
|
double y1,y2,y3,y4; |
|
|
|
double A= -0.6; |
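// y1..y4 below are the usual two-piece bicubic (Keys) interpolation kernel with
// parameter A, evaluated at distances 1+d, d, 1-d and 2-d, i.e. the weights of
// the four source samples around the destination position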
|
|
|
if(flags & SWS_BICUBIC){ |
|
|
|
// Equation is from VirtualDub |
|
|
|
y1 = ( + A*d - 2.0*A*d*d + A*d*d*d); |
|
|
|
y2 = (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d); |
|
|
|
y3 = ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d); |
|
|
|
y4 = ( + A*d*d - A*d*d*d); |
|
|
|
}else{ |
|
|
|
// cubic interpolation (derived it myself) |
|
|
|
y1 = ( -2.0*d + 3.0*d*d - 1.0*d*d*d)/6.0; |
|
|
|
y2 = (6.0 -3.0*d - 6.0*d*d + 3.0*d*d*d)/6.0; |
|
|
|
y3 = ( +6.0*d + 3.0*d*d - 3.0*d*d*d)/6.0; |
|
|
|
y4 = ( -1.0*d + 1.0*d*d*d)/6.0; |
|
|
|
} |
|
|
|
|
|
|
|
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc); |
|
|
|
filter[i*(*filterSize) + 0]= y1; |
|
|
|
filter[i*(*filterSize) + 1]= y2; |
|
|
|
filter[i*(*filterSize) + 2]= y3; |
|
|
|
filter[i*(*filterSize) + 3]= y4; |
|
|
|
// printf("%1.3f %1.3f %1.3f %1.3f %1.3f\n",d , y1, y2, y3, y4); |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
for(j=0; j<*filterSize; j++) |
|
|
|
{ |
|
|
|
double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); |
|
|
|
double coeff= 1.0 - d; |
|
|
|
if(coeff<0) coeff=0; |
|
|
|
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc); |
|
|
|
filter[i*(*filterSize) + j]= coeff; |
|
|
|
xx++; |
|
|
|
} |
|
|
|
} |
|
|
|
xDstInSrc+= xInc; |
|
|
|
} |
|
|
|
} |
|
|
|
else // downscale |
|
|
|
{ |
|
|
|
int xDstInSrc; |
|
|
|
if(flags&SWS_BICUBIC) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); |
|
|
|
else if(flags&SWS_X) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); |
|
|
|
else *filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); |
|
|
|
// printf("%d %d %d\n", *filterSize, srcW, dstW); |
|
|
|
*filterSize= (*filterSize +(filterAlign-1)) & (~(filterAlign-1)); |
|
|
|
|
|
|
|
xDstInSrc= xInc/2 - 0x8000; |
|
|
|
for(i=0; i<dstW; i++) |
|
|
|
{ |
|
|
|
int xx= (int)((double)xDstInSrc/(double)(1<<16) - ((*filterSize)-1)*0.5 + 0.5); |
|
|
|
int j; |
|
|
|
filterPos[i]= xx; |
|
|
|
for(j=0; j<*filterSize; j++) |
|
|
|
{ |
|
|
|
double d= ABS((xx<<16) - xDstInSrc)/(double)xInc; |
|
|
|
double coeff; |
|
|
|
if((flags & SWS_BICUBIC) || (flags & SWS_X)) |
|
|
|
{ |
|
|
|
double A= -0.75; |
|
|
|
// d*=2; |
|
|
|
// Equation is from VirtualDub |
|
|
|
if(d<1.0) |
|
|
|
coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d); |
|
|
|
else if(d<2.0) |
|
|
|
coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d); |
|
|
|
else |
|
|
|
coeff=0.0; |
|
|
|
} |
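// note: same two-piece bicubic kernel family as in the upscale case above, but
// here d is measured in destination-pixel units (divided by xInc), so the
// kernel widens with the downscale factor and also acts as a low-pass filter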
|
|
|
/* else if(flags & SWS_X) |
|
|
|
{ |
|
|
|
}*/ |
|
|
|
else |
|
|
|
{ |
|
|
|
coeff= 1.0 - d; |
|
|
|
if(coeff<0) coeff=0; |
|
|
|
} |
|
|
|
// printf("%1.3f %d %d \n", coeff, (int)d, xDstInSrc); |
|
|
|
filter[i*(*filterSize) + j]= coeff; |
|
|
|
xx++; |
|
|
|
} |
|
|
|
xDstInSrc+= xInc; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//fix borders |
|
|
|
for(i=0; i<dstW; i++) |
|
|
|
{ |
|
|
|
int j; |
|
|
|
if(filterPos[i] < 0) |
|
|
|
{ |
|
|
|
// Move filter coeffs left to compensate for filterPos |
|
|
|
for(j=1; j<*filterSize; j++) |
|
|
|
{ |
|
|
|
int left= MAX(j + filterPos[i], 0); |
|
|
|
filter[i*(*filterSize) + left] += filter[i*(*filterSize) + j]; |
|
|
|
filter[i*(*filterSize) + j]=0; |
|
|
|
} |
|
|
|
filterPos[i]= 0; |
|
|
|
} |
|
|
|
|
|
|
|
if(filterPos[i] + (*filterSize) > srcW) |
|
|
|
{ |
|
|
|
int shift= filterPos[i] + (*filterSize) - srcW; |
|
|
|
// Move filter coeffs right to compensate for filterPos |
|
|
|
for(j=(*filterSize)-2; j>=0; j--) |
|
|
|
{ |
|
|
|
int right= MIN(j + shift, (*filterSize)-1); |
|
|
|
filter[i*(*filterSize) +right] += filter[i*(*filterSize) +j]; |
|
|
|
filter[i*(*filterSize) +j]=0; |
|
|
|
} |
|
|
|
filterPos[i]= srcW - (*filterSize); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//FIXME try to align filterpos if possible / try to shift filterpos to put zeros at the end |
|
|
|
// and skip those later
|
|
|
|
|
|
|
//Normalize |
|
|
|
for(i=0; i<dstW; i++) |
|
|
|
{ |
|
|
|
int j; |
|
|
|
double sum=0; |
|
|
|
double scale= one; |
|
|
|
for(j=0; j<*filterSize; j++) |
|
|
|
{ |
|
|
|
sum+= filter[i*(*filterSize) + j]; |
|
|
|
} |
|
|
|
scale/= sum; |
|
|
|
for(j=0; j<*filterSize; j++) |
|
|
|
{ |
|
|
|
dstFilter[i*(*filterSize) + j]= (int)(filter[i*(*filterSize) + j]*scale); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef ARCH_X86 |
|
|
|
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode) |
|
|
|
{ |
|
|
|
uint8_t *fragment; |
|
|
|
int imm8OfPShufW1; |
|
|
|
int imm8OfPShufW2; |
|
|
|
int fragmentLength; |
|
|
|
|
|
|
|
int xpos, i; |
|
|
|
|
|
|
|
// create an optimized horizontal scaling routine |
|
|
|
|
|
|
|
//code fragment |
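// the asm block below only emits a template: the code between labels 0: and 9:
// is never executed here; the leal/subl sequence at the end just returns the
// fragment's start address, its length and the byte offsets of the two pshufw
// immediates. The loop further down copies this template into funnyCode[],
// patches the pshufw immediates with the per-position source offsets a..d and
// leaves a RET after the last fragment.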
|
|
|
|
|
|
|
asm volatile( |
|
|
|
"jmp 9f \n\t" |
|
|
|
// Begin |
|
|
|
"0: \n\t" |
|
|
|
"movq (%%esi), %%mm0 \n\t" //FIXME Alignment |
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
"psrlq $8, %%mm0 \n\t" |
|
|
|
"punpcklbw %%mm7, %%mm1 \n\t" |
|
|
|
"movq %%mm2, %%mm3 \n\t" |
|
|
|
"punpcklbw %%mm7, %%mm0 \n\t" |
|
|
|
"addw %%bx, %%cx \n\t" //2*xalpha += (4*lumXInc)&0xFFFF |
|
|
|
"pshufw $0xFF, %%mm1, %%mm1 \n\t" |
|
|
|
"1: \n\t" |
|
|
|
"adcl %%edx, %%esi \n\t" //xx+= (4*lumXInc)>>16 + carry |
|
|
|
"pshufw $0xFF, %%mm0, %%mm0 \n\t" |
|
|
|
"2: \n\t" |
|
|
|
"psrlw $9, %%mm3 \n\t" |
|
|
|
"psubw %%mm1, %%mm0 \n\t" |
|
|
|
"pmullw %%mm3, %%mm0 \n\t" |
|
|
|
"paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF |
|
|
|
"psllw $7, %%mm1 \n\t" |
|
|
|
"paddw %%mm1, %%mm0 \n\t" |
|
|
|
|
|
|
|
"movq %%mm0, (%%edi, %%eax) \n\t" |
|
|
|
|
|
|
|
"addl $8, %%eax \n\t" |
|
|
|
// End |
|
|
|
"9: \n\t" |
|
|
|
// "int $3\n\t" |
|
|
|
"leal 0b, %0 \n\t" |
|
|
|
"leal 1b, %1 \n\t" |
|
|
|
"leal 2b, %2 \n\t" |
|
|
|
"decl %1 \n\t" |
|
|
|
"decl %2 \n\t" |
|
|
|
"subl %0, %1 \n\t" |
|
|
|
"subl %0, %2 \n\t" |
|
|
|
"leal 9b, %3 \n\t" |
|
|
|
"subl %0, %3 \n\t" |
|
|
|
:"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2), |
|
|
|
"=r" (fragmentLength) |
|
|
|
); |
|
|
|
|
|
|
|
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers |
|
|
|
|
|
|
|
for(i=0; i<dstW/8; i++) |
|
|
|
{ |
|
|
|
int xx=xpos>>16; |
|
|
|
|
|
|
|
if((i&3) == 0) |
|
|
|
{ |
|
|
|
int a=0; |
|
|
|
int b=((xpos+xInc)>>16) - xx; |
|
|
|
int c=((xpos+xInc*2)>>16) - xx; |
|
|
|
int d=((xpos+xInc*3)>>16) - xx; |
|
|
|
|
|
|
|
memcpy(funnyCode + fragmentLength*i/4, fragment, fragmentLength); |
|
|
|
|
|
|
|
funnyCode[fragmentLength*i/4 + imm8OfPShufW1]= |
|
|
|
funnyCode[fragmentLength*i/4 + imm8OfPShufW2]= |
|
|
|
a | (b<<2) | (c<<4) | (d<<6); |
|
|
|
|
|
|
|
// if we don't need to read 8 bytes then don't :), reduces the chance of
|
|
|
// crossing a cache line |
|
|
|
if(d<3) funnyCode[fragmentLength*i/4 + 1]= 0x6E; |
|
|
|
|
|
|
|
funnyCode[fragmentLength*(i+4)/4]= RET; |
|
|
|
} |
|
|
|
xpos+=xInc; |
|
|
|
} |
|
|
|
} |
|
|
|
#endif // ARCH_X86 |
|
|
|
|
|
|
|
//FIXME remove |
|
|
|
void SwScale_Init(){ |
|
|
|
} |
|
|
|
|
|
|
|
static void globalInit(){ |
|
|
|
// generating tables: |
|
|
|
int i; |
|
|
|
for(i=0; i<768; i++){ |
|
|
@@ -517,7 +770,7 @@ void SwScale_Init(){ |
|
|
|
|
|
|
|
for(i=0; i<768; i++) |
|
|
|
{ |
|
|
|
int v= clip_table[i]; |
|
|
|
|
|
|
clip_table16b[i]= v>>3; |
|
|
|
clip_table16g[i]= (v<<3)&0x07E0; |
|
|
|
clip_table16r[i]= (v<<8)&0xF800; |
|
|
@@ -526,5 +779,346 @@ void SwScale_Init(){ |
|
|
|
clip_table15r[i]= (v<<7)&0x7C00; |
|
|
|
} |
|
|
|
|
|
|
|
cpuCaps= gCpuCaps; |
|
|
|
|
|
|
|
#ifdef RUNTIME_CPUDETECT |
|
|
|
#ifdef CAN_COMPILE_X86_ASM |
|
|
|
// ordered by speed, fastest first
|
|
|
if(gCpuCaps.hasMMX2) |
|
|
|
swScale= swScale_MMX2; |
|
|
|
else if(gCpuCaps.has3DNow) |
|
|
|
swScale= swScale_3DNOW; |
|
|
|
else if(gCpuCaps.hasMMX) |
|
|
|
swScale= swScale_MMX; |
|
|
|
else |
|
|
|
swScale= swScale_C; |
|
|
|
|
|
|
|
#else |
|
|
|
swScale= swScale_C; |
|
|
|
cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0; |
|
|
|
#endif |
|
|
|
#else //RUNTIME_CPUDETECT |
|
|
|
#ifdef HAVE_MMX2 |
|
|
|
swScale= swScale_MMX2; |
|
|
|
cpuCaps.has3DNow = 0; |
|
|
|
#elif defined (HAVE_3DNOW) |
|
|
|
swScale= swScale_3DNOW; |
|
|
|
cpuCaps.hasMMX2 = 0; |
|
|
|
#elif defined (HAVE_MMX) |
|
|
|
swScale= swScale_MMX; |
|
|
|
cpuCaps.hasMMX2 = cpuCaps.has3DNow = 0; |
|
|
|
#else |
|
|
|
swScale= swScale_C; |
|
|
|
cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0; |
|
|
|
#endif |
|
|
|
#endif //!RUNTIME_CPUDETECT |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, |
|
|
|
SwsFilter *srcFilter, SwsFilter *dstFilter){ |
|
|
|
|
|
|
|
const int widthAlign= dstFormat==IMGFMT_YV12 ? 16 : 8; |
|
|
|
SwsContext *c; |
|
|
|
int i; |
|
|
|
//const int bytespp= (dstbpp+1)/8; //(12->1, 15&16->2, 24->3, 32->4) |
|
|
|
//const int over= dstFormat==IMGFMT_YV12 ? (((dstW+15)&(~15))) - dststride |
|
|
|
// : (((dstW+7)&(~7)))*bytespp - dststride; |
|
|
|
if(swScale==NULL) globalInit(); |
|
|
|
|
|
|
|
/* sanity check */ |
|
|
|
if(srcW<1 || srcH<1 || dstW<1 || dstH<1) return NULL; |
|
|
|
if(srcW>=SWS_MAX_SIZE || dstW>=SWS_MAX_SIZE || srcH>=SWS_MAX_SIZE || dstH>=SWS_MAX_SIZE) |
|
|
|
{ |
|
|
|
fprintf(stderr, "size is too large, increase SWS_MAX_SIZE\n"); |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
|
|
|
|
/* FIXME |
|
|
|
if(dstStride[0]%widthAlign !=0 ) |
|
|
|
{ |
|
|
|
if(flags & SWS_PRINT_INFO) |
|
|
|
fprintf(stderr, "SwScaler: Warning: dstStride is not a multiple of %d!\n" |
|
|
|
"SwScaler: ->cannot do aligned memory acesses anymore\n", |
|
|
|
widthAlign); |
|
|
|
} |
|
|
|
*/ |
|
|
|
c= memalign(64, sizeof(SwsContext)); |
|
|
|
|
|
|
|
c->srcW= srcW; |
|
|
|
c->srcH= srcH; |
|
|
|
c->dstW= dstW; |
|
|
|
c->dstH= dstH; |
|
|
|
c->lumXInc= ((srcW<<16) + (1<<15))/dstW; |
|
|
|
c->lumYInc= ((srcH<<16) + (1<<15))/dstH; |
|
|
|
c->flags= flags; |
|
|
|
c->dstFormat= dstFormat; |
|
|
|
c->srcFormat= srcFormat; |
|
|
|
|
|
|
|
if(cpuCaps.hasMMX2) |
|
|
|
{ |
|
|
|
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; |
|
|
|
if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) |
|
|
|
{ |
|
|
|
if(flags&SWS_PRINT_INFO) |
|
|
|
fprintf(stderr, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
c->canMMX2BeUsed=0; |
|
|
|
|
|
|
|
// match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst |
|
|
|
// but only for the FAST_BILINEAR mode otherwise do correct scaling |
|
|
|
// n-2 is the last chrominance sample available |
|
|
|
// this is not perfect, but no one should notice the difference; the more correct variant
|
|
|
// would be like the vertical one, but that would require some special code for the |
|
|
|
// first and last pixel |
|
|
|
if(flags&SWS_FAST_BILINEAR) |
|
|
|
{ |
|
|
|
if(c->canMMX2BeUsed) c->lumXInc+= 20; |
|
|
|
// we don't use the x86 asm scaler if MMX is available
|
|
|
else if(cpuCaps.hasMMX) c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; |
|
|
|
} |
|
|
|
|
|
|
|
/* set chrXInc & chrDstW */ |
|
|
|
if((flags&SWS_FULL_UV_IPOL) && dstFormat!=IMGFMT_YV12) |
|
|
|
c->chrXInc= c->lumXInc>>1, c->chrDstW= dstW; |
|
|
|
else |
|
|
|
c->chrXInc= c->lumXInc, c->chrDstW= (dstW+1)>>1; |
|
|
|
|
|
|
|
/* set chrYInc & chrDstH */ |
|
|
|
if(dstFormat==IMGFMT_YV12) c->chrYInc= c->lumYInc, c->chrDstH= (dstH+1)>>1; |
|
|
|
else c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH; |
|
|
|
|
|
|
|
/* precalculate horizontal scaler filter coefficients */ |
|
|
|
{ |
|
|
|
const int filterAlign= cpuCaps.hasMMX ? 4 : 1; |
|
|
|
|
|
|
|
initFilter(c->hLumFilter, c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, |
|
|
|
srcW , dstW, filterAlign, 1<<14, flags); |
|
|
|
initFilter(c->hChrFilter, c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, |
|
|
|
(srcW+1)>>1, c->chrDstW, filterAlign, 1<<14, flags); |
|
|
|
|
|
|
|
#ifdef ARCH_X86 |
|
|
|
// can't downscale !!!
|
|
|
if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) |
|
|
|
{ |
|
|
|
initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode); |
|
|
|
initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode); |
|
|
|
} |
|
|
|
#endif |
|
|
|
} // Init Horizontal stuff |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* precalculate vertical scaler filter coefficients */ |
|
|
|
initFilter(c->vLumFilter, c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, |
|
|
|
srcH , dstH, 1, (1<<12)-4, flags); |
|
|
|
initFilter(c->vChrFilter, c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, |
|
|
|
(srcH+1)>>1, c->chrDstH, 1, (1<<12)-4, flags); |
|
|
|
|
|
|
|
// Calculate buffer sizes so that they won't run out while handling these damn slices
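// (lumPixBuf/chrPixBuf are ring buffers of horizontally scaled lines; for every
// output line both its luma and its chroma filter inputs must be available at
// the same time, and since chroma has half the vertical resolution this can
// require more lines than the plain vertical filter size - hence the maximum
// over all output lines computed below)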
|
|
|
c->vLumBufSize= c->vLumFilterSize; |
|
|
|
c->vChrBufSize= c->vChrFilterSize; |
|
|
|
for(i=0; i<dstH; i++) |
|
|
|
{ |
|
|
|
int chrI= i*c->chrDstH / dstH; |
|
|
|
int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, |
|
|
|
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<1)); |
|
|
|
nextSlice&= ~1; // Slices start at even boundaries |
|
|
|
if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice) |
|
|
|
c->vLumBufSize= nextSlice - c->vLumFilterPos[i ]; |
|
|
|
if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>1)) |
|
|
|
c->vChrBufSize= (nextSlice>>1) - c->vChrFilterPos[chrI]; |
|
|
|
} |
|
|
|
|
|
|
|
// allocate pixbufs (we use dynamic allocation because otherwise we would need to |
|
|
|
// allocate several megabytes to handle all possible cases) |
|
|
|
for(i=0; i<c->vLumBufSize; i++) |
|
|
|
c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000); |
|
|
|
for(i=0; i<c->vChrBufSize; i++) |
|
|
|
c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000); |
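// note: each line buffer is stored twice in the pointer array (at i and at
// i+vBufSize) so the vertical scaler can address a contiguous run of filterSize
// pointers without wrapping the ring-buffer index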
|
|
|
|
|
|
|
//try to avoid drawing green stuff between the right end and the stride end |
|
|
|
for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000); |
|
|
|
for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000); |
|
|
|
|
|
|
|
ASSERT(c->chrDstH <= dstH) |
|
|
|
ASSERT(c->vLumFilterSize* dstH*4 <= SWS_MAX_SIZE*20) |
|
|
|
ASSERT(c->vChrFilterSize*c->chrDstH*4 <= SWS_MAX_SIZE*20) |
|
|
|
|
|
|
|
// pack filter data for mmx code |
|
|
|
if(cpuCaps.hasMMX) |
|
|
|
{ |
|
|
|
for(i=0; i<c->vLumFilterSize*dstH; i++) |
|
|
|
c->lumMmxFilter[4*i]=c->lumMmxFilter[4*i+1]=c->lumMmxFilter[4*i+2]=c->lumMmxFilter[4*i+3]= |
|
|
|
c->vLumFilter[i]; |
|
|
|
for(i=0; i<c->vChrFilterSize*c->chrDstH; i++) |
|
|
|
c->chrMmxFilter[4*i]=c->chrMmxFilter[4*i+1]=c->chrMmxFilter[4*i+2]=c->chrMmxFilter[4*i+3]= |
|
|
|
c->vChrFilter[i]; |
|
|
|
} |
|
|
|
|
|
|
|
if(flags&SWS_PRINT_INFO) |
|
|
|
{ |
|
|
|
#ifdef DITHER1XBPP |
|
|
|
char *dither= cpuCaps.hasMMX ? " dithered" : ""; |
|
|
|
#endif |
|
|
|
if(flags&SWS_FAST_BILINEAR) |
|
|
|
fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler "); |
|
|
|
else if(flags&SWS_BILINEAR) |
|
|
|
fprintf(stderr, "\nSwScaler: BILINEAR scaler "); |
|
|
|
else if(flags&SWS_BICUBIC) |
|
|
|
fprintf(stderr, "\nSwScaler: BICUBIC scaler "); |
|
|
|
else |
|
|
|
fprintf(stderr, "\nSwScaler: ehh flags invalid?! "); |
|
|
|
|
|
|
|
if(dstFormat==IMGFMT_BGR15) |
|
|
|
fprintf(stderr, "with%s BGR15 output ", dither); |
|
|
|
else if(dstFormat==IMGFMT_BGR16) |
|
|
|
fprintf(stderr, "with%s BGR16 output ", dither); |
|
|
|
else if(dstFormat==IMGFMT_BGR24) |
|
|
|
fprintf(stderr, "with BGR24 output "); |
|
|
|
else if(dstFormat==IMGFMT_BGR32) |
|
|
|
fprintf(stderr, "with BGR32 output "); |
|
|
|
else if(dstFormat==IMGFMT_YV12) |
|
|
|
fprintf(stderr, "with YV12 output "); |
|
|
|
else |
|
|
|
fprintf(stderr, "without output "); |
|
|
|
|
|
|
|
if(cpuCaps.hasMMX2) |
|
|
|
fprintf(stderr, "using MMX2\n"); |
|
|
|
else if(cpuCaps.has3DNow) |
|
|
|
fprintf(stderr, "using 3DNOW\n"); |
|
|
|
else if(cpuCaps.hasMMX) |
|
|
|
fprintf(stderr, "using MMX\n"); |
|
|
|
else |
|
|
|
fprintf(stderr, "using C\n"); |
|
|
|
} |
|
|
|
|
|
|
|
if((flags & SWS_PRINT_INFO) && verbose) |
|
|
|
{ |
|
|
|
if(cpuCaps.hasMMX) |
|
|
|
{ |
|
|
|
if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) |
|
|
|
printf("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); |
|
|
|
else |
|
|
|
{ |
|
|
|
if(c->hLumFilterSize==4) |
|
|
|
printf("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); |
|
|
|
else if(c->hLumFilterSize==8) |
|
|
|
printf("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); |
|
|
|
else |
|
|
|
printf("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); |
|
|
|
|
|
|
|
if(c->hChrFilterSize==4) |
|
|
|
printf("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); |
|
|
|
else if(c->hChrFilterSize==8) |
|
|
|
printf("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); |
|
|
|
else |
|
|
|
printf("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
#ifdef ARCH_X86 |
|
|
|
printf("SwScaler: using X86-Asm scaler for horizontal scaling\n"); |
|
|
|
#else |
|
|
|
if(flags & SWS_FAST_BILINEAR) |
|
|
|
printf("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); |
|
|
|
else |
|
|
|
printf("SwScaler: using C scaler for horizontal scaling\n"); |
|
|
|
#endif |
|
|
|
} |
|
|
|
if(dstFormat==IMGFMT_YV12) |
|
|
|
{ |
|
|
|
if(c->vLumFilterSize==1) |
|
|
|
printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C"); |
|
|
|
else |
|
|
|
printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C"); |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
if(c->vLumFilterSize==1 && c->vChrFilterSize==2) |
|
|
|
printf("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" |
|
|
|
"SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C"); |
|
|
|
else if(c->vLumFilterSize==2 && c->vChrFilterSize==2) |
|
|
|
printf("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); |
|
|
|
else |
|
|
|
printf("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); |
|
|
|
} |
|
|
|
|
|
|
|
if(dstFormat==IMGFMT_BGR24) |
|
|
|
printf("SwScaler: using %s YV12->BGR24 Converter\n", |
|
|
|
cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C")); |
|
|
|
else |
|
|
|
printf("SwScaler: using %s YV12->BGR Converter\n", cpuCaps.hasMMX ? "MMX" : "C");//FIXME print format |
|
|
|
|
|
|
|
printf("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); |
|
|
|
} |
|
|
|
|
|
|
|
return c; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* returns a normalized gaussian curve used to filter stuff |
|
|
|
* quality=3 is high quality, lower is lower quality
|
|
|
*/ |
|
|
|
double *getGaussian(double variance, double quality){ |
|
|
|
const int length= (int)(variance*quality + 0.5) | 1; |
|
|
|
int i; |
|
|
|
double *coeff= memalign(sizeof(double), length*sizeof(double)); |
|
|
|
double middle= (length-1)*0.5; |
|
|
|
|
|
|
|
for(i=0; i<length; i++) |
|
|
|
{ |
|
|
|
double dist= i-middle; |
|
|
|
coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI); |
|
|
|
} |
|
|
|
|
|
|
|
normalize(coeff, length, 1.0); |
|
|
|
return coeff; |
|
|
|
} |
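/* Example (illustrative only; the parameters are arbitrary):

	double *g= getGaussian(2.0, 3.0);	// 7 taps, variance 2.0, coefficients sum to 1.0
	// ... use g as a 1D filter kernel ...
	free(g);
*/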
|
|
|
|
|
|
|
void normalize(double *coeff, int length, double height){ |
|
|
|
int i; |
|
|
|
double sum=0; |
|
|
|
double inv; |
|
|
|
|
|
|
|
for(i=0; i<length; i++) |
|
|
|
sum+= coeff[i]; |
|
|
|
|
|
|
|
inv= height/sum; |
|
|
|
|
|
|
|
for(i=0; i<length; i++) |
|
|
|
coeff[i]*= inv;
|
|
|
} |
|
|
|
|
|
|
|
double *conv(double *a, int aLength, double *b, int bLength){ |
|
|
|
int length= aLength + bLength - 1; |
|
|
|
double *coeff= memalign(sizeof(double), length*sizeof(double)); |
|
|
|
int i, j; |
|
|
|
|
|
|
|
for(i=0; i<length; i++) coeff[i]= 0.0; |
|
|
|
|
|
|
|
for(i=0; i<aLength; i++) |
|
|
|
{ |
|
|
|
for(j=0; j<bLength; j++) |
|
|
|
{ |
|
|
|
coeff[i+j]+= a[i]*b[j]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return coeff; |
|
|
|
} |
|
|
|
|
|
|
|
/* |
|
|
|
double *sum(double *a, int aLength, double *b, int bLength){ |
|
|
|
int length= MAX(aLength, bLength); |
|
|
|
double *coeff= memalign(sizeof(double), length*sizeof(double)); |
|
|
|
int i; |
|
|
|
|
|
|
|
for(i=0; i<length; i++) coeff[i]= 0.0; |
|
|
|
|
|
|
|
for(i=0; i<aLength; i++) coeff[i]+= a[i]; |
|
|
|
} |
|
|
|
*/ |