once when the scaler is initialized, instead of building them and freeing them over and over. This gives massive performance improvements. Patch by Alan Curry, pacman*at*TheWorld*dot*com. Originally committed as revision 17589 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc (tags/v0.5)
| @@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int | |||||
| c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, | c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, | ||||
| (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, | (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, | ||||
| srcFilter->chrV, dstFilter->chrV, c->param); | srcFilter->chrV, dstFilter->chrV, c->param); | ||||
| #ifdef HAVE_ALTIVEC | |||||
| c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH); | |||||
| c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH); | |||||
| for (i=0;i<c->vLumFilterSize*c->dstH;i++) { | |||||
| int j; | |||||
| short *p = (short *)&c->vYCoeffsBank[i]; | |||||
| for (j=0;j<8;j++) | |||||
| p[j] = c->vLumFilter[i]; | |||||
| } | |||||
| for (i=0;i<c->vChrFilterSize*c->dstH;i++) { | |||||
| int j; | |||||
| short *p = (short *)&c->vCCoeffsBank[i]; | |||||
| for (j=0;j<8;j++) | |||||
| p[j] = c->vChrFilter[i]; | |||||
| } | |||||
| #endif | |||||
| } | } | ||||
| // Calculate Buffer Sizes so that they won't run out while handling these damn slices | // Calculate Buffer Sizes so that they won't run out while handling these damn slices | ||||
| @@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){ | |||||
| c->hLumFilter = NULL; | c->hLumFilter = NULL; | ||||
| if(c->hChrFilter) free(c->hChrFilter); | if(c->hChrFilter) free(c->hChrFilter); | ||||
| c->hChrFilter = NULL; | c->hChrFilter = NULL; | ||||
| #ifdef HAVE_ALTIVEC | |||||
| if(c->vYCoeffsBank) free(c->vYCoeffsBank); | |||||
| c->vYCoeffsBank = NULL; | |||||
| if(c->vCCoeffsBank) free(c->vCCoeffsBank); | |||||
| c->vCCoeffsBank = NULL; | |||||
| #endif | |||||
| if(c->vLumFilterPos) free(c->vLumFilterPos); | if(c->vLumFilterPos) free(c->vLumFilterPos); | ||||
| c->vLumFilterPos = NULL; | c->vLumFilterPos = NULL; | ||||
| @@ -154,6 +154,7 @@ typedef struct SwsContext{ | |||||
| vector signed short CGV; | vector signed short CGV; | ||||
| vector signed short OY; | vector signed short OY; | ||||
| vector unsigned short CSHIFT; | vector unsigned short CSHIFT; | ||||
| vector signed short *vYCoeffsBank, *vCCoeffsBank; | |||||
| #endif | #endif | ||||
| @@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c, | |||||
| uint8_t *dest, int dstW, int dstY) | uint8_t *dest, int dstW, int dstY) | ||||
| { | { | ||||
| int i,j; | int i,j; | ||||
| short tmp __attribute__((aligned (16))); | |||||
| int16_t *p; | |||||
| short *f; | short *f; | ||||
| vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; | vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; | ||||
| vector signed short R0,G0,B0,R1,G1,B1; | vector signed short R0,G0,B0,R1,G1,B1; | ||||
| @@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c, | |||||
| vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); | vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); | ||||
| unsigned long scratch[16] __attribute__ ((aligned (16))); | unsigned long scratch[16] __attribute__ ((aligned (16))); | ||||
| vector signed short *vYCoeffsBank, *vCCoeffsBank; | |||||
| vector signed short *YCoeffs, *CCoeffs; | vector signed short *YCoeffs, *CCoeffs; | ||||
| vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH); | |||||
| vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH); | |||||
| for (i=0;i<lumFilterSize*c->dstH;i++) { | |||||
| tmp = c->vLumFilter[i]; | |||||
| p = &vYCoeffsBank[i]; | |||||
| for (j=0;j<8;j++) | |||||
| p[j] = tmp; | |||||
| } | |||||
| for (i=0;i<chrFilterSize*c->dstH;i++) { | |||||
| tmp = c->vChrFilter[i]; | |||||
| p = &vCCoeffsBank[i]; | |||||
| for (j=0;j<8;j++) | |||||
| p[j] = tmp; | |||||
| } | |||||
| YCoeffs = vYCoeffsBank+dstY*lumFilterSize; | |||||
| CCoeffs = vCCoeffsBank+dstY*chrFilterSize; | |||||
| YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize; | |||||
| CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize; | |||||
| out = (vector unsigned char *)dest; | out = (vector unsigned char *)dest; | ||||
| @@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c, | |||||
| memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); | memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); | ||||
| } | } | ||||
| if (vYCoeffsBank) free (vYCoeffsBank); | |||||
| if (vCCoeffsBank) free (vCCoeffsBank); | |||||
| } | } | ||||