diff --git a/postproc/swscale.c b/postproc/swscale.c index e4537f7bf2..6f9c203a2a 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -2110,6 +2110,25 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, srcFilter->chrV, dstFilter->chrV, c->param); + +#ifdef HAVE_ALTIVEC + c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH); + c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->dstH); + + for (i=0;i<c->vLumFilterSize*c->dstH;i++) { + int j; + short *p = (short *)&c->vYCoeffsBank[i]; + for (j=0;j<8;j++) + p[j] = c->vLumFilter[i]; + } + + for (i=0;i<c->vChrFilterSize*c->dstH;i++) { + int j; + short *p = (short *)&c->vCCoeffsBank[i]; + for (j=0;j<8;j++) + p[j] = c->vChrFilter[i]; + } +#endif } // Calculate Buffer Sizes so that they won't run out while handling these damn slices @@ -2644,6 +2663,12 @@ void sws_freeContext(SwsContext *c){ c->hLumFilter = NULL; if(c->hChrFilter) free(c->hChrFilter); c->hChrFilter = NULL; +#ifdef HAVE_ALTIVEC + if(c->vYCoeffsBank) free(c->vYCoeffsBank); + c->vYCoeffsBank = NULL; + if(c->vCCoeffsBank) free(c->vCCoeffsBank); + c->vCCoeffsBank = NULL; +#endif if(c->vLumFilterPos) free(c->vLumFilterPos); c->vLumFilterPos = NULL; diff --git a/postproc/swscale_internal.h b/postproc/swscale_internal.h index c6611da509..b4e1dbeea1 100644 --- a/postproc/swscale_internal.h +++ b/postproc/swscale_internal.h @@ -154,6 +154,7 @@ typedef struct SwsContext{ vector signed short CGV; vector signed short OY; vector unsigned short CSHIFT; + vector signed short *vYCoeffsBank, *vCCoeffsBank; #endif diff --git a/postproc/yuv2rgb_altivec.c b/postproc/yuv2rgb_altivec.c index 2d2f7766b2..dee68b2f2e 100644 --- a/postproc/yuv2rgb_altivec.c +++ b/postproc/yuv2rgb_altivec.c @@ -774,8 +774,6 @@ altivec_yuv2packedX (SwsContext *c, uint8_t *dest, int dstW, int dstY) { int i,j; 
- short tmp __attribute__((aligned (16))); - int16_t *p; short *f; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; vector signed short R0,G0,B0,R1,G1,B1; @@ -787,29 +785,10 @@ altivec_yuv2packedX (SwsContext *c, vector unsigned short SCL = vec_splat((vector unsigned short)AVV(4),0); unsigned long scratch[16] __attribute__ ((aligned (16))); - vector signed short *vYCoeffsBank, *vCCoeffsBank; - vector signed short *YCoeffs, *CCoeffs; - vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH); - vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH); - - for (i=0;i<lumFilterSize*c->dstH;i++) { - tmp = c->vLumFilter[i]; - p = &vYCoeffsBank[i]; - for (j=0;j<8;j++) - p[j] = tmp; - } - - for (i=0;i<chrFilterSize*c->dstH;i++) { - tmp = c->vChrFilter[i]; - p = &vCCoeffsBank[i]; - for (j=0;j<8;j++) - p[j] = tmp; - } - - YCoeffs = vYCoeffsBank+dstY*lumFilterSize; - CCoeffs = vCCoeffsBank+dstY*chrFilterSize; + YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize; + CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize; out = (vector unsigned char *)dest; @@ -962,7 +941,4 @@ altivec_yuv2packedX (SwsContext *c, memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); } - if (vYCoeffsBank) free (vYCoeffsBank); - if (vCCoeffsBank) free (vCCoeffsBank); - }