av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
{
- int i, j;
+ int i;
int usesVFilter, usesHFilter;
int unscaled;
SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
{
const int filterAlign = X86_MMX(cpu_flags) ? 4 :
PPC_ALTIVEC(cpu_flags) ? 8 :
- have_neon(cpu_flags) ? 4 : 1;
+ have_neon(cpu_flags) ? 8 : 1;
if ((ret = initFilter(&c->hLumFilter, &c->hLumFilterPos,
&c->hLumFilterSize, c->lumXInc,
/* precalculate vertical scaler filter coefficients */
{
const int filterAlign = X86_MMX(cpu_flags) ? 2 :
- PPC_ALTIVEC(cpu_flags) ? 8 : 1;
+ PPC_ALTIVEC(cpu_flags) ? 8 :
+ have_neon(cpu_flags) ? 2 : 1;
if ((ret = initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize,
c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
#endif
}
- // calculate buffer sizes so that they won't run out while handling these damn slices
- c->vLumBufSize = c->vLumFilterSize;
- c->vChrBufSize = c->vChrFilterSize;
- for (i = 0; i < dstH; i++) {
- int chrI = (int64_t)i * c->chrDstH / dstH;
- int nextSlice = FFMAX(c->vLumFilterPos[i] + c->vLumFilterSize - 1,
- ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)
- << c->chrSrcVSubSample));
-
- nextSlice >>= c->chrSrcVSubSample;
- nextSlice <<= c->chrSrcVSubSample;
- if (c->vLumFilterPos[i] + c->vLumBufSize < nextSlice)
- c->vLumBufSize = nextSlice - c->vLumFilterPos[i];
- if (c->vChrFilterPos[chrI] + c->vChrBufSize <
- (nextSlice >> c->chrSrcVSubSample))
- c->vChrBufSize = (nextSlice >> c->chrSrcVSubSample) -
- c->vChrFilterPos[chrI];
- }
-
for (i = 0; i < 4; i++)
FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail);
- /* Allocate pixbufs (we use dynamic allocation because otherwise we would
- * need to allocate several megabytes to handle all possible cases) */
- FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
- FF_ALLOCZ_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
- FF_ALLOCZ_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize * 3 * sizeof(int16_t *), fail);
- if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
- FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
- /* Note we need at least one pixel more at the end because of the MMX code
- * (just in case someone wants to replace the 4000/8000). */
- /* align at 16 bytes for AltiVec */
- for (i = 0; i < c->vLumBufSize; i++) {
- FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i + c->vLumBufSize],
- dst_stride + 16, fail);
- c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
- }
+ c->needAlpha = (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) ? 1 : 0;
+
// 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
c->uv_offx2 = dst_stride + 16;
- for (i = 0; i < c->vChrBufSize; i++) {
- FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize],
- dst_stride * 2 + 32, fail);
- c->chrUPixBuf[i] = c->chrUPixBuf[i + c->vChrBufSize];
- c->chrVPixBuf[i] = c->chrVPixBuf[i + c->vChrBufSize]
- = c->chrUPixBuf[i] + (dst_stride >> 1) + 8;
- }
- if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
- for (i = 0; i < c->vLumBufSize; i++) {
- FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i + c->vLumBufSize],
- dst_stride + 16, fail);
- c->alpPixBuf[i] = c->alpPixBuf[i + c->vLumBufSize];
- }
-
- // try to avoid drawing green stuff between the right end and the stride end
- for (i = 0; i < c->vChrBufSize; i++)
- if(desc_dst->comp[0].depth == 16){
- av_assert0(c->dstBpc > 14);
- for(j=0; j<dst_stride/2+1; j++)
- ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
- } else
- for(j=0; j<dst_stride+1; j++)
- ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
av_assert0(c->chrDstH <= dstH);
if (!c)
return;
- if (c->lumPixBuf) {
- for (i = 0; i < c->vLumBufSize; i++)
- av_freep(&c->lumPixBuf[i]);
- av_freep(&c->lumPixBuf);
- }
-
- if (c->chrUPixBuf) {
- for (i = 0; i < c->vChrBufSize; i++)
- av_freep(&c->chrUPixBuf[i]);
- av_freep(&c->chrUPixBuf);
- av_freep(&c->chrVPixBuf);
- }
-
- if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
- for (i = 0; i < c->vLumBufSize; i++)
- av_freep(&c->alpPixBuf[i]);
- av_freep(&c->alpPixBuf);
- }
-
for (i = 0; i < 4; i++)
av_freep(&c->dither_error[i]);