X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fimgresample.c;h=d1d6757c6c23747f495807645b6958865d8fd282;hb=63d2bad846e279dbf2cc3261f6e24c405c68b17a;hp=84336ef5900f043af0bf9897592089dd4d704e25;hpb=a163ed1aaae3d15ad59ca176f5f468a5e883c2cb;p=ffmpeg

diff --git a/libavcodec/imgresample.c b/libavcodec/imgresample.c
index 84336ef5900..d1d6757c6c2 100644
--- a/libavcodec/imgresample.c
+++ b/libavcodec/imgresample.c
@@ -28,8 +28,8 @@
 #include "swscale.h"
 #include "dsputil.h"
 
-#ifdef USE_FASTMEMCPY
-#include "libvo/fastmemcpy.h"
+#ifdef HAVE_ALTIVEC
+#include "ppc/imgresample_altivec.h"
 #endif
 
 #define NB_COMPONENTS 3
@@ -48,6 +48,7 @@
 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
 
 struct SwsContext {
+    AVClass *av_class;
     struct ImgReSampleContext *resampling_ctx;
     enum PixelFormat src_pix_fmt, dst_pix_fmt;
 };
@@ -171,7 +172,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
         src_pos += src_incr;\
     }
 
-#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
+#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);
 
 /* XXX: do four pixels at a time */
 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
@@ -282,134 +283,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
     }
     emms();
 }
-#endif
-
-#ifdef HAVE_ALTIVEC
-typedef union {
-    vector unsigned char v;
-    unsigned char c[16];
-} vec_uc_t;
-
-typedef union {
-    vector signed short v;
-    signed short s[8];
-} vec_ss_t;
-
-void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
-                          int wrap, int16_t *filter)
-{
-    int sum, i;
-    const uint8_t *s;
-    vector unsigned char *tv, tmp, dstv, zero;
-    vec_ss_t srchv[4], srclv[4], fv[4];
-    vector signed short zeros, sumhv, sumlv;
-    s = src;
-
-    for(i=0;i<4;i++)
-    {
-        /*
-           The vec_madds later on does an implicit >>15 on the result.
-           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
-           a signed short, we have just enough bits to pre-shift our
-           filter constants <<7 to compensate for vec_madds.
-        */
-        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
-        fv[i].v = vec_splat(fv[i].v, 0);
-    }
-
-    zero = vec_splat_u8(0);
-    zeros = vec_splat_s16(0);
-
-
-    /*
-       When we're resampling, we'd ideally like both our input buffers,
-       and output buffers to be 16-byte aligned, so we can do both aligned
-       reads and writes. Sadly we can't always have this at the moment, so
-       we opt for aligned writes, as unaligned writes have a huge overhead.
-       To do this, do enough scalar resamples to get dst 16-byte aligned.
-    */
-    i = (-(int)dst) & 0xf;
-    while(i>0) {
-        sum = s[0 * wrap] * filter[0] +
-              s[1 * wrap] * filter[1] +
-              s[2 * wrap] * filter[2] +
-              s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-        i--;
-    }
-
-    /* Do our altivec resampling on 16 pixels at once. */
-    while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
-           4 lines into 4 vectors, and split them into shorts.
-           Interleave the multiply/accumulate for the resample
-           filter with the loads to hide the 3 cycle latency
-           the vec_madds have.
-        */
-        tv = (vector unsigned char *) &s[0 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
-        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
-        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
-
-        tv = (vector unsigned char *) &s[1 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
-        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
-        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
-
-        tv = (vector unsigned char *) &s[2 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
-        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
-        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
-
-        tv = (vector unsigned char *) &s[3 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
-        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
-        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
-
-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
-        dstv = vec_packsu(sumhv, sumlv) ;
-        vec_st(dstv, 0, (vector unsigned char *) dst);
-
-        dst+=16;
-        s+=16;
-        dst_width-=16;
-    }
-
-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
-    while(dst_width>0) {
-        sum = s[0 * wrap] * filter[0] +
-              s[1 * wrap] * filter[1] +
-              s[2 * wrap] * filter[2] +
-              s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-    }
-}
-#endif
+#endif /* HAVE_MMX */
 
 /* slow version to handle limit cases. Does not need optimisation */
 static void h_resample_slow(uint8_t *dst, int dst_width,
@@ -646,7 +520,9 @@ struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
     struct SwsContext *ctx;
 
     ctx = av_malloc(sizeof(struct SwsContext));
-    if (ctx == NULL) {
+    if (ctx)
+        ctx->av_class = av_mallocz(sizeof(AVClass));
+    if (!ctx || !ctx->av_class) {
         av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
 
         return NULL;
@@ -672,12 +548,15 @@ struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
 
 void sws_freeContext(struct SwsContext *ctx)
 {
+    if (!ctx)
+        return;
     if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
         (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
         img_resample_close(ctx->resampling_ctx);
     } else {
         av_free(ctx->resampling_ctx);
     }
+    av_free(ctx->av_class);
     av_free(ctx);
 }
 
@@ -727,7 +606,7 @@ int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
     uint8_t *buf1 = NULL, *buf2 = NULL;
     enum PixelFormat current_pix_fmt;
 
-    for (i = 0; i < 3; i++) {
+    for (i = 0; i < 4; i++) {
         src_pict.data[i] = src[i];
         src_pict.linesize[i] = srcStride[i];
         dst_pict.data[i] = dst[i];
@@ -800,7 +679,7 @@ int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
             goto the_end;
         }
     } else if (resampled_picture != &dst_pict) {
-        img_copy(&dst_pict, resampled_picture, current_pix_fmt,
+        av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
                  ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
     }
 
@@ -813,6 +692,7 @@ the_end:
 
 #ifdef TEST
 #include <stdio.h>
+#undef exit
 
 /* input */
 #define XSIZE 256
@@ -940,8 +820,8 @@ int main(int argc, char **argv)
         exit(1);
     }
     av_log(NULL, AV_LOG_INFO, "MMX OK\n");
-#endif
+#endif /* HAVE_MMX */
 
     return 0;
 }
-#endif
+#endif /* TEST */
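
For context, here is a minimal caller-side sketch of the lifecycle this patch tightens: sws_getContext() now also allocates the context's AVClass and fails cleanly if either allocation fails, and sws_freeContext() tolerates a NULL context and releases the AVClass. The frame sizes, the pixel format, and the rescale_frame() wrapper below are illustrative assumptions, not part of the patch; only the sws_* calls match the signatures visible in the hunks above.

#include "swscale.h"

/* Hypothetical caller: scale a 320x240 PIX_FMT_YUV420P picture up to
 * 640x480 through the libavcodec swscale emulation layer patched here.
 * flags, filters and param are unused by this wrapper, so pass 0/NULL.
 * Arrays are sized 4 to match the planes now copied by sws_scale(). */
static int rescale_frame(uint8_t *src[4], int src_stride[4],
                         uint8_t *dst[4], int dst_stride[4])
{
    struct SwsContext *ctx;
    int ret;

    ctx = sws_getContext(320, 240, PIX_FMT_YUV420P,
                         640, 480, PIX_FMT_YUV420P,
                         0, NULL, NULL, NULL);
    if (!ctx)
        return -1;  /* ctx or its AVClass could not be allocated */

    ret = sws_scale(ctx, src, src_stride, 0, 240, dst, dst_stride);

    /* With this patch, sws_freeContext(NULL) is a no-op, so error
     * paths no longer need their own NULL check before cleanup. */
    sws_freeContext(ctx);
    return ret;
}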