X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fimgresample.c;h=8da1b4d315f3fb9392c60bafafe55437f8ac56cc;hb=d3b4b74c32cf302d36a4c4d2cce08027f0a22560;hp=f69a5d5505ed10bbe3de9ba45272c0501bedc70e;hpb=5b43487d23204001070efd44430134b5ea8e0087;p=ffmpeg

diff --git a/libavcodec/imgresample.c b/libavcodec/imgresample.c
index f69a5d5505e..8da1b4d315f 100644
--- a/libavcodec/imgresample.c
+++ b/libavcodec/imgresample.c
@@ -25,8 +25,12 @@
  */
 #include "avcodec.h"
-#include "swscale.h"
 #include "dsputil.h"
+#include "libswscale/swscale.h"
+
+#ifdef HAVE_ALTIVEC
+#include "ppc/imgresample_altivec.h"
+#endif
 
 #define NB_COMPONENTS 3
 
@@ -44,7 +48,7 @@
 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
 
 struct SwsContext {
-    AVClass *av_class;
+    const AVClass *av_class;
     struct ImgReSampleContext *resampling_ctx;
     enum PixelFormat src_pix_fmt, dst_pix_fmt;
 };
@@ -281,134 +285,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
 }
 #endif /* HAVE_MMX */
 
-#ifdef HAVE_ALTIVEC
-typedef union {
-    vector unsigned char v;
-    unsigned char c[16];
-} vec_uc_t;
-
-typedef union {
-    vector signed short v;
-    signed short s[8];
-} vec_ss_t;
-
-void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
-                          int wrap, int16_t *filter)
-{
-    int sum, i;
-    const uint8_t *s;
-    vector unsigned char *tv, tmp, dstv, zero;
-    vec_ss_t srchv[4], srclv[4], fv[4];
-    vector signed short zeros, sumhv, sumlv;
-    s = src;
-
-    for(i=0;i<4;i++)
-    {
-        /*
-        The vec_madds later on does an implicit >>15 on the result.
-        Since FILTER_BITS is 8, and we have 15 bits of magnitude in
-        a signed short, we have just enough bits to pre-shift our
-        filter constants <<7 to compensate for vec_madds.
-        */
-        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
-        fv[i].v = vec_splat(fv[i].v, 0);
-    }
-
-    zero = vec_splat_u8(0);
-    zeros = vec_splat_s16(0);
-
-
-    /*
-    When we're resampling, we'd ideally like both our input buffers,
-    and output buffers to be 16-byte aligned, so we can do both aligned
-    reads and writes. Sadly we can't always have this at the moment, so
-    we opt for aligned writes, as unaligned writes have a huge overhead.
-    To do this, do enough scalar resamples to get dst 16-byte aligned.
-    */
-    i = (-(int)dst) & 0xf;
-    while(i>0) {
-        sum = s[0 * wrap] * filter[0] +
-              s[1 * wrap] * filter[1] +
-              s[2 * wrap] * filter[2] +
-              s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-        i--;
-    }
-
-    /* Do our altivec resampling on 16 pixels at once. */
-    while(dst_width>=16) {
-        /*
-        Read 16 (potentially unaligned) bytes from each of
-        4 lines into 4 vectors, and split them into shorts.
-        Interleave the multipy/accumulate for the resample
-        filter with the loads to hide the 3 cycle latency
-        the vec_madds have.
-        */
-        tv = (vector unsigned char *) &s[0 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
-        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
-        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
-
-        tv = (vector unsigned char *) &s[1 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
-        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
-        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
-
-        tv = (vector unsigned char *) &s[2 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
-        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
-        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
-
-        tv = (vector unsigned char *) &s[3 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
-        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
-        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
-
-        /*
-        Pack the results into our destination vector,
-        and do an aligned write of that back to memory.
-        */
-        dstv = vec_packsu(sumhv, sumlv) ;
-        vec_st(dstv, 0, (vector unsigned char *) dst);
-
-        dst+=16;
-        s+=16;
-        dst_width-=16;
-    }
-
-    /*
-    If there are any leftover pixels, resample them
-    with the slow scalar method.
-    */
-    while(dst_width>0) {
-        sum = s[0 * wrap] * filter[0] +
-              s[1 * wrap] * filter[1] +
-              s[2 * wrap] * filter[2] +
-              s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-    }
-}
-#endif /* HAVE_ALTIVEC */
-
-/* slow version to handle limit cases. Does not need optimisation */
+/* slow version to handle limit cases. Does not need optimization */
 static void h_resample_slow(uint8_t *dst, int dst_width, const uint8_t *src,
                             int src_width, int src_start, int src_incr,
                             int16_t *filters)
@@ -514,7 +391,7 @@ static void component_resample(ImgReSampleContext *s,
             h_resample(new_line, owidth,
                        src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                        &s->h_filters[0][0]);
-        /* handle ring buffer wraping */
+        /* handle ring buffer wrapping */
         if (ring_y >= LINE_BUF_HEIGHT) {
             memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                    new_line, owidth);
@@ -570,7 +447,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
     if (!s)
         return NULL;
     if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
-        return NULL;
+        goto fail;
     s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
     if (!s->line_buf)
         goto fail;
@@ -635,6 +512,13 @@ void img_resample_close(ImgReSampleContext *s)
     av_free(s);
 }
 
+static const char *context_to_name(void* ptr)
+{
+    return "imgconvert";
+}
+
+static const AVClass context_class = { "imgresample", context_to_name, NULL };
+
 struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
                                   int dstW, int dstH, int dstFormat,
                                   int flags, SwsFilter *srcFilter,
@@ -643,17 +527,16 @@ struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
     struct SwsContext *ctx;
 
     ctx = av_malloc(sizeof(struct SwsContext));
-    if (ctx)
-        ctx->av_class = av_mallocz(sizeof(AVClass));
-    if (!ctx || !ctx->av_class) {
+    if (!ctx) {
         av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
         return NULL;
     }
+    ctx->av_class = &context_class;
 
     if ((srcH != dstH) || (srcW != dstW)) {
         if ((srcFormat != PIX_FMT_YUV420P) ||
             (dstFormat != PIX_FMT_YUV420P)) {
-            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
+            av_log(ctx, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
         }
         ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
     } else {
@@ -679,7 +562,6 @@ void sws_freeContext(struct SwsContext *ctx)
     } else {
         av_free(ctx->resampling_ctx);
    }
-    av_free(ctx->av_class);
     av_free(ctx);
 }
 
@@ -757,7 +639,7 @@ int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
                             &src_pict, ctx->src_pix_fmt,
                             ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {
-            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
+            av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
             res = -1;
             goto the_end;
         }
@@ -796,7 +678,7 @@ int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
                                 resampled_picture, current_pix_fmt,
                                 ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {
-            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
+            av_log(ctx, AV_LOG_ERROR, "pixel format conversion not handled\n");
             res = -1;
             goto the_end;
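
The pre-shift trick in the removed AltiVec comment is worth unpacking: vec_madds performs an implicit >>15 on each 16-bit multiply, so pre-shifting the 8.8 fixed-point filter coefficients left by 15 - FILTER_BITS = 7 makes the vector path land on the same value as the scalar (pixel * coef) >> FILTER_BITS. A minimal sketch of that identity in plain C; the values are illustrative, not from the source, and it models vec_madds as the bare >>15 the comment describes (the real instruction also folds in a rounding constant before shifting):

    #include <assert.h>
    #include <stdint.h>

    #define FILTER_BITS 8                /* as in imgresample.c */

    int main(void)
    {
        int16_t coef  = 53;              /* example 8.8 fixed-point tap */
        uint8_t pixel = 200;             /* example input sample */

        /* scalar path used by the fallback loops */
        int scalar = (pixel * coef) >> FILTER_BITS;

        /* vector path: coefficient pre-shifted by 15 - FILTER_BITS,
           then the implicit >>15 applied per lane by vec_madds */
        int16_t pre = coef << (15 - FILTER_BITS);
        int vec_model = (pixel * pre) >> 15;

        assert(scalar == vec_model);     /* (x << 7) >> 15 == x >> 8 */
        return 0;
    }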
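
The removed v_resample16_altivec is organized as a classic peel/vector/tail pattern: filter scalar pixels until dst reaches a 16-byte boundary (aligned stores were cheap on that hardware, unaligned ones were not), run the 16-wide vector body, then mop up the leftovers in scalar code again. A condensed, portable sketch of that control flow follows; it reuses the scalar 4-tap kernel from the diff in all three phases as a stand-in for the vec_madds/vec_packsu body, and swaps the original's (int) pointer cast for intptr_t:

    #include <stddef.h>
    #include <stdint.h>

    #define FILTER_BITS 8

    /* scalar 4-tap vertical filter for one output pixel, as in the diff */
    static uint8_t tap4(const uint8_t *s, int wrap, const int16_t *filter)
    {
        int sum = s[0 * wrap] * filter[0] +
                  s[1 * wrap] * filter[1] +
                  s[2 * wrap] * filter[2] +
                  s[3 * wrap] * filter[3];
        sum >>= FILTER_BITS;
        if (sum < 0)
            return 0;
        if (sum > 255)
            return 255;
        return (uint8_t)sum;
    }

    void v_resample_peeled(uint8_t *dst, int dst_width, const uint8_t *src,
                           int wrap, const int16_t *filter)
    {
        const uint8_t *s = src;

        /* head: scalar pixels until dst sits on a 16-byte boundary, so
           the vector body can use aligned stores */
        int head = (int)((-(intptr_t)dst) & 0xf);
        while (head > 0 && dst_width > 0) {
            *dst++ = tap4(s++, wrap, filter);
            head--;
            dst_width--;
        }

        /* body: 16 pixels per iteration; the original used vec_madds and
           vec_packsu here, this portable stand-in reuses the scalar kernel */
        while (dst_width >= 16) {
            for (int i = 0; i < 16; i++)
                dst[i] = tap4(s + i, wrap, filter);
            dst += 16;
            s += 16;
            dst_width -= 16;
        }

        /* tail: any leftover pixels, scalar again */
        while (dst_width > 0) {
            *dst++ = tap4(s++, wrap, filter);
            dst_width--;
        }
    }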
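
One easy-to-miss fix in the img_resample_full_init hunk: the owidth overflow guard used to return NULL after the context had already been allocated, leaking it, so the diff reroutes it through the existing "goto fail" cleanup. The guard itself rejects widths for which the multiplication handed to av_mallocz would wrap around. A minimal sketch of the pattern, assuming a simplified context struct and plain calloc/free in place of av_mallocz/av_free (the names here are illustrative, not FFmpeg's):

    #include <limits.h>
    #include <stdlib.h>

    struct ctx {
        unsigned char *line_buf;
    };

    struct ctx *ctx_init(unsigned owidth, unsigned height)
    {
        struct ctx *s = calloc(1, sizeof(*s));
        if (!s)
            return NULL;    /* nothing else allocated yet: plain return is safe */

        /* refuse sizes where owidth * height would wrap around, mirroring
           the UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS) guard in the diff */
        if (height == 0 || owidth >= UINT_MAX / height)
            goto fail;      /* NOT "return NULL": s would leak */

        s->line_buf = calloc(1, (size_t)owidth * height);
        if (!s->line_buf)
            goto fail;
        return s;

    fail:
        free(s->line_buf);  /* NULL-safe: s was zeroed by calloc */
        free(s);
        return NULL;
    }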
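
Finally, the SwsContext changes swap a per-context av_mallocz'd AVClass for a single static const instance. That is why av_class becomes a const AVClass *, why sws_freeContext stops freeing it, and why the av_log calls can now pass ctx and get their log lines prefixed with the context's name. A toy model of the mechanism, assuming only what FFmpeg's av_log relies on, namely that the object's first member points to its class; LogClass, toy_log and friends are invented stand-ins, not libavutil API:

    #include <stdio.h>

    /* miniature stand-in for AVClass: a class name plus item_name callback */
    typedef struct LogClass {
        const char *class_name;
        const char *(*item_name)(void *ctx);
    } LogClass;

    static const char *my_item_name(void *ctx)
    {
        (void)ctx;
        return "imgconvert";
    }

    /* one shared, immutable class object: nothing to allocate or free per
       context, which is why the member can become a const pointer */
    static const LogClass my_class = { "imgresample", my_item_name };

    typedef struct MyContext {
        const LogClass *log_class;   /* must be the FIRST member */
        /* ... per-context state ... */
    } MyContext;

    static void toy_log(void *obj, const char *msg)
    {
        /* like av_log, assume only that *obj begins with a class pointer */
        const LogClass *cls = *(const LogClass **)obj;
        printf("[%s @ %p] %s", cls->item_name(obj), obj, msg);
    }

    int main(void)
    {
        MyContext ctx = { &my_class };
        toy_log(&ctx, "pixel format conversion not handled\n");
        return 0;
    }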