X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fv210enc.c;h=ca6ad2ee2f2aeef697241d1cca39191931adbb76;hb=e7078e842d93436edba1f30af1f9869d3913f7fe;hp=714b6fb55142bbbae2ea1a8c31133226e6a4db43;hpb=eeb9e61a518f6b3a1a85dd44e44d55e395e015f6;p=ffmpeg

diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index 714b6fb5514..ca6ad2ee2f2 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -23,109 +23,208 @@
 #include "avcodec.h"
 #include "bytestream.h"
+#include "internal.h"
+#include "v210enc.h"
 
-static av_cold int encode_init(AVCodecContext *avctx)
+#define CLIP(v) av_clip(v, 4, 1019)
+#define CLIP8(v) av_clip(v, 1, 254)
+
+#define WRITE_PIXELS(a, b, c)           \
+    do {                                \
+        val  = CLIP(*a++);              \
+        val |= (CLIP(*b++) << 10) |     \
+               (CLIP(*c++) << 20);      \
+        AV_WL32(dst, val);              \
+        dst += 4;                       \
+    } while (0)
+
+#define WRITE_PIXELS8(a, b, c)          \
+    do {                                \
+        val  = (CLIP8(*a++) << 2);      \
+        val |= (CLIP8(*b++) << 12) |    \
+               (CLIP8(*c++) << 22);     \
+        AV_WL32(dst, val);              \
+        dst += 4;                       \
+    } while (0)
+
+static void v210_planar_pack_8_c(const uint8_t *y, const uint8_t *u,
+                                 const uint8_t *v, uint8_t *dst,
+                                 ptrdiff_t width)
 {
-    if (avctx->width & 1) {
-        av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
-        return AVERROR(EINVAL);
+    uint32_t val;
+    int i;
+
+    /* unroll this to match the assembly */
+    for (i = 0; i < width - 11; i += 12) {
+        WRITE_PIXELS8(u, y, v);
+        WRITE_PIXELS8(y, u, y);
+        WRITE_PIXELS8(v, y, u);
+        WRITE_PIXELS8(y, v, y);
+        WRITE_PIXELS8(u, y, v);
+        WRITE_PIXELS8(y, u, y);
+        WRITE_PIXELS8(v, y, u);
+        WRITE_PIXELS8(y, v, y);
     }
+}
 
-    if (avctx->pix_fmt != PIX_FMT_YUV422P10) {
-        av_log(avctx, AV_LOG_ERROR, "v210 needs YUV422P10\n");
-        return -1;
+static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u,
+                                  const uint16_t *v, uint8_t *dst,
+                                  ptrdiff_t width)
+{
+    uint32_t val;
+    int i;
+
+    for (i = 0; i < width - 5; i += 6) {
+        WRITE_PIXELS(u, y, v);
+        WRITE_PIXELS(y, u, y);
+        WRITE_PIXELS(v, y, u);
+        WRITE_PIXELS(y, v, y);
     }
+}
+
+av_cold void ff_v210enc_init(V210EncContext *s)
+{
+    s->pack_line_8  = v210_planar_pack_8_c;
+    s->pack_line_10 = v210_planar_pack_10_c;
 
-    if (avctx->bits_per_raw_sample != 10)
-        av_log(avctx, AV_LOG_WARNING, "bits per raw sample: %d != 10-bit\n",
-               avctx->bits_per_raw_sample);
+    if (ARCH_X86)
+        ff_v210enc_init_x86(s);
+}
 
-    avctx->coded_frame = avcodec_alloc_frame();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
+static av_cold int encode_init(AVCodecContext *avctx)
+{
+    V210EncContext *s = avctx->priv_data;
 
+    if (avctx->width & 1) {
+        av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
+        return AVERROR(EINVAL);
+    }
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ff_v210enc_init(s);
 
     return 0;
 }
 
-static int encode_frame(AVCodecContext *avctx, unsigned char *buf,
-                        int buf_size, void *data)
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                        const AVFrame *pic, int *got_packet)
 {
-    const AVFrame *pic = data;
+    V210EncContext *s = avctx->priv_data;
     int aligned_width = ((avctx->width + 47) / 48) * 48;
     int stride = aligned_width * 8 / 3;
     int line_padding = stride - ((avctx->width * 8 + 11) / 12) * 4;
-    int h, w;
-    const uint16_t *y = (const uint16_t*)pic->data[0];
-    const uint16_t *u = (const uint16_t*)pic->data[1];
-    const uint16_t *v = (const uint16_t*)pic->data[2];
-    PutByteContext p;
-
-    if (buf_size < avctx->height * stride) {
-        av_log(avctx, AV_LOG_ERROR, "output buffer too small\n");
-        return AVERROR(ENOMEM);
-    }
-
-    bytestream2_init_writer(&p, buf, buf_size);
-
-#define CLIP(v) av_clip(v, 4, 1019)
-
-#define WRITE_PIXELS(a, b, c)           \
-    do {                                \
-        val  = CLIP(*a++);              \
-        val |= (CLIP(*b++) << 10) |     \
-               (CLIP(*c++) << 20);      \
-        bytestream2_put_le32u(&p, val); \
-    } while (0)
-
-    for (h = 0; h < avctx->height; h++) {
-        uint32_t val;
-        for (w = 0; w < avctx->width - 5; w += 6) {
-            WRITE_PIXELS(u, y, v);
-            WRITE_PIXELS(y, u, y);
-            WRITE_PIXELS(v, y, u);
-            WRITE_PIXELS(y, v, y);
-        }
-        if (w < avctx->width - 1) {
-            WRITE_PIXELS(u, y, v);
+    int h, w, ret;
+    uint8_t *dst;
 
-            val = CLIP(*y++);
-            if (w == avctx->width - 2)
-                bytestream2_put_le32u(&p, val);
+    ret = ff_alloc_packet(pkt, avctx->height * stride);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+        return ret;
+    }
+    dst = pkt->data;
+
+    if (pic->format == AV_PIX_FMT_YUV422P10) {
+        const uint16_t *y = (const uint16_t *)pic->data[0];
+        const uint16_t *u = (const uint16_t *)pic->data[1];
+        const uint16_t *v = (const uint16_t *)pic->data[2];
+        for (h = 0; h < avctx->height; h++) {
+            uint32_t val;
+            w = (avctx->width / 6) * 6;
+            s->pack_line_10(y, u, v, dst, w);
+
+            y += w;
+            u += w >> 1;
+            v += w >> 1;
+            dst += (w / 6) * 16;
+            if (w < avctx->width - 1) {
+                WRITE_PIXELS(u, y, v);
+
+                val = CLIP(*y++);
+                if (w == avctx->width - 2) {
+                    AV_WL32(dst, val);
+                    dst += 4;
+                }
+            }
+            if (w < avctx->width - 3) {
+                val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
+                AV_WL32(dst, val);
+                dst += 4;
+
+                val = CLIP(*v++) | (CLIP(*y++) << 10);
+                AV_WL32(dst, val);
+                dst += 4;
+            }
+
+            memset(dst, 0, line_padding);
+            dst += line_padding;
+            y += pic->linesize[0] / 2 - avctx->width;
+            u += pic->linesize[1] / 2 - avctx->width / 2;
+            v += pic->linesize[2] / 2 - avctx->width / 2;
         }
-        if (w < avctx->width - 3) {
-            val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
-            bytestream2_put_le32u(&p, val);
-
-            val = CLIP(*v++) | (CLIP(*y++) << 10);
-            bytestream2_put_le32u(&p, val);
+    } else if(pic->format == AV_PIX_FMT_YUV422P) {
+        const uint8_t *y = pic->data[0];
+        const uint8_t *u = pic->data[1];
+        const uint8_t *v = pic->data[2];
+        for (h = 0; h < avctx->height; h++) {
+            uint32_t val;
+            w = (avctx->width / 12) * 12;
+            s->pack_line_8(y, u, v, dst, w);
+
+            y += w;
+            u += w >> 1;
+            v += w >> 1;
+            dst += (w / 12) * 32;
+
+            for (; w < avctx->width - 5; w += 6) {
+                WRITE_PIXELS8(u, y, v);
+                WRITE_PIXELS8(y, u, y);
+                WRITE_PIXELS8(v, y, u);
+                WRITE_PIXELS8(y, v, y);
+            }
+            if (w < avctx->width - 1) {
+                WRITE_PIXELS8(u, y, v);
+
+                val = CLIP8(*y++) << 2;
+                if (w == avctx->width - 2) {
+                    AV_WL32(dst, val);
+                    dst += 4;
+                }
+            }
+            if (w < avctx->width - 3) {
+                val |= (CLIP8(*u++) << 12) | (CLIP8(*y++) << 22);
+                AV_WL32(dst, val);
+                dst += 4;
+
+                val = (CLIP8(*v++) << 2) | (CLIP8(*y++) << 12);
+                AV_WL32(dst, val);
+                dst += 4;
+            }
+            memset(dst, 0, line_padding);
+            dst += line_padding;
+
+            y += pic->linesize[0] - avctx->width;
+            u += pic->linesize[1] - avctx->width / 2;
+            v += pic->linesize[2] - avctx->width / 2;
         }
-
-        bytestream2_set_buffer(&p, 0, line_padding);
-
-        y += pic->linesize[0] / 2 - avctx->width;
-        u += pic->linesize[1] / 2 - avctx->width / 2;
-        v += pic->linesize[2] / 2 - avctx->width / 2;
     }
 
-    return bytestream2_tell_p(&p);
-}
-
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_freep(&avctx->coded_frame);
-
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
     return 0;
 }
 
 AVCodec ff_v210_encoder = {
     .name           = "v210",
+    .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
     .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_V210,
+    .id             = AV_CODEC_ID_V210,
+    .priv_data_size = sizeof(V210EncContext),
     .init           = encode_init,
-    .encode         = encode_frame,
-    .close          = encode_close,
-    .pix_fmts       = (const enum PixelFormat[]){PIX_FMT_YUV422P10, PIX_FMT_NONE},
-    .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+    .encode2        = encode_frame,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P, AV_PIX_FMT_NONE },
 };
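
Illustrative note, not part of the patch above: v210 packs three 10-bit samples into each little-endian 32-bit word, so one group of six 4:2:2 pixels (6 Y, 3 Cb, 3 Cr) fills exactly four words (16 bytes); that is why the encoder advances dst by (w / 6) * 16 per packed run and rounds the width up to a multiple of 48 pixels (48 * 8 / 3 = 128 bytes) for the output stride. The standalone C sketch below mirrors the word layout produced by the WRITE_PIXELS macro for a single 6-pixel group; put_le32(), pack_group() and the sample values are invented for this illustration and are not FFmpeg API.

/*
 * Sketch: pack one group of six 4:2:2 pixels (10-bit samples) into four
 * little-endian 32-bit words, following the v210 sample order used by
 * the WRITE_PIXELS macro: (Cb0 Y0 Cr0) (Y1 Cb1 Y2) (Cr1 Y3 Cb2) (Y4 Cr2 Y5).
 */
#include <stdint.h>
#include <stdio.h>

static void put_le32(uint8_t *dst, uint32_t v)
{
    dst[0] = (uint8_t)( v        & 0xff);
    dst[1] = (uint8_t)((v >>  8) & 0xff);
    dst[2] = (uint8_t)((v >> 16) & 0xff);
    dst[3] = (uint8_t)((v >> 24) & 0xff);
}

/* Each word holds one sample in bits 0-9, one in 10-19 and one in 20-29. */
static void pack_group(const uint16_t y[6], const uint16_t u[3],
                       const uint16_t v[3], uint8_t dst[16])
{
    put_le32(dst +  0, (uint32_t)u[0] | ((uint32_t)y[0] << 10) | ((uint32_t)v[0] << 20));
    put_le32(dst +  4, (uint32_t)y[1] | ((uint32_t)u[1] << 10) | ((uint32_t)y[2] << 20));
    put_le32(dst +  8, (uint32_t)v[1] | ((uint32_t)y[3] << 10) | ((uint32_t)u[2] << 20));
    put_le32(dst + 12, (uint32_t)y[4] | ((uint32_t)v[2] << 10) | ((uint32_t)y[5] << 20));
}

int main(void)
{
    /* Arbitrary in-range 10-bit sample values, purely for demonstration. */
    const uint16_t y[6] = { 64, 128, 192, 256, 320, 384 };
    const uint16_t u[3] = { 512, 512, 512 };
    const uint16_t v[3] = { 512, 512, 512 };
    uint8_t dst[16];
    int i;

    pack_group(y, u, v, dst);   /* 6 pixels -> 16 bytes of v210 payload */
    for (i = 0; i < 16; i++)
        printf("%02x ", dst[i]);
    printf("\n");
    return 0;
}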