2 * QuickTime RPZA Video Encoder
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * QT RPZA Video Encoder by Todd Kirby <doubleshot@pacbell.net> and David Adler
26 #include "libavutil/avassert.h"
27 #include "libavutil/common.h"
28 #include "libavutil/opt.h"
34 typedef struct RpzaContext {
37 int skip_frame_thresh;
38 int start_one_color_thresh;
39 int continue_one_color_thresh;
40 int sixteen_color_thresh;
42 AVFrame *prev_frame; // buffer for previous source frame
43 PutBitContext pb; // buffer for encoded frame data.
45 int frame_width; // width in pixels of source frame
46 int frame_height; // height in pixels of source frame
48 int first_frame; // flag set to one when the first frame is being processed
49 // so that comparisons with previous frame data is not attempted
52 typedef enum channel_offset {
64 #define SQR(x) ((x) * (x))
66 /* 15 bit components */
67 #define GET_CHAN(color, chan) (((color) >> ((chan) * 5) & 0x1F) * 8)
68 #define R(color) GET_CHAN(color, RED)
69 #define G(color) GET_CHAN(color, GREEN)
70 #define B(color) GET_CHAN(color, BLUE)
72 typedef struct BlockInfo {
/**
 * Build the 4-entry palette used by the 4-color opcode.
 *
 * Entry 0 is a copy of min, entry 3 is a copy of max, and entries 1 and 2
 * are placed one "step" above min and one "step" below max, where the step
 * for each channel is (max - min + 1) / 3 using integer division.
 *
 * @param min    3-channel color stored into palette entry 0
 * @param max    3-channel color stored into palette entry 3
 * @param color4 output palette, 4 entries of 3 channels each
 */
static void get_colors(const uint8_t *min, const uint8_t *max, uint8_t color4[4][3])
{
    /* All three channels are interpolated the same way, independently. */
    for (int chan = 0; chan < 3; chan++) {
        int step;

        color4[0][chan] = min[chan];
        color4[3][chan] = max[chan];

        step = (color4[3][chan] - color4[0][chan] + 1) / 3;
        color4[1][chan] = color4[0][chan] + step;
        color4[2][chan] = color4[3][chan] - step;
    }
}
114 /* Fill BlockInfo struct with information about a 4x4 block of the image */
115 static int get_block_info(BlockInfo *bi, int block)
117 bi->row = block / bi->blocks_per_row;
118 bi->col = block % bi->blocks_per_row;
120 // test for right edge block
121 if (bi->col == bi->blocks_per_row - 1 && (bi->image_width % 4) != 0) {
122 bi->block_width = bi->image_width % 4;
127 // test for bottom edge block
128 if (bi->row == (bi->image_height / 4) && (bi->image_height % 4) != 0) {
129 bi->block_height = bi->image_height % 4;
131 bi->block_height = 4;
134 return block ? (bi->col * 4) + (bi->row * bi->rowstride * 4) : 0;
137 static uint16_t rgb24_to_rgb555(uint8_t *rgb24)
/**
 * Returns the total difference between two 24 bit color values.
 *
 * The difference is the sum over the three channels of the squared
 * per-channel difference, so it is symmetric and zero for equal colors.
 *
 * @param colorA first color, 3 bytes
 * @param colorB second color, 3 bytes
 * @return sum of squared channel differences
 */
static int diff_colors(const uint8_t *colorA, const uint8_t *colorB)
{
    int tot = 0;

    for (int chan = 0; chan < 3; chan++) {
        /* uint8_t operands promote to int, so the subtraction cannot wrap */
        const int delta = colorA[chan] - colorB[chan];

        tot += delta * delta;
    }

    return tot;
}
168 * Returns the maximum channel difference
170 static int max_component_diff(uint16_t *colorA, uint16_t *colorB)
174 diff = FFABS(R(colorA[0]) - R(colorB[0]));
178 diff = FFABS(G(colorA[0]) - G(colorB[0]));
182 diff = FFABS(B(colorA[0]) - B(colorB[0]));
190 * Find the channel that has the largest difference between minimum and maximum
191 * color values. Put the minimum value in min, maximum in max and the channel
194 static void get_max_component_diff(BlockInfo *bi, uint16_t *block_ptr,
195 uint8_t *min, uint8_t *max, channel_offset *chan)
198 uint8_t min_r, max_r, min_g, max_g, min_b, max_b;
201 // fix warning about uninitialized vars
202 min_r = min_g = min_b = UINT8_MAX;
203 max_r = max_g = max_b = 0;
205 // loop thru and compare pixels
206 for (y = 0; y < bi->block_height; y++) {
207 for (x = 0; x < bi->block_width; x++){
209 min_r = FFMIN(R(block_ptr[x]), min_r);
210 min_g = FFMIN(G(block_ptr[x]), min_g);
211 min_b = FFMIN(B(block_ptr[x]), min_b);
213 max_r = FFMAX(R(block_ptr[x]), max_r);
214 max_g = FFMAX(G(block_ptr[x]), max_g);
215 max_b = FFMAX(B(block_ptr[x]), max_b);
217 block_ptr += bi->rowstride;
224 if (r > g && r > b) {
228 } else if (g > b && g >= r) {
240 * Compare two 4x4 blocks to determine if the total difference between the
241 * blocks is greater than the thresh parameter. Returns -1 if difference
242 * exceeds threshold or zero otherwise.
244 static int compare_blocks(uint16_t *block1, uint16_t *block2, BlockInfo *bi, int thresh)
247 for (y = 0; y < bi->block_height; y++) {
248 for (x = 0; x < bi->block_width; x++) {
249 diff = max_component_diff(&block1[x], &block2[x]);
250 if (diff >= thresh) {
254 block1 += bi->rowstride;
255 block2 += bi->rowstride;
261 * Determine the fit of one channel to another within a 4x4 block. This
262 * is used to determine the best palette choices for 4-color encoding.
264 static int leastsquares(uint16_t *block_ptr, BlockInfo *bi,
265 channel_offset xchannel, channel_offset ychannel,
266 double *slope, double *y_intercept, double *correlation_coef)
268 double sumx = 0, sumy = 0, sumx2 = 0, sumy2 = 0, sumxy = 0,
269 sumx_sq = 0, sumy_sq = 0, tmp, tmp2;
273 count = bi->block_height * bi->block_width;
278 for (i = 0; i < bi->block_height; i++) {
279 for (j = 0; j < bi->block_width; j++){
280 x = GET_CHAN(block_ptr[j], xchannel);
281 y = GET_CHAN(block_ptr[j], ychannel);
288 block_ptr += bi->rowstride;
291 sumx_sq = sumx * sumx;
292 tmp = (count * sumx2 - sumx_sq);
294 // guard against div/0
298 sumy_sq = sumy * sumy;
300 *slope = (sumx * sumy - sumxy) / tmp;
301 *y_intercept = (sumy - (*slope) * sumx) / count;
303 tmp2 = count * sumy2 - sumy_sq;
305 *correlation_coef = 0.0;
307 *correlation_coef = (count * sumxy - sumx * sumy) /
315 * Determine the amount of error in the leastsquares fit.
317 static int calc_lsq_max_fit_error(uint16_t *block_ptr, BlockInfo *bi,
318 int min, int max, int tmp_min, int tmp_max,
319 channel_offset xchannel, channel_offset ychannel)
325 for (i = 0; i < bi->block_height; i++) {
326 for (j = 0; j < bi->block_width; j++){
327 int x_inc, lin_y, lin_x;
328 x = GET_CHAN(block_ptr[j], xchannel);
329 y = GET_CHAN(block_ptr[j], ychannel);
331 /* calculate x_inc as the 4-color index (0..3) */
332 x_inc = floor( (x - min) * 3.0 / (max - min) + 0.5);
333 x_inc = FFMAX(FFMIN(3, x_inc), 0);
335 /* calculate lin_y corresponding to x_inc */
336 lin_y = (int)(tmp_min + (tmp_max - tmp_min) * x_inc / 3.0 + 0.5);
338 err = FFABS(lin_y - y);
342 /* calculate lin_x corresponding to x_inc */
343 lin_x = (int)(min + (max - min) * x_inc / 3.0 + 0.5);
345 err = FFABS(lin_x - x);
349 block_ptr += bi->rowstride;
356 * Find the closest match to a color within the 4-color palette
358 static int match_color(uint16_t *color, uint8_t colors[4][3])
361 int smallest_variance = INT_MAX;
362 uint8_t dithered_color[3];
364 for (int channel = 0; channel < 3; channel++) {
365 dithered_color[channel] = GET_CHAN(color[0], channel);
368 for (int palette_entry = 0; palette_entry < 4; palette_entry++) {
369 int variance = diff_colors(dithered_color, colors[palette_entry]);
371 if (variance < smallest_variance) {
372 smallest_variance = variance;
381 * Encode a block using the 4-color opcode and palette. return number of
382 * blocks encoded (until we implement multi-block 4 color runs this will
385 static int encode_four_color_block(uint8_t *min_color, uint8_t *max_color,
386 PutBitContext *pb, uint16_t *block_ptr, BlockInfo *bi)
389 uint8_t color4[4][3];
390 uint16_t rounded_max, rounded_min;
392 // round min and max wider
393 rounded_min = rgb24_to_rgb555(min_color);
394 rounded_max = rgb24_to_rgb555(max_color);
396 // put a and b colors
397 // encode 4 colors = first 16 bit color with MSB zeroed and...
398 put_bits(pb, 16, rounded_max & ~0x8000);
399 // ...second 16 bit color with MSB on.
400 put_bits(pb, 16, rounded_min | 0x8000);
402 get_colors(min_color, max_color, color4);
404 for (y = 0; y < 4; y++) {
405 for (x = 0; x < 4; x++) {
406 idx = match_color(&block_ptr[x], color4);
407 put_bits(pb, 2, idx);
409 block_ptr += bi->rowstride;
411 return 1; // num blocks encoded
415 * Copy a 4x4 block from the current frame buffer to the previous frame buffer.
417 static void update_block_in_prev_frame(const uint16_t *src_pixels,
418 uint16_t *dest_pixels,
419 const BlockInfo *bi, int block_counter)
421 for (int y = 0; y < 4; y++) {
422 memcpy(dest_pixels, src_pixels, 8);
423 dest_pixels += bi->rowstride;
424 src_pixels += bi->rowstride;
429 * update statistics for the specified block. If first_block,
430 * it initializes the statistics. Otherwise it updates the statistics IF THIS
431 * BLOCK IS SUITABLE TO CONTINUE A 1-COLOR RUN. That is, it checks whether
432 * the range of colors (since the routine was called first_block != 0) are
433 * all close enough intensities to be represented by a single color.
435 * The routine returns 0 if this block is too different to be part of
436 * the same run of 1-color blocks. The routine returns 1 if this
437 * block can be part of the same 1-color block run.
439 * If the routine returns 1, it also updates its arguments to include
440 * the statistics of this block. Otherwise, the stats are unchanged
441 * and don't include the current block.
443 static int update_block_stats(RpzaContext *s, BlockInfo *bi, uint16_t *block,
444 uint8_t min_color[3], uint8_t max_color[3],
445 int *total_rgb, int *total_pixels,
446 uint8_t avg_color[3], int first_block)
450 int total_pixels_blk;
453 uint8_t min_color_blk[3], max_color_blk[3];
454 int total_rgb_blk[3];
455 uint8_t avg_color_blk[3];
458 min_color[0] = UINT8_MAX;
459 min_color[1] = UINT8_MAX;
460 min_color[2] = UINT8_MAX;
468 threshold = s->start_one_color_thresh;
470 threshold = s->continue_one_color_thresh;
474 The *_blk variables will include the current block.
475 Initialize them based on the blocks so far.
477 min_color_blk[0] = min_color[0];
478 min_color_blk[1] = min_color[1];
479 min_color_blk[2] = min_color[2];
480 max_color_blk[0] = max_color[0];
481 max_color_blk[1] = max_color[1];
482 max_color_blk[2] = max_color[2];
483 total_rgb_blk[0] = total_rgb[0];
484 total_rgb_blk[1] = total_rgb[1];
485 total_rgb_blk[2] = total_rgb[2];
486 total_pixels_blk = *total_pixels + bi->block_height * bi->block_width;
489 Update stats for this block's pixels
491 for (y = 0; y < bi->block_height; y++) {
492 for (x = 0; x < bi->block_width; x++) {
493 total_rgb_blk[0] += R(block[x]);
494 total_rgb_blk[1] += G(block[x]);
495 total_rgb_blk[2] += B(block[x]);
497 min_color_blk[0] = FFMIN(R(block[x]), min_color_blk[0]);
498 min_color_blk[1] = FFMIN(G(block[x]), min_color_blk[1]);
499 min_color_blk[2] = FFMIN(B(block[x]), min_color_blk[2]);
501 max_color_blk[0] = FFMAX(R(block[x]), max_color_blk[0]);
502 max_color_blk[1] = FFMAX(G(block[x]), max_color_blk[1]);
503 max_color_blk[2] = FFMAX(B(block[x]), max_color_blk[2]);
505 block += bi->rowstride;
509 Calculate average color including current block.
511 avg_color_blk[0] = total_rgb_blk[0] / total_pixels_blk;
512 avg_color_blk[1] = total_rgb_blk[1] / total_pixels_blk;
513 avg_color_blk[2] = total_rgb_blk[2] / total_pixels_blk;
516 Are all the pixels within threshold of the average color?
518 is_in_range = (max_color_blk[0] - avg_color_blk[0] <= threshold &&
519 max_color_blk[1] - avg_color_blk[1] <= threshold &&
520 max_color_blk[2] - avg_color_blk[2] <= threshold &&
521 avg_color_blk[0] - min_color_blk[0] <= threshold &&
522 avg_color_blk[1] - min_color_blk[1] <= threshold &&
523 avg_color_blk[2] - min_color_blk[2] <= threshold);
527 Set the output variables to include this block.
529 min_color[0] = min_color_blk[0];
530 min_color[1] = min_color_blk[1];
531 min_color[2] = min_color_blk[2];
532 max_color[0] = max_color_blk[0];
533 max_color[1] = max_color_blk[1];
534 max_color[2] = max_color_blk[2];
535 total_rgb[0] = total_rgb_blk[0];
536 total_rgb[1] = total_rgb_blk[1];
537 total_rgb[2] = total_rgb_blk[2];
538 *total_pixels = total_pixels_blk;
539 avg_color[0] = avg_color_blk[0];
540 avg_color[1] = avg_color_blk[1];
541 avg_color[2] = avg_color_blk[2];
547 static void rpza_encode_stream(RpzaContext *s, const AVFrame *pict)
550 int block_counter = 0;
553 int prev_block_offset;
554 int block_offset = 0;
555 uint8_t min = 0, max = 0;
558 int tmp_min, tmp_max;
560 uint8_t avg_color[3];
562 uint8_t min_color[3], max_color[3];
563 double slope, y_intercept, correlation_coef;
564 uint16_t *src_pixels = (uint16_t *)pict->data[0];
565 uint16_t *prev_pixels = (uint16_t *)s->prev_frame->data[0];
567 /* Number of 4x4 blocks in frame. */
568 total_blocks = ((s->frame_width + 3) / 4) * ((s->frame_height + 3) / 4);
570 bi.image_width = s->frame_width;
571 bi.image_height = s->frame_height;
572 bi.rowstride = pict->linesize[0] / 2;
574 bi.blocks_per_row = (s->frame_width + 3) / 4;
576 while (block_counter < total_blocks) {
578 // make sure we have a valid previous frame and we're not writing
580 if (!s->first_frame) {
582 prev_block_offset = 0;
584 while (n_blocks < 32 && block_counter + n_blocks < total_blocks) {
586 block_offset = get_block_info(&bi, block_counter + n_blocks);
588 // multi-block opcodes cannot span multiple rows.
589 // If we're starting a new row, break out and write the opcode
590 /* TODO: Should eventually use bi.row here to determine when a
591 row break occurs, but that is currently breaking the
592 quicktime player. This is probably due to a bug in the
593 way I'm calculating the current row.
595 if (prev_block_offset && block_offset - prev_block_offset > 12) {
599 prev_block_offset = block_offset;
601 if (compare_blocks(&prev_pixels[block_offset],
602 &src_pixels[block_offset], &bi, s->skip_frame_thresh) != 0) {
603 // write out skippable blocks
607 put_bits(&s->pb, 8, 0x80 | (n_blocks - 1));
608 block_counter += n_blocks;
616 * NOTE: we don't update skipped blocks in the previous frame buffer
617 * since skipped needs always to be compared against the first skipped
618 * block to avoid artifacts during gradual fade in/outs.
621 // update_block_in_prev_frame(&src_pixels[block_offset],
622 // &prev_pixels[block_offset], &bi, block_counter + n_blocks);
627 // we're either at the end of the frame or we've reached the maximum
628 // of 32 blocks in a run. Write out the run.
631 put_bits(&s->pb, 8, 0x80 | (n_blocks - 1));
632 block_counter += n_blocks;
638 block_offset = get_block_info(&bi, block_counter);
643 if (update_block_stats(s, &bi, &src_pixels[block_offset],
644 min_color, max_color,
645 total_rgb, &pixel_count, avg_color, 1)) {
646 prev_block_offset = block_offset;
650 /* update this block in the previous frame buffer */
651 update_block_in_prev_frame(&src_pixels[block_offset],
652 &prev_pixels[block_offset], &bi, block_counter + n_blocks);
654 // check for subsequent blocks with the same color
655 while (n_blocks < 32 && block_counter + n_blocks < total_blocks) {
656 block_offset = get_block_info(&bi, block_counter + n_blocks);
658 // multi-block opcodes cannot span multiple rows.
659 // If we've hit end of a row, break out and write the opcode
660 if (block_offset - prev_block_offset > 12) {
664 if (!update_block_stats(s, &bi, &src_pixels[block_offset],
665 min_color, max_color,
666 total_rgb, &pixel_count, avg_color, 0)) {
670 prev_block_offset = block_offset;
672 /* update this block in the previous frame buffer */
673 update_block_in_prev_frame(&src_pixels[block_offset],
674 &prev_pixels[block_offset], &bi, block_counter + n_blocks);
679 // write one color opcode.
680 put_bits(&s->pb, 8, 0xa0 | (n_blocks - 1));
681 // write color to encode.
682 put_bits(&s->pb, 16, rgb24_to_rgb555(avg_color));
683 // skip past the blocks we've just encoded.
684 block_counter += n_blocks;
685 } else { // FOUR COLOR CHECK
688 // get max component diff for block
689 get_max_component_diff(&bi, &src_pixels[block_offset], &min, &max, &chan);
698 // run least squares against other two components
699 for (i = 0; i < 3; i++) {
706 slope = y_intercept = correlation_coef = 0;
708 if (leastsquares(&src_pixels[block_offset], &bi, chan, i,
709 &slope, &y_intercept, &correlation_coef)) {
710 min_color[i] = GET_CHAN(src_pixels[block_offset], i);
711 max_color[i] = GET_CHAN(src_pixels[block_offset], i);
713 tmp_min = (int)(0.5 + min * slope + y_intercept);
714 tmp_max = (int)(0.5 + max * slope + y_intercept);
716 av_assert0(tmp_min <= tmp_max);
717 // clamp min and max color values
718 tmp_min = av_clip_uint8(tmp_min);
719 tmp_max = av_clip_uint8(tmp_max);
721 err = FFMAX(calc_lsq_max_fit_error(&src_pixels[block_offset], &bi,
722 min, max, tmp_min, tmp_max, chan, i), err);
724 min_color[i] = tmp_min;
725 max_color[i] = tmp_max;
729 if (err > s->sixteen_color_thresh) { // DO SIXTEEN COLOR BLOCK
733 block_offset = get_block_info(&bi, block_counter);
735 row_ptr = &src_pixels[block_offset];
737 for (int y = 0; y < 4; y++) {
738 for (int x = 0; x < 4; x++){
739 rgb555 = row_ptr[x] & ~0x8000;
741 put_bits(&s->pb, 16, rgb555);
743 row_ptr += bi.rowstride;
747 } else { // FOUR COLOR BLOCK
748 block_counter += encode_four_color_block(min_color, max_color,
749 &s->pb, &src_pixels[block_offset], &bi);
752 /* update this block in the previous frame buffer */
753 update_block_in_prev_frame(&src_pixels[block_offset],
754 &prev_pixels[block_offset], &bi, block_counter);
759 static int rpza_encode_init(AVCodecContext *avctx)
761 RpzaContext *s = avctx->priv_data;
763 s->frame_width = avctx->width;
764 s->frame_height = avctx->height;
766 s->prev_frame = av_frame_alloc();
768 return AVERROR(ENOMEM);
773 static int rpza_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
774 const AVFrame *frame, int *got_packet)
776 RpzaContext *s = avctx->priv_data;
777 const AVFrame *pict = frame;
781 if ((ret = ff_alloc_packet2(avctx, pkt, 6LL * avctx->height * avctx->width, 0)) < 0)
784 init_put_bits(&s->pb, pkt->data, pkt->size);
786 // skip 4 byte header, write it later once the size of the chunk is known
787 put_bits32(&s->pb, 0x00);
789 if (!s->prev_frame->data[0]) {
791 s->prev_frame->format = pict->format;
792 s->prev_frame->width = pict->width;
793 s->prev_frame->height = pict->height;
794 ret = av_frame_get_buffer(s->prev_frame, 0);
801 rpza_encode_stream(s, pict);
803 flush_put_bits(&s->pb);
805 av_shrink_packet(pkt, put_bytes_output(&s->pb));
808 // write header opcode
809 buf[0] = 0xe1; // chunk opcode
811 // write chunk length
812 AV_WB24(buf + 1, pkt->size);
819 static int rpza_encode_end(AVCodecContext *avctx)
821 RpzaContext *s = (RpzaContext *)avctx->priv_data;
823 av_frame_free(&s->prev_frame);
828 #define OFFSET(x) offsetof(RpzaContext, x)
829 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
830 static const AVOption options[] = {
831 { "skip_frame_thresh", NULL, OFFSET(skip_frame_thresh), AV_OPT_TYPE_INT, {.i64=1}, 0, 24, VE},
832 { "start_one_color_thresh", NULL, OFFSET(start_one_color_thresh), AV_OPT_TYPE_INT, {.i64=1}, 0, 24, VE},
833 { "continue_one_color_thresh", NULL, OFFSET(continue_one_color_thresh), AV_OPT_TYPE_INT, {.i64=0}, 0, 24, VE},
834 { "sixteen_color_thresh", NULL, OFFSET(sixteen_color_thresh), AV_OPT_TYPE_INT, {.i64=1}, 0, 24, VE},
838 static const AVClass rpza_class = {
839 .class_name = "rpza",
840 .item_name = av_default_item_name,
842 .version = LIBAVUTIL_VERSION_INT,
845 AVCodec ff_rpza_encoder = {
847 .long_name = NULL_IF_CONFIG_SMALL("QuickTime video (RPZA)"),
848 .type = AVMEDIA_TYPE_VIDEO,
849 .id = AV_CODEC_ID_RPZA,
850 .priv_data_size = sizeof(RpzaContext),
851 .priv_class = &rpza_class,
852 .init = rpza_encode_init,
853 .encode2 = rpza_encode_frame,
854 .close = rpza_encode_end,
855 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
856 .pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB555,