]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/snow.c
Merge remote-tracking branch 'dwbuiten/master'
[ffmpeg] / libavcodec / snow.c
index 186bc8bd4d01e896adcdc36ff4d6464c62267ae1..a4fe8b603ade694789a2d20bc3d2b2747bdd40bd 100644 (file)
@@ -31,9 +31,6 @@
 #include "mathops.h"
 #include "h263.h"
 
-#undef NDEBUG
-#include <assert.h>
-
 
 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
@@ -143,11 +140,11 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
 
     int x, y, b, r, l;
     int16_t tmpIt   [64*(32+HTAPS_MAX)];
-    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
+    uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
     int16_t *tmpI= tmpIt;
     uint8_t *tmp2= tmp2t[0];
     const uint8_t *hpel[11];
-    assert(dx<16 && dy<16);
+    av_assert2(dx<16 && dy<16);
     r= brane[dx + 16*dy]&15;
     l= brane[dx + 16*dy]>>4;
 
@@ -181,7 +178,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
                 tmp2[x]= am;
             }
             tmpI+= 64;
-            tmp2+= stride;
+            tmp2+= 64;
             src += stride;
         }
         src -= stride*y;
@@ -210,7 +207,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
                 tmp2[x]= am;
             }
             src += stride;
-            tmp2+= stride;
+            tmp2+= 64;
         }
         src -= stride*y;
     }
@@ -237,12 +234,12 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
                 tmp2[x]= am;
             }
             tmpI+= 64;
-            tmp2+= stride;
+            tmp2+= 64;
         }
     }
 
     hpel[ 0]= src;
-    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
+    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
     hpel[ 2]= src + 1;
 
     hpel[ 4]= tmp2t[1];
@@ -250,14 +247,21 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
     hpel[ 6]= tmp2t[1] + 1;
 
     hpel[ 8]= src + stride;
-    hpel[ 9]= hpel[1] + stride;
+    hpel[ 9]= hpel[1] + 64;
     hpel[10]= hpel[8] + 1;
 
+#define MC_STRIDE(x) (needs[x] ? 64 : stride)
+
     if(b==15){
-        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
-        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
-        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
-        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
+        int dxy = dx / 8 + dy / 8 * 4;
+        const uint8_t *src1 = hpel[dxy    ];
+        const uint8_t *src2 = hpel[dxy + 1];
+        const uint8_t *src3 = hpel[dxy + 4];
+        const uint8_t *src4 = hpel[dxy + 5];
+        int stride1 = MC_STRIDE(dxy);
+        int stride2 = MC_STRIDE(dxy + 1);
+        int stride3 = MC_STRIDE(dxy + 4);
+        int stride4 = MC_STRIDE(dxy + 5);
         dx&=7;
         dy&=7;
         for(y=0; y < b_h; y++){
@@ -265,23 +269,25 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
             }
-            src1+=stride;
-            src2+=stride;
-            src3+=stride;
-            src4+=stride;
+            src1+=stride1;
+            src2+=stride2;
+            src3+=stride3;
+            src4+=stride4;
             dst +=stride;
         }
     }else{
         const uint8_t *src1= hpel[l];
         const uint8_t *src2= hpel[r];
+        int stride1 = MC_STRIDE(l);
+        int stride2 = MC_STRIDE(r);
         int a= weight[((dx&7) + (8*(dy&7)))];
         int b= 8-a;
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
             }
-            src1+=stride;
-            src2+=stride;
+            src1+=stride1;
+            src2+=stride2;
             dst +=stride;
         }
     }
@@ -328,7 +334,7 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride,
         }
     }else{
         uint8_t *src= s->last_picture[block->ref].data[plane_index];
-        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
+        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
         int mx= block->mx*scale;
         int my= block->my*scale;
         const int dx= mx&15;
@@ -342,10 +348,13 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride,
             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
             src= tmp + MB_SIZE;
         }
+
+        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
+
 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
 //        assert(!(b_w&(b_w-1)));
-        assert(b_w>1 && b_h>1);
-        assert((tab_index>=0 && tab_index<4) || b_w==32);
+        av_assert2(b_w>1 && b_h>1);
+        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
         else if(b_w==32){
@@ -360,7 +369,7 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride,
             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
         }else{
-            assert(2*b_w==b_h);
+            av_assert2(2*b_w==b_h);
             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
         }
@@ -369,7 +378,7 @@ void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride,
 
 #define mca(dx,dy,b_w)\
 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
-    assert(h==b_w);\
+    av_assert2(h==b_w);\
     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
 }
 
@@ -438,24 +447,40 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
     width= s->avctx->width;
     height= s->avctx->height;
 
-    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
-    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
-    s->temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
-    s->temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
+    FF_ALLOCZ_OR_GOTO(avctx, s->spatial_idwt_buffer, width * height * sizeof(IDWTELEM), fail);
+    FF_ALLOCZ_OR_GOTO(avctx, s->spatial_dwt_buffer,  width * height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
+    FF_ALLOCZ_OR_GOTO(avctx, s->temp_dwt_buffer,     width * sizeof(DWTELEM),  fail);
+    FF_ALLOCZ_OR_GOTO(avctx, s->temp_idwt_buffer,    width * sizeof(IDWTELEM), fail);
+    FF_ALLOC_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1) * ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
 
     for(i=0; i<MAX_REF_FRAMES; i++)
         for(j=0; j<MAX_REF_FRAMES; j++)
             ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1);
 
-    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
-    s->scratchbuf = av_mallocz(s->mconly_picture.linesize[0]*7*MB_SIZE);
-
     return 0;
+fail:
+    return AVERROR(ENOMEM);
 }
 
 int ff_snow_common_init_after_header(AVCodecContext *avctx) {
     SnowContext *s = avctx->priv_data;
     int plane_index, level, orientation;
+    int ret, emu_buf_size;
+
+    if(!s->scratchbuf) {
+        if ((ret = s->avctx->get_buffer(s->avctx, &s->mconly_picture)) < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            return ret;
+        }
+        FF_ALLOCZ_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256)*7*MB_SIZE, fail);
+        emu_buf_size = FFMAX(s->mconly_picture.linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
+        FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail);
+    }
+
+    if(s->mconly_picture.format != avctx->pix_fmt) {
+        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     for(plane_index=0; plane_index<3; plane_index++){
         int w= s->avctx->width;
@@ -504,6 +529,8 @@ int ff_snow_common_init_after_header(AVCodecContext *avctx) {
     }
 
     return 0;
+fail:
+    return AVERROR(ENOMEM);
 }
 
 #define USE_HALFPEL_PLANE 0
@@ -513,8 +540,8 @@ static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *fr
 
     for(p=0; p<3; p++){
         int is_chroma= !!p;
-        int w= s->avctx->width  >>is_chroma;
-        int h= s->avctx->height >>is_chroma;
+        int w= is_chroma ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;
+        int h= is_chroma ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
         int ls= frame->linesize[p];
         uint8_t *src= frame->data[p];
 
@@ -573,11 +600,11 @@ int ff_snow_frame_start(SnowContext *s){
                           s->current_picture.linesize[0], w   , h   ,
                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[1],
-                          s->current_picture.linesize[1], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          s->current_picture.linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
+                          EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
         s->dsp.draw_edges(s->current_picture.data[2],
-                          s->current_picture.linesize[2], w>>1, h>>1,
-                          EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
+                          s->current_picture.linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
+                          EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
     }
 
     ff_snow_release_buffer(s->avctx);
@@ -623,6 +650,7 @@ av_cold void ff_snow_common_end(SnowContext *s)
     av_freep(&s->temp_dwt_buffer);
     av_freep(&s->spatial_idwt_buffer);
     av_freep(&s->temp_idwt_buffer);
+    av_freep(&s->run_buffer);
 
     s->m.me.temp= NULL;
     av_freep(&s->m.me.scratchpad);
@@ -632,12 +660,15 @@ av_cold void ff_snow_common_end(SnowContext *s)
 
     av_freep(&s->block);
     av_freep(&s->scratchbuf);
+    av_freep(&s->emu_edge_buffer);
 
     for(i=0; i<MAX_REF_FRAMES; i++){
         av_freep(&s->ref_mvs[i]);
         av_freep(&s->ref_scores[i]);
-        if(s->last_picture[i].data[0])
+        if(s->last_picture[i].data[0]) {
+            av_assert0(s->last_picture[i].data[0] != s->current_picture.data[0]);
             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
+        }
     }
 
     for(plane_index=0; plane_index<3; plane_index++){