memset( var, 0, size );\
} while( 0 )
+/* Macros for merging multiple allocations into a single large malloc, for improved
+ * use with huge pages. */
+
+/* Capacity (in entries, not bytes) of the preallocs[] table declared by PREALLOC_INIT; each PREALLOC() call consumes one entry, so this needs to be enough to contain any set of buffers that use combined allocations */
+#define PREALLOC_BUF_SIZE 1024
+
+#define PREALLOC_INIT /* declares the bookkeeping locals that PREALLOC and PREALLOC_END operate on */\
+ int prealloc_idx = 0; /* number of entries recorded in preallocs[] so far */\
+ size_t prealloc_size = 0; /* running byte total of all reservations; also the offset given to the next PREALLOC */\
+ uint8_t **preallocs[PREALLOC_BUF_SIZE]; /* addresses of the pointer variables that PREALLOC_END rebases */
+
+/* Reserve a region of `size` bytes in the pending combined allocation.
+ * Requires PREALLOC_INIT earlier in the same scope.
+ *   var  - pointer lvalue; it temporarily stores the byte offset of its region
+ *          (not a usable address) and must not be dereferenced until
+ *          PREALLOC_END has run. Its address is recorded, so the object must
+ *          still exist when PREALLOC_END is invoked.
+ *   size - byte count; passed through ALIGN( size, NATIVE_ALIGN ) before being
+ *          added to the running total (presumably rounds up to the native
+ *          alignment -- confirm against ALIGN's definition). */
+#define PREALLOC( var, size )\
+do {\
+    /* preallocs[] has a fixed capacity of PREALLOC_BUF_SIZE entries */\
+    assert( prealloc_idx < PREALLOC_BUF_SIZE );\
+    (var) = (void*)(intptr_t)prealloc_size;\
+    preallocs[prealloc_idx++] = (uint8_t**)&(var);\
+    prealloc_size += ALIGN( (size), NATIVE_ALIGN );\
+} while(0)
+
+/* Perform the single combined allocation and turn every offset recorded by
+ * PREALLOC into a real address inside it.
+ *   ptr - pointer lvalue that receives the base of the block via
+ *         CHECKED_MALLOC( ptr, prealloc_size ); failure handling is whatever
+ *         CHECKED_MALLOC does (not visible here).
+ * On completion prealloc_idx is -1, so the bookkeeping cannot be reused
+ * without re-initialising it. */
+#define PREALLOC_END( ptr )\
+do {\
+    CHECKED_MALLOC( ptr, prealloc_size );\
+    while( prealloc_idx-- )\
+        /* rebase in integer space: the stored value is an offset, not a pointer into an object */\
+        *preallocs[prealloc_idx] = (uint8_t*)((intptr_t)(*preallocs[prealloc_idx]) + (intptr_t)(ptr));\
+} while(0)
+
#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
#define X264_BFRAME_MAX 16
#include <assert.h>
#include <limits.h>
-#if HAVE_OPENCL
-#include "opencl.h"
-#endif
-
#if HAVE_INTERLACED
# define MB_INTERLACED h->mb.b_interlaced
# define SLICE_MBAFF h->sh.b_mbaff
};
#include "x264.h"
+#if HAVE_OPENCL
+#include "opencl.h"
+#endif
+#include "cabac.h"
#include "bitstream.h"
#include "set.h"
#include "predict.h"
#include "mc.h"
#include "frame.h"
#include "dct.h"
-#include "cabac.h"
#include "quant.h"
#include "cpu.h"
#include "threadpool.h"
/* Current MB DCT coeffs */
struct
{
- ALIGNED_16( dctcoef luma16x16_dc[3][16] );
+ ALIGNED_N( dctcoef luma16x16_dc[3][16] ); /* NOTE(review): ALIGNED_N presumably requests the native SIMD alignment instead of a fixed 16 bytes -- confirm against its definition; chroma_dc below stays ALIGNED_16 */
ALIGNED_16( dctcoef chroma_dc[2][8] );
// FIXME share memory?
- ALIGNED_16( dctcoef luma8x8[12][64] );
- ALIGNED_16( dctcoef luma4x4[16*3][16] );
+ ALIGNED_N( dctcoef luma8x8[12][64] );
+ ALIGNED_N( dctcoef luma4x4[16*3][16] );
} dct;
/* MB table and cache for current frame/mb */
* and won't be copied from one thread to another */
/* mb table */
+ uint8_t *base; /* base pointer for all malloced data in this mb */
int8_t *type; /* mb type */
uint8_t *partition; /* mb partition */
int8_t *qp; /* mb qp */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
ALIGNED_16( pixel fenc_buf[48*FENC_STRIDE] );
- ALIGNED_16( pixel fdec_buf[52*FDEC_STRIDE] );
+ ALIGNED_N( pixel fdec_buf[52*FDEC_STRIDE] );
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
ALIGNED_16( dctcoef fenc_dct4[16][16] );
/* Psy RD SATD/SA8D scores cache */
- ALIGNED_16( uint64_t fenc_hadamard_cache[9] );
- ALIGNED_16( uint32_t fenc_satd_cache[32] );
+ ALIGNED_N( uint64_t fenc_hadamard_cache[9] );
+ ALIGNED_N( uint32_t fenc_satd_cache[32] );
/* pointer over mb of the frame to be compressed */
pixel *p_fenc[3]; /* y,u,v */
uint32_t (*nr_residual_sum)[64];
uint32_t *nr_count;
- ALIGNED_16( udctcoef nr_offset_denoise[4][64] );
- ALIGNED_16( uint32_t nr_residual_sum_buf[2][4][64] );
+ ALIGNED_N( udctcoef nr_offset_denoise[4][64] );
+ ALIGNED_N( uint32_t nr_residual_sum_buf[2][4][64] );
uint32_t nr_count_buf[2][4];
uint8_t luma2chroma_pixel[7]; /* Subsampled pixel size */