Improves performance by avoiding extraneous memory copying.
Most beneficial on fast settings.
On average it is around 5-10% faster overall with the ultrafast preset, but the
performance improvement can be even larger in some cases.
# list of all preprocessor HAVE values we can define
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \
- MSA"
+ MSA MMAP"
# parse options
define HAVE_LOG2F
fi
+if [ "$SYS" != "WINDOWS" ] && cpp_check "sys/mman.h unistd.h" "" "defined(MAP_PRIVATE)"; then
+ define HAVE_MMAP # sys/mman.h provides MAP_PRIVATE; Windows is excluded here and handled via _WIN32 in the C sources
+fi
+
if [ "$SYS" = "LINUX" -a \( "$ARCH" = "X86" -o "$ARCH" = "X86_64" \) ] && cc_check "sys/mman.h" "" "MADV_HUGEPAGE;" ; then
define HAVE_THP
fi
return -1;
h->cur_frame = -1;
- if( cli_input.picture_alloc( &h->pic, info->csp, info->width, info->height ) )
+ if( cli_input.picture_alloc( &h->pic, *handle, info->csp, info->width, info->height ) )
return -1;
h->hin = *handle;
static void free_filter( hnd_t handle )
{
source_hnd_t *h = handle;
- cli_input.picture_clean( &h->pic );
+ cli_input.picture_clean( &h->pic, h->hin );
cli_input.close_file( h->hin );
free( h );
}
return 0;
}
-static int picture_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
{
if( x264_cli_pic_alloc( pic, X264_CSP_NONE, width, height ) )
return -1;
return 0;
}
-static void picture_clean( cli_pic_t *pic )
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
{
memset( pic, 0, sizeof(cli_pic_t) );
}
return 0;
}
-static int picture_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
{
if( x264_cli_pic_alloc( pic, X264_CSP_NONE, width, height ) )
return -1;
return 0;
}
-static void picture_clean( cli_pic_t *pic )
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
{
memset( pic, 0, sizeof(cli_pic_t) );
}
* Copyright (C) 2010-2015 x264 project
*
* Authors: Steven Walters <kemuri9@gmail.com>
+ * Henrik Gramner <henrik@gramner.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include "input.h"
+#ifdef _WIN32
+#include <io.h>
+#include <windows.h>
+#elif HAVE_MMAP
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
const x264_cli_csp_t x264_cli_csps[] = {
[X264_CSP_I420] = { "i420", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
[X264_CSP_I422] = { "i422", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
return size;
}
-static int x264_cli_pic_alloc_internal( cli_pic_t *pic, int csp, int width, int height, int align )
+static int x264_cli_pic_init_internal( cli_pic_t *pic, int csp, int width, int height, int align, int alloc )
{
memset( pic, 0, sizeof(cli_pic_t) );
int csp_mask = csp & X264_CSP_MASK;
int stride = width * x264_cli_csps[csp_mask].width[i];
stride *= x264_cli_csp_depth_factor( csp );
stride = ALIGN( stride, align );
- uint64_t size = (uint64_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
- pic->img.plane[i] = x264_malloc( size );
- if( !pic->img.plane[i] )
- return -1;
pic->img.stride[i] = stride;
+
+ if( alloc )
+ {
+ size_t size = (size_t)(height * x264_cli_csps[csp_mask].height[i]) * stride;
+ pic->img.plane[i] = x264_malloc( size );
+ if( !pic->img.plane[i] )
+ return -1;
+ }
}
return 0;
int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
{
- return x264_cli_pic_alloc_internal( pic, csp, width, height, 1 );
+ return x264_cli_pic_init_internal( pic, csp, width, height, 1, 1 );
}
int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height )
{
- return x264_cli_pic_alloc_internal( pic, csp, width, height, NATIVE_ALIGN );
+ return x264_cli_pic_init_internal( pic, csp, width, height, NATIVE_ALIGN, 1 );
+}
+/* Sets up `pic` through x264_cli_pic_init_internal() with alignment 1 and alloc = 0:
+ * the per-plane strides are filled in but no plane buffers are allocated.
+ * Returns that helper's result. */
+int x264_cli_pic_init_noalloc( cli_pic_t *pic, int csp, int width, int height )
+{
+ return x264_cli_pic_init_internal( pic, csp, width, height, 1, 0 );
}
void x264_cli_pic_clean( cli_pic_t *pic )
return NULL;
return x264_cli_csps + (csp&X264_CSP_MASK);
}
+
+/* Functions for handling memory-mapped input frames.
+ *
+ * x264_cli_mmap_init() prepares `h` for mapping regions of the open stream `fh`.
+ * Returns 0 on success; non-zero on failure or when no mapping backend was compiled in. */
+int x264_cli_mmap_init( cli_mmap_t *h, FILE *fh )
+{
+#ifdef _WIN32
+ HANDLE osfhandle = (HANDLE)_get_osfhandle( _fileno( fh ) );
+ if( osfhandle != INVALID_HANDLE_VALUE )
+ {
+ SYSTEM_INFO si;
+ GetSystemInfo( &si );
+ h->align_mask = si.dwAllocationGranularity - 1; /* mask used to align view offsets in x264_cli_mmap() */
+ h->map_handle = CreateFileMappingW( osfhandle, NULL, PAGE_READONLY, 0, 0, NULL ); /* read-only mapping; maximum size 0:0 = current file size */
+ return !h->map_handle;
+ }
+#elif HAVE_MMAP && defined(_SC_PAGESIZE)
+ h->align_mask = sysconf( _SC_PAGESIZE ) - 1; /* a failed sysconf() (-1) leaves this negative, which is rejected below */
+ h->fd = fileno( fh );
+ return h->align_mask < 0 || h->fd < 0;
+#endif
+ return -1; /* invalid Windows OS handle, or no mapping backend available */
+}
+/* Maps `size` bytes of the input file, starting at byte `offset`, for reading.
+ * The offset is rounded down using h->align_mask and the view is enlarged by the same
+ * amount, so the returned pointer addresses the byte at the requested `offset`.
+ * Returns NULL if the mapping fails or no mapping backend was compiled in. */
+void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size )
+{
+#if defined(_WIN32) || HAVE_MMAP
+ int align = offset & h->align_mask; /* distance from the aligned mapping start to the requested offset */
+ offset -= align;
+ size += align;
+#ifdef _WIN32
+ uint8_t *base = MapViewOfFile( h->map_handle, FILE_MAP_READ, offset >> 32, offset, size ); /* offset is passed as high and low 32-bit halves */
+ /* TODO: Would PrefetchVirtualMemory() (only available on Win8+) be beneficial? */
+ if( base )
+ return base + align;
+#else
+ uint8_t *base = mmap( NULL, size, PROT_READ, MAP_PRIVATE, h->fd, offset );
+ if( base != MAP_FAILED )
+ {
+ /* Ask the OS to readahead pages. This improves performance whereas
+ * forcing page faults by manually accessing every page does not.
+ * Some systems have implemented madvise() but not posix_madvise()
+ * and vice versa, so check both to see if either is available. */
+#ifdef MADV_WILLNEED
+ madvise( base, size, MADV_WILLNEED );
+#elif defined(POSIX_MADV_WILLNEED)
+ posix_madvise( base, size, POSIX_MADV_WILLNEED );
+#endif
+ return base + align; /* skip the alignment slack in front of the requested data */
+ }
+#endif
+#endif
+ return NULL;
+}
+/* Unmaps a region obtained from x264_cli_mmap(). `addr` is the pointer that call
+ * returned and `size` the size that was requested; the aligned base address is
+ * recomputed from h->align_mask. `size` is not used by the Windows backend.
+ * Returns 0 on success; non-zero on failure or when no mapping backend was compiled in. */
+int x264_cli_munmap( cli_mmap_t *h, void *addr, size_t size )
+{
+#if defined(_WIN32) || HAVE_MMAP
+ void *base = (void*)((intptr_t)addr & ~h->align_mask);
+#ifdef _WIN32
+ return !UnmapViewOfFile( base );
+#else
+ return munmap( base, size + (intptr_t)addr - (intptr_t)base ); /* add back the alignment slack that x264_cli_mmap() mapped in front of addr */
+#endif
+#endif
+ return -1;
+}
+/* Releases what x264_cli_mmap_init() acquired. Only the Windows backend holds a
+ * resource (the file-mapping handle); on other platforms this does nothing. */
+void x264_cli_mmap_close( cli_mmap_t *h )
+{
+#ifdef _WIN32
+ CloseHandle( h->map_handle );
+#endif
+}
typedef struct
{
int (*open_file)( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt );
- int (*picture_alloc)( cli_pic_t *pic, int csp, int width, int height );
+ int (*picture_alloc)( cli_pic_t *pic, hnd_t handle, int csp, int width, int height );
int (*read_frame)( cli_pic_t *pic, hnd_t handle, int i_frame );
int (*release_frame)( cli_pic_t *pic, hnd_t handle );
- void (*picture_clean)( cli_pic_t *pic );
+ void (*picture_clean)( cli_pic_t *pic, hnd_t handle );
int (*close_file)( hnd_t handle );
} cli_input_t;
extern const cli_input_t raw_input;
extern const cli_input_t y4m_input;
extern const cli_input_t avs_input;
-extern cli_input_t thread_input;
+extern const cli_input_t thread_input;
extern const cli_input_t lavf_input;
extern const cli_input_t ffms_input;
-extern cli_input_t timecode_input;
+extern const cli_input_t timecode_input;
extern cli_input_t cli_input;
int x264_cli_csp_depth_factor( int csp );
int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
int x264_cli_pic_alloc_aligned( cli_pic_t *pic, int csp, int width, int height );
+int x264_cli_pic_init_noalloc( cli_pic_t *pic, int csp, int width, int height );
void x264_cli_pic_clean( cli_pic_t *pic );
uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
uint64_t x264_cli_pic_size( int csp, int width, int height );
const x264_cli_csp_t *x264_cli_get_csp( int csp );
+typedef struct
+{
+ int align_mask; /* mapping granularity minus one (page size, or allocation granularity on Windows) */
+#ifdef _WIN32
+ void *map_handle; /* file-mapping object returned by CreateFileMappingW() */
+#elif HAVE_MMAP
+ int fd; /* descriptor of the input file, passed to mmap() */
+#endif
+} cli_mmap_t;
+
+int x264_cli_mmap_init( cli_mmap_t *h, FILE *fh );
+void *x264_cli_mmap( cli_mmap_t *h, int64_t offset, size_t size );
+int x264_cli_munmap( cli_mmap_t *h, void *addr, size_t size );
+void x264_cli_mmap_close( cli_mmap_t *h );
+
#endif
XCHG( cli_image_t, p_pic->img, h->first_pic->img );
p_pic->pts = h->first_pic->pts;
}
- lavf_input.picture_clean( h->first_pic );
+ lavf_input.picture_clean( h->first_pic, h );
free( h->first_pic );
h->first_pic = NULL;
if( !i_frame )
/* prefetch the first frame and set/confirm flags */
h->first_pic = malloc( sizeof(cli_pic_t) );
- FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, X264_CSP_OTHER, info->width, info->height ),
+ FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, h, X264_CSP_OTHER, info->width, info->height ),
"malloc failed\n" )
else if( read_frame_internal( h->first_pic, h, 0, info ) )
return -1;
return 0;
}
-static int picture_alloc( cli_pic_t *pic, int csp, int width, int height )
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
{
if( x264_cli_pic_alloc( pic, X264_CSP_NONE, width, height ) )
return -1;
return read_frame_internal( pic, handle, i_frame, NULL );
}
-static void picture_clean( cli_pic_t *pic )
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
{
memset( pic, 0, sizeof(cli_pic_t) );
}
uint64_t plane_size[4];
uint64_t frame_size;
int bit_depth;
+ cli_mmap_t mmap;
+ int use_mmap;
} raw_hnd_t;
static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
uint64_t size = ftell( h->fh );
fseek( h->fh, 0, SEEK_SET );
info->num_frames = size / h->frame_size;
+
+ /* Attempt to use memory-mapped input frames if possible */
+ if( !(h->bit_depth & 7) )
+ h->use_mmap = !x264_cli_mmap_init( &h->mmap, h->fh );
}
*p_handle = h;
static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h, int bit_depth_uc )
{
- int error = 0;
int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
- for( int i = 0; i < pic->img.planes && !error; i++ )
+
+ for( int i = 0; i < pic->img.planes; i++ )
{
- error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
+ if( h->use_mmap )
+ {
+ if( i )
+ pic->img.plane[i] = pic->img.plane[i-1] + pixel_depth * h->plane_size[i-1];
+ }
+ else if( fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i] )
+ return -1;
+
if( bit_depth_uc )
{
/* upconvert non 16bit high depth planes to 16bit using the same
plane[j] = plane[j] << lshift;
}
}
- return error;
+ return 0;
}
static int read_frame( cli_pic_t *pic, hnd_t handle, int i_frame )
{
raw_hnd_t *h = handle;
- if( i_frame > h->next_frame )
+ if( h->use_mmap )
+ {
+ pic->img.plane[0] = x264_cli_mmap( &h->mmap, i_frame * h->frame_size, h->frame_size );
+ if( !pic->img.plane[0] )
+ return -1;
+ }
+ else if( i_frame > h->next_frame )
{
if( x264_is_regular_file( h->fh ) )
fseek( h->fh, i_frame * h->frame_size, SEEK_SET );
return 0;
}
+static int release_frame( cli_pic_t *pic, hnd_t handle )
+{
+ raw_hnd_t *h = handle;
+ if( h->use_mmap ) /* plane[0] is the start of the frame mapping created in read_frame() */
+ return x264_cli_munmap( &h->mmap, pic->img.plane[0], h->frame_size );
+ return 0; /* non-mmap frames were read into the picture's own buffers; nothing to release */
+}
+/* With mmap input the picture is initialized without plane buffers, because read_frame()
+ * points the planes at the mapped file; otherwise the planes are allocated as before. */
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
+{
+ raw_hnd_t *h = handle;
+ return (h->use_mmap ? x264_cli_pic_init_noalloc : x264_cli_pic_alloc)( pic, csp, width, height );
+}
+/* With mmap input the planes were never allocated by picture_alloc(), so the picture
+ * is only zeroed rather than handed to x264_cli_pic_clean(). */
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
+{
+ raw_hnd_t *h = handle;
+ if( h->use_mmap )
+ memset( pic, 0, sizeof(cli_pic_t) );
+ else
+ x264_cli_pic_clean( pic );
+}
+
static int close_file( hnd_t handle )
{
raw_hnd_t *h = handle;
if( !h || !h->fh )
return 0;
+ if( h->use_mmap )
+ x264_cli_mmap_close( &h->mmap ); /* drop the mapping state set up by x264_cli_mmap_init() */
fclose( h->fh );
free( h );
return 0;
}
-const cli_input_t raw_input = { open_file, x264_cli_pic_alloc, read_frame, NULL, x264_cli_pic_clean, close_file };
+const cli_input_t raw_input = { open_file, picture_alloc, read_frame, release_frame, picture_clean, close_file };
static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
{
thread_hnd_t *h = malloc( sizeof(thread_hnd_t) );
- FAIL_IF_ERR( !h || cli_input.picture_alloc( &h->pic, info->csp, info->width, info->height ),
+ FAIL_IF_ERR( !h || cli_input.picture_alloc( &h->pic, *p_handle, info->csp, info->width, info->height ),
"x264", "malloc failed\n" )
h->input = cli_input;
h->p_handle = *p_handle;
h->next_args->h = h;
h->next_args->status = 0;
h->frame_total = info->num_frames;
- thread_input.picture_alloc = h->input.picture_alloc;
- thread_input.picture_clean = h->input.picture_clean;
if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
return -1;
return 0;
}
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
+{
+ thread_hnd_t *h = handle;
+ return h->input.picture_alloc( pic, h->p_handle, csp, width, height ); /* forward to the wrapped input, passing that input's own handle */
+}
+/* Forwards to the wrapped input's picture_clean(), substituting that input's own
+ * handle for the thread filter's handle. */
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
+{
+ thread_hnd_t *h = handle;
+ h->input.picture_clean( pic, h->p_handle );
+}
+
static int close_file( hnd_t handle )
{
thread_hnd_t *h = handle;
x264_threadpool_delete( h->pool );
+ h->input.picture_clean( &h->pic, h->p_handle ); /* picture_clean() now receives the input handle, so it has to run before close_file() is called on that handle */
h->input.close_file( h->p_handle );
- h->input.picture_clean( &h->pic );
free( h->next_args );
free( h );
return 0;
}
-cli_input_t thread_input = { open_file, NULL, read_frame, release_frame, NULL, close_file };
+const cli_input_t thread_input = { open_file, picture_alloc, read_frame, release_frame, picture_clean, close_file };
h->timebase_num = info->fps_den; /* can be changed later by auto timebase generation */
if( h->auto_timebase_den )
h->timebase_den = 0; /* set later by auto timebase generation */
- timecode_input.picture_alloc = h->input.picture_alloc;
- timecode_input.picture_clean = h->input.picture_clean;
tcfile_in = x264_fopen( psz_filename, "rb" );
FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
return 0;
}
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
+{
+ timecode_hnd_t *h = handle;
+ return h->input.picture_alloc( pic, h->p_handle, csp, width, height ); /* forward to the wrapped input, passing that input's own handle */
+}
+/* Forwards to the wrapped input's picture_clean(), substituting that input's own
+ * handle for the timecode filter's handle. */
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
+{
+ timecode_hnd_t *h = handle;
+ h->input.picture_clean( pic, h->p_handle );
+}
+
static int close_file( hnd_t handle )
{
timecode_hnd_t *h = handle;
return 0;
}
-cli_input_t timecode_input = { open_file, NULL, read_frame, release_frame, NULL, close_file };
+const cli_input_t timecode_input = { open_file, picture_alloc, read_frame, release_frame, picture_clean, close_file };
uint64_t frame_size;
uint64_t plane_size[3];
int bit_depth;
+ cli_mmap_t mmap;
+ int use_mmap;
} y4m_hnd_t;
#define Y4M_MAGIC "YUV4MPEG2"
break;
}
}
- if( i == MAX_YUV4_HEADER || strncmp( header, Y4M_MAGIC, sizeof(Y4M_MAGIC)-1 ) )
- return -1;
+ FAIL_IF_ERROR( strncmp( header, Y4M_MAGIC, sizeof(Y4M_MAGIC)-1 ), "bad sequence header magic\n" )
+ FAIL_IF_ERROR( i == MAX_YUV4_HEADER, "bad sequence header length\n" )
/* Scan properties */
header_end = &header[i+1]; /* Include space */
uint64_t i_size = ftell( h->fh );
fseek( h->fh, init_pos, SEEK_SET );
info->num_frames = (i_size - h->seq_header_len) / h->frame_size;
+
+ /* Attempt to use memory-mapped input frames if possible */
+ if( !(h->bit_depth & 7) )
+ h->use_mmap = !x264_cli_mmap_init( &h->mmap, h->fh );
}
*p_handle = h;
{
static const size_t slen = sizeof(Y4M_FRAME_MAGIC)-1;
int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
- int i = 0;
- char header[16];
-
- /* Read frame header - without terminating '\n' */
- if( fread( header, 1, slen, h->fh ) != slen )
- return -1;
+ int i = sizeof(Y4M_FRAME_MAGIC);
+ char header_buf[16];
+ char *header;
- header[slen] = 0;
- FAIL_IF_ERROR( strncmp( header, Y4M_FRAME_MAGIC, slen ), "bad header magic (%"PRIx32" <=> %s)\n",
- M32(header), header )
-
- /* Skip most of it */
- while( i < MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
- i++;
- FAIL_IF_ERROR( i == MAX_FRAME_HEADER, "bad frame header!\n" )
+ /* Verify that the frame header is valid */
+ if( h->use_mmap )
+ {
+ header = (char*)pic->img.plane[0];
+ pic->img.plane[0] += h->frame_header_len;
+
+ /* If the header length has changed between frames the size of the mapping will be invalid.
+ * It might be possible to work around it, but I'm not aware of any tool beside fuzzers that
+ * produces y4m files with variable-length frame headers so just error out if that happens. */
+ while( i <= h->frame_header_len && header[i-1] != '\n' )
+ i++;
+ FAIL_IF_ERROR( i != h->frame_header_len, "bad frame header length\n" )
+ }
+ else
+ {
+ header = header_buf;
+ if( fread( header, 1, slen, h->fh ) != slen )
+ return -1;
+ while( i <= MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
+ i++;
+ FAIL_IF_ERROR( i > MAX_FRAME_HEADER, "bad frame header length\n" )
+ }
+ FAIL_IF_ERROR( memcmp( header, Y4M_FRAME_MAGIC, slen ), "bad frame header magic\n" )
- int error = 0;
- for( i = 0; i < pic->img.planes && !error; i++ )
+ for( i = 0; i < pic->img.planes; i++ )
{
- error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
+ if( h->use_mmap )
+ {
+ if( i )
+ pic->img.plane[i] = pic->img.plane[i-1] + pixel_depth * h->plane_size[i-1];
+ }
+ else if( fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i] )
+ return -1;
+
if( bit_depth_uc )
{
/* upconvert non 16bit high depth planes to 16bit using the same
plane[j] = plane[j] << lshift;
}
}
- return error;
+ return 0;
}
static int read_frame( cli_pic_t *pic, hnd_t handle, int i_frame )
{
y4m_hnd_t *h = handle;
- if( i_frame > h->next_frame )
+ if( h->use_mmap )
+ {
+ pic->img.plane[0] = x264_cli_mmap( &h->mmap, h->frame_size * i_frame + h->seq_header_len, h->frame_size );
+ if( !pic->img.plane[0] )
+ return -1;
+ }
+ else if( i_frame > h->next_frame )
{
if( x264_is_regular_file( h->fh ) )
fseek( h->fh, h->frame_size * i_frame + h->seq_header_len, SEEK_SET );
return 0;
}
+static int release_frame( cli_pic_t *pic, hnd_t handle )
+{
+ y4m_hnd_t *h = handle;
+ if( h->use_mmap ) /* plane[0] was advanced by frame_header_len when the frame header was parsed; step back to the mapped address */
+ return x264_cli_munmap( &h->mmap, pic->img.plane[0] - h->frame_header_len, h->frame_size );
+ return 0; /* non-mmap frames were read into the picture's own buffers; nothing to release */
+}
+/* With mmap input the picture is initialized without plane buffers, because read_frame()
+ * points the planes at the mapped file; otherwise the planes are allocated as before. */
+static int picture_alloc( cli_pic_t *pic, hnd_t handle, int csp, int width, int height )
+{
+ y4m_hnd_t *h = handle;
+ return (h->use_mmap ? x264_cli_pic_init_noalloc : x264_cli_pic_alloc)( pic, csp, width, height );
+}
+/* With mmap input the planes were never allocated by picture_alloc(), so the picture
+ * is only zeroed rather than handed to x264_cli_pic_clean(). */
+static void picture_clean( cli_pic_t *pic, hnd_t handle )
+{
+ y4m_hnd_t *h = handle;
+ if( h->use_mmap )
+ memset( pic, 0, sizeof(cli_pic_t) );
+ else
+ x264_cli_pic_clean( pic );
+}
+
static int close_file( hnd_t handle )
{
y4m_hnd_t *h = handle;
if( !h || !h->fh )
return 0;
+ if( h->use_mmap )
+ x264_cli_mmap_close( &h->mmap ); /* drop the mapping state set up by x264_cli_mmap_init() */
fclose( h->fh );
free( h );
return 0;
}
-const cli_input_t y4m_input = { open_file, x264_cli_pic_alloc, read_frame, NULL, x264_cli_pic_clean, close_file };
+const cli_input_t y4m_input = { open_file, picture_alloc, read_frame, release_frame, picture_clean, close_file };