X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fpthread.c;h=ee7bdb5310a9c29165b9f5329b47ce573921c76e;hb=ddcf67c8a51c67b122a826d8b5819e96d591d813;hp=70845f0ba0363323f0f313034d5738eaafc5288b;hpb=ba9ef8d04ecd009036b7c380e71bac081c56c53e;p=ffmpeg diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c index 70845f0ba03..ee7bdb5310a 100644 --- a/libavcodec/pthread.c +++ b/libavcodec/pthread.c @@ -29,11 +29,36 @@ * @see doc/multithreading.txt */ -#include <pthread.h> +#include "config.h" + +#if HAVE_SCHED_GETAFFINITY +#define _GNU_SOURCE +#include <sched.h> +#endif +#if HAVE_GETPROCESSAFFINITYMASK +#include <windows.h> +#endif +#if HAVE_SYSCTL +#if HAVE_SYS_PARAM_H +#include <sys/param.h> +#endif +#include <sys/types.h> +#include <sys/sysctl.h> +#endif +#if HAVE_SYSCONF +#include <unistd.h> +#endif #include "avcodec.h" +#include "internal.h" #include "thread.h" +#if HAVE_PTHREADS +#include <pthread.h> +#elif HAVE_W32THREADS +#include "w32pthreads.h" +#endif + typedef int (action_func)(AVCodecContext *c, void *arg); typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); @@ -55,7 +80,7 @@ typedef struct ThreadContext { } ThreadContext; /// Max number of frame buffers that can be allocated when using frame threads. -#define MAX_BUFFERS 32 +#define MAX_BUFFERS (32+1) /** * Context used by codec threads and stored in their AVCodecContext thread_opaque. @@ -64,6 +89,7 @@ typedef struct PerThreadContext { struct FrameThreadContext *parent; pthread_t thread; + int thread_init; pthread_cond_t input_cond; ///< Used to wait for a new packet from the main thread. pthread_cond_t progress_cond; ///< Used by child threads to wait for progress to change. pthread_cond_t output_cond; ///< Used by the main thread to wait for frames to finish. @@ -126,6 +152,45 @@ typedef struct FrameThreadContext { int die; ///< Set when threads should exit. 
} FrameThreadContext; + +/* H264 slice threading seems to be buggy with more than 16 threads, + * limit the number of threads to 16 for automatic detection */ +#define MAX_AUTO_THREADS 16 + +static int get_logical_cpus(AVCodecContext *avctx) +{ + int ret, nb_cpus = 1; +#if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT) + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + + ret = sched_getaffinity(0, sizeof(cpuset), &cpuset); + if (!ret) { + nb_cpus = CPU_COUNT(&cpuset); + } +#elif HAVE_GETPROCESSAFFINITYMASK + DWORD_PTR proc_aff, sys_aff; + ret = GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff); + if (ret) + nb_cpus = av_popcount64(proc_aff); +#elif HAVE_SYSCTL && defined(HW_NCPU) + int mib[2] = { CTL_HW, HW_NCPU }; + size_t len = sizeof(nb_cpus); + + ret = sysctl(mib, 2, &nb_cpus, &len, NULL, 0); + if (ret == -1) + nb_cpus = 0; +#elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN) + nb_cpus = sysconf(_SC_NPROC_ONLN); +#elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN) + nb_cpus = sysconf(_SC_NPROCESSORS_ONLN); +#endif + av_log(avctx, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus); + return nb_cpus; +} + + static void* attribute_align_arg worker(void *v) { AVCodecContext *avctx = v; @@ -230,8 +295,19 @@ static int thread_init(AVCodecContext *avctx) ThreadContext *c; int thread_count = avctx->thread_count; - if (thread_count <= 1) + if (!thread_count) { + int nb_cpus = get_logical_cpus(avctx); + // use number of cores + 1 as thread count if there is more than one + if (nb_cpus > 1) + thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS); + else + thread_count = avctx->thread_count = 1; + } + + if (thread_count <= 1) { + avctx->active_thread_type = 0; return 0; + } c = av_mallocz(sizeof(ThreadContext)); if (!c) @@ -315,7 +391,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg) } /** - * Updates the next thread's AVCodecContext with values from the reference thread's context. 
+ * Update the next thread's AVCodecContext with values from the reference thread's context. * * @param dst The destination context. * @param src The source context. @@ -326,15 +402,16 @@ static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int err = 0; if (dst != src) { - dst->sub_id = src->sub_id; dst->time_base = src->time_base; dst->width = src->width; dst->height = src->height; dst->pix_fmt = src->pix_fmt; + dst->coded_width = src->coded_width; + dst->coded_height = src->coded_height; + dst->has_b_frames = src->has_b_frames; dst->idct_algo = src->idct_algo; - dst->slice_count = src->slice_count; dst->bits_per_coded_sample = src->bits_per_coded_sample; dst->sample_aspect_ratio = src->sample_aspect_ratio; @@ -354,8 +431,7 @@ static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, } if (for_user) { - dst->coded_frame = src->coded_frame; - dst->has_b_frames += src->thread_count - 1; + dst->coded_frame = src->coded_frame; } else { if (dst->codec->update_thread_context) err = dst->codec->update_thread_context(dst, src); @@ -369,8 +445,9 @@ static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, * * @param dst The destination context. * @param src The source context. 
+ * @return 0 on success, negative error code on failure */ -static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src) +static int update_context_from_user(AVCodecContext *dst, AVCodecContext *src) { #define copy_fields(s, e) memcpy(&dst->s, &src->s, (char*)&dst->e - (char*)&dst->s); dst->flags = src->flags; @@ -380,9 +457,6 @@ static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src) dst->release_buffer = src->release_buffer; dst->opaque = src->opaque; -#if FF_API_HURRY_UP - dst->hurry_up = src->hurry_up; -#endif dst->dsp_mask = src->dsp_mask; dst->debug = src->debug; dst->debug_mv = src->debug_mv; @@ -390,10 +464,26 @@ static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src) dst->slice_flags = src->slice_flags; dst->flags2 = src->flags2; - copy_fields(skip_loop_filter, bidir_refine); + copy_fields(skip_loop_filter, subtitle_header); dst->frame_number = src->frame_number; dst->reordered_opaque = src->reordered_opaque; + + if (src->slice_count && src->slice_offset) { + if (dst->slice_count < src->slice_count) { + int *tmp = av_realloc(dst->slice_offset, src->slice_count * + sizeof(*dst->slice_offset)); + if (!tmp) { + av_free(dst->slice_offset); + return AVERROR(ENOMEM); + } + dst->slice_offset = tmp; + } + memcpy(dst->slice_offset, src->slice_offset, + src->slice_count * sizeof(*dst->slice_offset)); + } + dst->slice_count = src->slice_count; + return 0; #undef copy_fields } @@ -411,9 +501,10 @@ static void release_delayed_buffers(PerThreadContext *p) FrameThreadContext *fctx = p->parent; while (p->num_released_buffers > 0) { - AVFrame *f = &p->released_buffers[--p->num_released_buffers]; + AVFrame *f; pthread_mutex_lock(&fctx->buffer_mutex); + f = &p->released_buffers[--p->num_released_buffers]; free_progress(f); f->thread_opaque = NULL; @@ -484,6 +575,7 @@ static int submit_packet(PerThreadContext *p, AVPacket *avpkt) } fctx->prev_thread = p; + fctx->next_decoding++; return 0; } @@ -502,12 +594,11 @@ 
int ff_thread_decode_frame(AVCodecContext *avctx, */ p = &fctx->threads[fctx->next_decoding]; - update_context_from_user(p->avctx, avctx); + err = update_context_from_user(p->avctx, avctx); + if (err) return err; err = submit_packet(p, avpkt); if (err) return err; - fctx->next_decoding++; - /* * If we're still receiving the initial packets, don't return a frame. */ @@ -516,7 +607,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx, if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0; *got_picture_ptr=0; - return 0; + return avpkt->size; } /* @@ -557,7 +648,8 @@ int ff_thread_decode_frame(AVCodecContext *avctx, fctx->next_finished = finished; - return p->result; + /* return the size of the consumed packet if no error occurred */ + return (p->result >= 0) ? avpkt->size : p->result; } void ff_thread_report_progress(AVFrame *f, int n, int field) @@ -632,7 +724,7 @@ static void frame_thread_free(AVCodecContext *avctx, int thread_count) park_frame_worker_threads(fctx, thread_count); - if (fctx->prev_thread) + if (fctx->prev_thread && fctx->prev_thread != fctx->threads) update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0); fctx->die = 1; @@ -644,7 +736,8 @@ static void frame_thread_free(AVCodecContext *avctx, int thread_count) pthread_cond_signal(&p->input_cond); pthread_mutex_unlock(&p->mutex); - pthread_join(p->thread, NULL); + if (p->thread_init) + pthread_join(p->thread, NULL); if (codec->close) codec->close(p->avctx); @@ -666,8 +759,11 @@ static void frame_thread_free(AVCodecContext *avctx, int thread_count) pthread_cond_destroy(&p->output_cond); av_freep(&p->avpkt.data); - if (i) + if (i) { av_freep(&p->avctx->priv_data); + av_freep(&p->avctx->internal); + av_freep(&p->avctx->slice_offset); + } av_freep(&p->avctx); } @@ -685,6 +781,15 @@ static int frame_thread_init(AVCodecContext *avctx) FrameThreadContext *fctx; int i, err = 0; + if (!thread_count) { + int nb_cpus = get_logical_cpus(avctx); + // use number of cores + 1 
as thread count if there is more than one + if (nb_cpus > 1) + thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS); + else + thread_count = avctx->thread_count = 1; + } + if (thread_count <= 1) { avctx->active_thread_type = 0; return 0; @@ -709,6 +814,11 @@ static int frame_thread_init(AVCodecContext *avctx) p->parent = fctx; p->avctx = copy; + if (!copy) { + err = AVERROR(ENOMEM); + goto error; + } + *copy = *src; copy->thread_opaque = p; copy->pkt = &p->avpkt; @@ -721,9 +831,19 @@ static int frame_thread_init(AVCodecContext *avctx) update_context_from_thread(avctx, copy, 1); } else { - copy->is_copy = 1; copy->priv_data = av_malloc(codec->priv_data_size); + if (!copy->priv_data) { + err = AVERROR(ENOMEM); + goto error; + } memcpy(copy->priv_data, src->priv_data, codec->priv_data_size); + copy->internal = av_malloc(sizeof(AVCodecInternal)); + if (!copy->internal) { + err = AVERROR(ENOMEM); + goto error; + } + *copy->internal = *src->internal; + copy->internal->is_copy = 1; if (codec->init_thread_copy) err = codec->init_thread_copy(copy); @@ -731,7 +851,8 @@ static int frame_thread_init(AVCodecContext *avctx) if (err) goto error; - pthread_create(&p->thread, NULL, frame_worker_thread, p); + if (!pthread_create(&p->thread, NULL, frame_worker_thread, p)) + p->thread_init = 1; } return 0; @@ -749,13 +870,23 @@ void ff_thread_flush(AVCodecContext *avctx) if (!avctx->thread_opaque) return; park_frame_worker_threads(fctx, avctx->thread_count); - - if (fctx->prev_thread) - update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0); + if (fctx->prev_thread) { + if (fctx->prev_thread != &fctx->threads[0]) + update_context_from_thread(fctx->threads[0].avctx, fctx->prev_thread->avctx, 0); + if (avctx->codec->flush) + avctx->codec->flush(fctx->threads[0].avctx); + } fctx->next_decoding = fctx->next_finished = 0; fctx->delaying = 1; fctx->prev_thread = NULL; + for (int i = 0; i < avctx->thread_count; i++) { + PerThreadContext *p = 
&fctx->threads[i]; + // Make sure decode flush calls with size=0 won't return old frames + p->got_frame = 0; + + release_delayed_buffers(p); + } } static int *allocate_progress(PerThreadContext *p) @@ -824,21 +955,19 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f) ff_thread_finish_setup(avctx); } + if (err) { + free_progress(f); + f->thread_opaque = NULL; + } pthread_mutex_unlock(&p->parent->buffer_mutex); - /* - * Buffer age is difficult to keep track of between - * multiple threads, and the optimizations it allows - * are not worth the effort. It is disabled for now. - */ - f->age = INT_MAX; - return err; } void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f) { PerThreadContext *p = avctx->thread_opaque; + FrameThreadContext *fctx; if (!(avctx->active_thread_type&FF_THREAD_FRAME)) { avctx->release_buffer(avctx, f); @@ -851,10 +980,12 @@ void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f) } if(avctx->debug & FF_DEBUG_BUFFERS) - av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p, %d buffers used\n", - f, f->owner->internal_buffer_count); + av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p\n", f); + fctx = p->parent; + pthread_mutex_lock(&fctx->buffer_mutex); p->released_buffers[p->num_released_buffers++] = *f; + pthread_mutex_unlock(&fctx->buffer_mutex); memset(f->data, 0, sizeof(f->data)); } @@ -877,8 +1008,12 @@ static void validate_thread_parameters(AVCodecContext *avctx) avctx->active_thread_type = 0; } else if (frame_threading_supported && (avctx->thread_type & FF_THREAD_FRAME)) { avctx->active_thread_type = FF_THREAD_FRAME; - } else { + } else if (avctx->codec->capabilities & CODEC_CAP_SLICE_THREADS && + avctx->thread_type & FF_THREAD_SLICE) { avctx->active_thread_type = FF_THREAD_SLICE; + } else if (!(avctx->codec->capabilities & CODEC_CAP_AUTO_THREADS)) { + avctx->thread_count = 1; + avctx->active_thread_type = 0; } } @@ -889,6 +1024,10 @@ int ff_thread_init(AVCodecContext *avctx) 
return -1; } +#if HAVE_W32THREADS + w32thread_init(); +#endif + if (avctx->codec) { validate_thread_parameters(avctx);