* @see doc/multithreading.txt
*/
-#include <pthread.h>
+#include "config.h"
+
+#if HAVE_SCHED_GETAFFINITY
+#define _GNU_SOURCE
+#include <sched.h>
+#endif
+#if HAVE_GETPROCESSAFFINITYMASK
+#include <windows.h>
+#endif
+#if HAVE_SYSCTL
+#if HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+#if HAVE_SYSCONF
+#include <unistd.h>
+#endif
#include "avcodec.h"
+#include "internal.h"
#include "thread.h"
+#if HAVE_PTHREADS
+#include <pthread.h>
+#elif HAVE_W32THREADS
+#include "w32pthreads.h"
+#endif
+
typedef int (action_func)(AVCodecContext *c, void *arg);
typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr);
struct FrameThreadContext *parent;
pthread_t thread;
+ int thread_init;
pthread_cond_t input_cond; ///< Used to wait for a new packet from the main thread.
pthread_cond_t progress_cond; ///< Used by child threads to wait for progress to change.
pthread_cond_t output_cond; ///< Used by the main thread to wait for frames to finish.
int die; ///< Set when threads should exit.
} FrameThreadContext;
+
+/* H264 slice threading seems to be buggy with more than 16 threads,
+ * limit the number of threads to 16 for automatic detection */
+#define MAX_AUTO_THREADS 16
+/**
+ * Detect the number of logical CPUs with the first method enabled at build
+ * time: sched_getaffinity(), GetProcessAffinityMask(), sysctl() with HW_NCPU
+ * or sysconf().
+ *
+ * @param avctx context handed to av_log() for the debug message
+ * @return the detected count. It stays at 1 when no method is compiled in
+ *         or an affinity query fails, becomes 0 when sysctl() fails, and is
+ *         the unchecked sysconf() result in the sysconf() branches.
+ */
+static int get_logical_cpus(AVCodecContext *avctx)
+{
+ int ret, nb_cpus = 1; /* nb_cpus keeps this default unless a branch below overwrites it */
+#if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT)
+ cpu_set_t cpuset;
+ /* count the CPUs set in the affinity mask queried for pid 0 */
+ CPU_ZERO(&cpuset);
+
+ ret = sched_getaffinity(0, sizeof(cpuset), &cpuset);
+ if (!ret) {
+ nb_cpus = CPU_COUNT(&cpuset);
+ }
+#elif HAVE_GETPROCESSAFFINITYMASK
+ DWORD_PTR proc_aff, sys_aff;
+ ret = GetProcessAffinityMask(GetCurrentProcess(), &proc_aff, &sys_aff);
+ if (ret) /* a nonzero return is treated as success here */
+ nb_cpus = av_popcount64(proc_aff);
+#elif HAVE_SYSCTL && defined(HW_NCPU)
+ int mib[2] = { CTL_HW, HW_NCPU };
+ size_t len = sizeof(nb_cpus);
+ /* sysctl() writes the queried value straight into nb_cpus */
+ ret = sysctl(mib, 2, &nb_cpus, &len, NULL, 0);
+ if (ret == -1)
+ nb_cpus = 0; /* callers visible in this file treat any value <= 1 as one thread */
+#elif HAVE_SYSCONF && defined(_SC_NPROC_ONLN)
+ nb_cpus = sysconf(_SC_NPROC_ONLN); /* NOTE(review): result is not checked and is returned as-is */
+#elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN)
+ nb_cpus = sysconf(_SC_NPROCESSORS_ONLN); /* NOTE(review): result is not checked and is returned as-is */
+#endif
+ av_log(avctx, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus);
+ return nb_cpus;
+}
+
+
static void* attribute_align_arg worker(void *v)
{
AVCodecContext *avctx = v;
ThreadContext *c;
int thread_count = avctx->thread_count;
- if (thread_count <= 1)
+ if (!thread_count) {
+ int nb_cpus = get_logical_cpus(avctx);
+ // use number of cores + 1 as thread count if there is more than one
+ if (nb_cpus > 1)
+ thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
+ else
+ thread_count = avctx->thread_count = 1;
+ }
+
+ if (thread_count <= 1) {
+ avctx->active_thread_type = 0;
return 0;
+ }
c = av_mallocz(sizeof(ThreadContext));
if (!c)
}
/**
- * Updates the next thread's AVCodecContext with values from the reference thread's context.
+ * Update the next thread's AVCodecContext with values from the reference thread's context.
*
* @param dst The destination context.
* @param src The source context.
int err = 0;
if (dst != src) {
- dst->sub_id = src->sub_id;
dst->time_base = src->time_base;
dst->width = src->width;
dst->height = src->height;
dst->has_b_frames = src->has_b_frames;
dst->idct_algo = src->idct_algo;
- dst->slice_count = src->slice_count;
dst->bits_per_coded_sample = src->bits_per_coded_sample;
dst->sample_aspect_ratio = src->sample_aspect_ratio;
}
if (for_user) {
- dst->coded_frame = src->coded_frame;
- dst->has_b_frames += src->thread_count - 1;
+ dst->coded_frame = src->coded_frame;
} else {
if (dst->codec->update_thread_context)
err = dst->codec->update_thread_context(dst, src);
*
* @param dst The destination context.
* @param src The source context.
+ * @return 0 on success, negative error code on failure
*/
-static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
+static int update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
{
#define copy_fields(s, e) memcpy(&dst->s, &src->s, (char*)&dst->e - (char*)&dst->s);
dst->flags = src->flags;
dst->slice_flags = src->slice_flags;
dst->flags2 = src->flags2;
- copy_fields(skip_loop_filter, bidir_refine);
+ copy_fields(skip_loop_filter, subtitle_header);
dst->frame_number = src->frame_number;
dst->reordered_opaque = src->reordered_opaque;
+
+ if (src->slice_count && src->slice_offset) {
+ if (dst->slice_count < src->slice_count) {
+ int *tmp = av_realloc(dst->slice_offset, src->slice_count *
+ sizeof(*dst->slice_offset));
+ if (!tmp) {
+ av_free(dst->slice_offset);
+ return AVERROR(ENOMEM);
+ }
+ dst->slice_offset = tmp;
+ }
+ memcpy(dst->slice_offset, src->slice_offset,
+ src->slice_count * sizeof(*dst->slice_offset));
+ }
+ dst->slice_count = src->slice_count;
+ return 0;
#undef copy_fields
}
}
fctx->prev_thread = p;
+ fctx->next_decoding++;
return 0;
}
*/
p = &fctx->threads[fctx->next_decoding];
- update_context_from_user(p->avctx, avctx);
+ err = update_context_from_user(p->avctx, avctx);
+ if (err) return err;
err = submit_packet(p, avpkt);
if (err) return err;
- fctx->next_decoding++;
-
/*
* If we're still receiving the initial packets, don't return a frame.
*/
if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0;
*got_picture_ptr=0;
- return 0;
+ return avpkt->size;
}
/*
*picture = p->frame;
*got_picture_ptr = p->got_frame;
picture->pkt_dts = p->avpkt.dts;
+ picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
+ picture->width = avctx->width;
+ picture->height = avctx->height;
+ picture->format = avctx->pix_fmt;
/*
* A later call with avpkt->size == 0 may loop over all threads,
fctx->next_finished = finished;
- return p->result;
+ /* return the size of the consumed packet if no error occurred */
+ return (p->result >= 0) ? avpkt->size : p->result;
}
void ff_thread_report_progress(AVFrame *f, int n, int field)
pthread_cond_signal(&p->input_cond);
pthread_mutex_unlock(&p->mutex);
- pthread_join(p->thread, NULL);
+ if (p->thread_init)
+ pthread_join(p->thread, NULL);
if (codec->close)
codec->close(p->avctx);
pthread_cond_destroy(&p->output_cond);
av_freep(&p->avpkt.data);
- if (i)
+ if (i) {
av_freep(&p->avctx->priv_data);
+ av_freep(&p->avctx->internal);
+ av_freep(&p->avctx->slice_offset);
+ }
av_freep(&p->avctx);
}
FrameThreadContext *fctx;
int i, err = 0;
+ if (!thread_count) {
+ int nb_cpus = get_logical_cpus(avctx);
+ // use number of cores + 1 as thread count if there is more than one
+ if (nb_cpus > 1)
+ thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
+ else
+ thread_count = avctx->thread_count = 1;
+ }
+
if (thread_count <= 1) {
avctx->active_thread_type = 0;
return 0;
p->parent = fctx;
p->avctx = copy;
+ if (!copy) {
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
+
*copy = *src;
copy->thread_opaque = p;
copy->pkt = &p->avpkt;
update_context_from_thread(avctx, copy, 1);
} else {
- copy->is_copy = 1;
copy->priv_data = av_malloc(codec->priv_data_size);
+ if (!copy->priv_data) {
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
memcpy(copy->priv_data, src->priv_data, codec->priv_data_size);
+ copy->internal = av_malloc(sizeof(AVCodecInternal));
+ if (!copy->internal) {
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
+ *copy->internal = *src->internal;
+ copy->internal->is_copy = 1;
if (codec->init_thread_copy)
err = codec->init_thread_copy(copy);
if (err) goto error;
- pthread_create(&p->thread, NULL, frame_worker_thread, p);
+ if (!pthread_create(&p->thread, NULL, frame_worker_thread, p))
+ p->thread_init = 1;
}
return 0;
fctx->next_decoding = fctx->next_finished = 0;
fctx->delaying = 1;
fctx->prev_thread = NULL;
+ // Make sure decode flush calls with size=0 won't return old frames
+ for (int i = 0; i < avctx->thread_count; i++)
+ fctx->threads[i].got_frame = 0;
}
static int *allocate_progress(PerThreadContext *p)
pthread_mutex_unlock(&p->parent->buffer_mutex);
- /*
- * Buffer age is difficult to keep track of between
- * multiple threads, and the optimizations it allows
- * are not worth the effort. It is disabled for now.
- */
- f->age = INT_MAX;
-
return err;
}
}
if(avctx->debug & FF_DEBUG_BUFFERS)
- av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p, %d buffers used\n",
- f, f->owner->internal_buffer_count);
+ av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p\n", f);
fctx = p->parent;
pthread_mutex_lock(&fctx->buffer_mutex);
} else if (avctx->codec->capabilities & CODEC_CAP_SLICE_THREADS &&
avctx->thread_type & FF_THREAD_SLICE) {
avctx->active_thread_type = FF_THREAD_SLICE;
+ } else if (!(avctx->codec->capabilities & CODEC_CAP_AUTO_THREADS)) {
+ avctx->thread_count = 1;
+ avctx->active_thread_type = 0;
}
}
return -1;
}
+#if HAVE_W32THREADS
+ w32thread_init();
+#endif
+
if (avctx->codec) {
validate_thread_parameters(avctx);