From: Steinar H. Gunderson Date: Sat, 3 Oct 2015 00:23:14 +0000 (+0200) Subject: Initial checkin. X-Git-Tag: 1.0.0~326 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=e18d9bad93d5bf766d52d0cb66db4c3d3f8a711b;p=nageru Initial checkin. --- e18d9bad93d5bf766d52d0cb66db4c3d3f8a711b diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9958450 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS := -O2 -march=native -g -std=gnu++11 -Wall -Wno-deprecated-declarations -fPIC $(shell pkg-config --cflags Qt5Core Qt5Widgets Qt5OpenGLExtensions libusb-1.0 movit) -pthread +LDFLAGS=$(shell pkg-config --libs Qt5Core Qt5Widgets Qt5OpenGLExtensions libusb-1.0 movit) -lEGL -lGL -pthread -lva -lva-drm -lva-x11 -lX11 -lavformat -lavcodec -lavutil + +# Qt objects +OBJS=glwidget.o main.o mainwindow.o window.o +OBJS += glwidget.moc.o mainwindow.moc.o window.moc.o + +# Mixer objects +OBJS += h264encode.o mixer.o bmusb.o pbo_frame_allocator.o context.o + +%.moc.cpp: %.h + moc $< -o $@ + +all: nageru + +nageru: $(OBJS) + $(CXX) $(LDFLAGS) -o $@ $^ + + diff --git a/bmusb.cpp b/bmusb.cpp new file mode 100644 index 0000000..207d632 --- /dev/null +++ b/bmusb.cpp @@ -0,0 +1,939 @@ +// TODO: Replace with linking to upstream bmusb. + +// Intensity Shuttle USB3 prototype capture driver, v0.3 +// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable +// (can do captures for hours at a time with no drops), except during startup +// 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) +// Audio comes out as 8-channel 24-bit raw audio. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __SSE2__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bmusb.h" + +using namespace std; +using namespace std::placeholders; + +#define WIDTH 1280 +#define HEIGHT 750 /* 30 lines ancillary data? */ +//#define WIDTH 1920 +//#define HEIGHT 1125 /* ??? lines ancillary data? */ +#define HEADER_SIZE 44 +//#define HEADER_SIZE 0 +#define AUDIO_HEADER_SIZE 4 + +//#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY +//#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210 +#define FRAME_SIZE (8 << 20) + +FILE *audiofp; + +thread usb_thread; +atomic should_quit; + +FrameAllocator::~FrameAllocator() {} + +#define NUM_QUEUED_FRAMES 16 +class MallocFrameAllocator : public FrameAllocator { +public: + MallocFrameAllocator(size_t frame_size); + Frame alloc_frame() override; + void release_frame(Frame frame) override; + +private: + size_t frame_size; + + mutex freelist_mutex; + stack> freelist; // All of size . +}; + +MallocFrameAllocator::MallocFrameAllocator(size_t frame_size) + : frame_size(frame_size) +{ + for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) { + freelist.push(unique_ptr(new uint8_t[frame_size])); + } +} + +FrameAllocator::Frame MallocFrameAllocator::alloc_frame() +{ + Frame vf; + vf.owner = this; + + unique_lock lock(freelist_mutex); // Meh. + if (freelist.empty()) { + printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n", + frame_size); + } else { + vf.data = freelist.top().release(); + vf.size = frame_size; + freelist.pop(); // Meh. + } + return vf; +} + +void MallocFrameAllocator::release_frame(Frame frame) +{ + unique_lock lock(freelist_mutex); + freelist.push(unique_ptr(frame.data)); +} + +bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b) +{ + if (a == b) { + return false; + } else if (a < b) { + return (b - a < 0x8000); + } else { + int wrap_b = 0x10000 + int(b); + return (wrap_b - a < 0x8000); + } +} + +void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque *q) +{ + if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) { + printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n", + q->back().timecode, timecode); + frame.owner->release_frame(frame); + return; + } + + QueuedFrame qf; + qf.format = format; + qf.timecode = timecode; + qf.frame = frame; + + { + unique_lock lock(queue_lock); + q->push_back(move(qf)); + } + queues_not_empty.notify_one(); // might be spurious +} + +void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len) +{ + FILE *fp = fopen(filename, "wb"); + if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) { + printf("short write!\n"); + } + fclose(fp); +} + +void dump_audio_block(uint8_t *audio_start, size_t audio_len) +{ + fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); +} + +void BMUSBCapture::dequeue_thread() +{ + for ( ;; ) { + unique_lock lock(queue_lock); + queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); }); + + uint16_t video_timecode = pending_video_frames.front().timecode; + uint16_t audio_timecode = pending_audio_frames.front().timecode; + if (video_timecode < audio_timecode) { + printf("Video block 0x%04x without corresponding audio block, dropping.\n", + video_timecode); + video_frame_allocator->release_frame(pending_video_frames.front().frame); + pending_video_frames.pop_front(); + } else if (audio_timecode < video_timecode) { + printf("Audio block 0x%04x without corresponding video block, dropping.\n", + audio_timecode); + audio_frame_allocator->release_frame(pending_audio_frames.front().frame); + pending_audio_frames.pop_front(); + } else { + QueuedFrame video_frame = pending_video_frames.front(); + QueuedFrame audio_frame = pending_audio_frames.front(); + pending_audio_frames.pop_front(); + pending_video_frames.pop_front(); + lock.unlock(); + +#if 0 + char filename[255]; + snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode); + dump_frame(filename, video_frame.frame.data, video_frame.data_len); + dump_audio_block(audio_frame.frame.data, audio_frame.data_len); +#endif + + frame_callback(video_timecode, + video_frame.frame, HEADER_SIZE, video_frame.format, + audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format); + } + } +} + +void BMUSBCapture::start_new_frame(const uint8_t *start) +{ + uint16_t format = (start[3] << 8) | start[2]; + uint16_t timecode = (start[1] << 8) | start[0]; + + if (current_video_frame.len > 0) { + //dump_frame(); + queue_frame(format, timecode, current_video_frame, &pending_video_frames); + } + //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n", + // format, timecode, + // //start[7], start[6], start[5], start[4], + // read_current_frame, FRAME_SIZE); + + current_video_frame = video_frame_allocator->alloc_frame(); + //if (current_video_frame.data == nullptr) { + // read_current_frame = -1; + //} else { + // read_current_frame = 0; + //} +} + +void BMUSBCapture::start_new_audio_block(const uint8_t *start) +{ + uint16_t format = (start[3] << 8) | start[2]; + uint16_t timecode = (start[1] << 8) | start[0]; + if (current_audio_frame.len > 0) { + //dump_audio_block(); + queue_frame(format, timecode, current_audio_frame, &pending_audio_frames); + } + //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n", + // format, timecode, read_current_audio_block); + current_audio_frame = audio_frame_allocator->alloc_frame(); +} + +#if 0 +static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack) +{ + // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset); + for (unsigned j = 0; j < pack->actual_length; j++) { + //for (int j = 0; j < min(pack->actual_length, 16u); j++) { + printf("%02x", xfr->buffer[j + offset]); + if ((j % 16) == 15) + printf("\n"); + else if ((j % 8) == 7) + printf(" "); + else + printf(" "); + } +} +#endif + +void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n) +{ + assert(n % 2 == 0); + uint8_t *dptr1 = dest1; + uint8_t *dptr2 = dest2; + + for (size_t i = 0; i < n; i += 2) { + *dptr1++ = *src++; + *dptr2++ = *src++; + } +} + +void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end) +{ + if (current_frame->data == nullptr || + current_frame->len > current_frame->size || + start == end) { + return; + } + + int bytes = end - start; + if (current_frame->len + bytes > current_frame->size) { + printf("%d bytes overflow after last %s frame\n", + int(current_frame->len + bytes - current_frame->size), frame_type_name); + //dump_frame(); + } else { + if (current_frame->interleaved) { + uint8_t *data = current_frame->data + current_frame->len / 2; + uint8_t *data2 = current_frame->data2 + current_frame->len / 2; + if (current_frame->len % 2 == 1) { + ++data; + swap(data, data2); + } + if (bytes % 2 == 1) { + *data++ = *start++; + swap(data, data2); + ++current_frame->len; + --bytes; + } + memcpy_interleaved(data, data2, start, bytes); + current_frame->len += bytes; + } else { + memcpy(current_frame->data + current_frame->len, start, bytes); + current_frame->len += bytes; + } + } +} + +#ifdef __SSE2__ + +#if 0 +void dump(const char *name, __m256i n) +{ + printf("%-10s:", name); + printf(" %02x", _mm256_extract_epi8(n, 0)); + printf(" %02x", _mm256_extract_epi8(n, 1)); + printf(" %02x", _mm256_extract_epi8(n, 2)); + printf(" %02x", _mm256_extract_epi8(n, 3)); + printf(" %02x", _mm256_extract_epi8(n, 4)); + printf(" %02x", _mm256_extract_epi8(n, 5)); + printf(" %02x", _mm256_extract_epi8(n, 6)); + printf(" %02x", _mm256_extract_epi8(n, 7)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 8)); + printf(" %02x", _mm256_extract_epi8(n, 9)); + printf(" %02x", _mm256_extract_epi8(n, 10)); + printf(" %02x", _mm256_extract_epi8(n, 11)); + printf(" %02x", _mm256_extract_epi8(n, 12)); + printf(" %02x", _mm256_extract_epi8(n, 13)); + printf(" %02x", _mm256_extract_epi8(n, 14)); + printf(" %02x", _mm256_extract_epi8(n, 15)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 16)); + printf(" %02x", _mm256_extract_epi8(n, 17)); + printf(" %02x", _mm256_extract_epi8(n, 18)); + printf(" %02x", _mm256_extract_epi8(n, 19)); + printf(" %02x", _mm256_extract_epi8(n, 20)); + printf(" %02x", _mm256_extract_epi8(n, 21)); + printf(" %02x", _mm256_extract_epi8(n, 22)); + printf(" %02x", _mm256_extract_epi8(n, 23)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 24)); + printf(" %02x", _mm256_extract_epi8(n, 25)); + printf(" %02x", _mm256_extract_epi8(n, 26)); + printf(" %02x", _mm256_extract_epi8(n, 27)); + printf(" %02x", _mm256_extract_epi8(n, 28)); + printf(" %02x", _mm256_extract_epi8(n, 29)); + printf(" %02x", _mm256_extract_epi8(n, 30)); + printf(" %02x", _mm256_extract_epi8(n, 31)); + printf("\n"); +} +#endif + +// Does a memcpy and memchr in one to reduce processing time. +// Note that the benefit is somewhat limited if your L3 cache is small, +// as you'll (unfortunately) spend most of the time loading the data +// from main memory. +// +// Complicated cases are left to the slow path; it basically stops copying +// up until the first instance of "sync_char" (usually a bit before, actually). +// This is fine, since 0x00 bytes shouldn't really show up in normal picture +// data, and what we really need this for is the 00 00 ff ff marker in video data. +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + if (current_frame->data == nullptr || + current_frame->len > current_frame->size || + start == limit) { + return start; + } + size_t orig_bytes = limit - start; + if (orig_bytes < 128) { + // Don't bother. + return start; + } + + // Don't read more bytes than we can write. + limit = min(limit, start + (current_frame->size - current_frame->len)); + + // Align end to 32 bytes. + limit = (const uint8_t *)(intptr_t(limit) & ~31); + + if (start >= limit) { + return start; + } + + // Process [0,31] bytes, such that start gets aligned to 32 bytes. + const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31); + if (aligned_start != start) { + const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start); + if (sync_start == nullptr) { + add_to_frame(current_frame, "", start, aligned_start); + } else { + add_to_frame(current_frame, "", start, sync_start); + return sync_start; + } + } + + // Make the length a multiple of 64. + if (current_frame->interleaved) { + if (((limit - aligned_start) % 64) != 0) { + limit -= 32; + } + assert(((limit - aligned_start) % 64) == 0); + } + +#if __AVX2__ + const __m256i needle = _mm256_set1_epi8(sync_char); + + const __restrict __m256i *in = (const __m256i *)aligned_start; + if (current_frame->interleaved) { + __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); + __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2); + if (current_frame->len % 2 == 1) { + swap(out1, out2); + } + + __m256i shuffle_cw = _mm256_set_epi8( + 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0, + 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); + while (in < (const __m256i *)limit) { + // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128). + __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh + __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp + + __m256i found1 = _mm256_cmpeq_epi8(data1, needle); + __m256i found2 = _mm256_cmpeq_epi8(data2, needle); + __m256i found = _mm256_or_si256(found1, found2); + + data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh + data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop + + data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh + data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop + + __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000); + __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001); + + _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used. + _mm256_storeu_si256(out2, hi); + + if (!_mm256_testz_si256(found, found)) { + break; + } + + in += 2; + ++out1; + ++out2; + } + current_frame->len += (uint8_t *)in - aligned_start; + } else { + __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + while (in < (const __m256i *)limit) { + __m256i data = _mm256_load_si256(in); + _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. + __m256i found = _mm256_cmpeq_epi8(data, needle); + if (!_mm256_testz_si256(found, found)) { + break; + } + + ++in; + ++out; + } + current_frame->len = (uint8_t *)out - current_frame->data; + } +#else + const __m128i needle = _mm_set1_epi8(sync_char); + + const __m128i *in = (const __m128i *)aligned_start; + if (current_frame->interleaved) { + __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); + __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); + if (current_frame->len % 2 == 1) { + swap(out1, out2); + } + + __m128i mask_lower_byte = _mm_set1_epi16(0x00ff); + while (in < (const __m128i *)limit) { + __m128i data1 = _mm_load_si128(in); + __m128i data2 = _mm_load_si128(in + 1); + __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte); + __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte); + __m128i data1_hi = _mm_srli_epi16(data1, 8); + __m128i data2_hi = _mm_srli_epi16(data2, 8); + __m128i lo = _mm_packus_epi16(data1_lo, data2_lo); + _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used. + __m128i hi = _mm_packus_epi16(data1_hi, data2_hi); + _mm_storeu_si128(out2, hi); + __m128i found1 = _mm_cmpeq_epi8(data1, needle); + __m128i found2 = _mm_cmpeq_epi8(data2, needle); + if (!_mm_testz_si128(found1, found1) || + !_mm_testz_si128(found2, found2)) { + break; + } + + in += 2; + ++out1; + ++out2; + } + current_frame->len += (uint8_t *)in - aligned_start; + } else { + __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + while (in < (const __m128i *)limit) { + __m128i data = _mm_load_si128(in); + _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. + __m128i found = _mm_cmpeq_epi8(data, needle); + if (!_mm_testz_si128(found, found)) { + break; + } + + ++in; + ++out; + } + current_frame->len = (uint8_t *)out - current_frame->data; + } +#endif + + //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); + + return (const uint8_t *)in; +} +#endif + +void decode_packs(const libusb_transfer *xfr, + const char *sync_pattern, + int sync_length, + FrameAllocator::Frame *current_frame, + const char *frame_type_name, + function start_callback) +{ + int offset = 0; + for (int i = 0; i < xfr->num_iso_packets; i++) { + const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i]; + + if (pack->status != LIBUSB_TRANSFER_COMPLETED) { + fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status); + continue; +//exit(5); + } + + const uint8_t *start = xfr->buffer + offset; + const uint8_t *limit = start + pack->actual_length; + while (start < limit) { // Usually runs only one iteration. +#ifdef __SSE2__ + start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); + if (start == limit) break; + assert(start < limit); +#endif + + const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length); + if (start_next_frame == nullptr) { + // add the rest of the buffer + add_to_frame(current_frame, frame_type_name, start, limit); + break; + } else { + add_to_frame(current_frame, frame_type_name, start, start_next_frame); + start = start_next_frame + sync_length; // skip sync + start_callback(start); + } + } +#if 0 + dump_pack(xfr, offset, pack); +#endif + offset += pack->length; + } +} + +void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) +{ + if (xfr->status != LIBUSB_TRANSFER_COMPLETED) { + fprintf(stderr, "transfer status %d\n", xfr->status); + libusb_free_transfer(xfr); + exit(3); + } + + assert(xfr->user_data != nullptr); + BMUSBCapture *usb = static_cast(xfr->user_data); + + if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) { + if (xfr->endpoint == 0x84) { + decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1)); + } else { + decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); + } + } + if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { + //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr); + uint8_t *buf = libusb_control_transfer_get_data(xfr); +#if 0 + if (setup->wIndex == 44) { + printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]); + } else { + printf("read register %2d: 0x%02x%02x%02x%02x\n", + setup->wIndex, buf[0], buf[1], buf[2], buf[3]); + } +#else + memcpy(usb->register_file + usb->current_register, buf, 4); + usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS; + if (usb->current_register == 0) { + // read through all of them + printf("register dump:"); + for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) { + printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]); + } + printf("\n"); + } + libusb_fill_control_setup(xfr->buffer, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/usb->current_register, /*length=*/4); +#endif + } + +#if 0 + printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length); + for (i = 0; i < xfr->actual_length; i++) { + printf("%02x", xfr->buffer[i]); + if (i % 16) + printf("\n"); + else if (i % 8) + printf(" "); + else + printf(" "); + } +#endif + + if (libusb_submit_transfer(xfr) < 0) { + fprintf(stderr, "error re-submitting URB\n"); + exit(1); + } +} + +void BMUSBCapture::usb_thread_func() +{ + sched_param param; + memset(¶m, 0, sizeof(param)); + param.sched_priority = 1; + if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) { + printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno)); + } + while (!should_quit) { + int rc = libusb_handle_events(nullptr); + if (rc != LIBUSB_SUCCESS) + break; + } +} + +void BMUSBCapture::configure_card() +{ + if (video_frame_allocator == nullptr) { + set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak. + } + if (audio_frame_allocator == nullptr) { + set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak. + } + thread(&BMUSBCapture::dequeue_thread, this).detach(); + + int rc; + struct libusb_transfer *xfr; + + rc = libusb_init(nullptr); + if (rc < 0) { + fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc)); + exit(1); + } + + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f); + struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid); + if (!devh) { + fprintf(stderr, "Error finding USB device\n"); + exit(1); + } + + libusb_config_descriptor *config; + rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config); + if (rc < 0) { + fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc)); + exit(1); + } + printf("%d interface\n", config->bNumInterfaces); + for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) { + printf(" interface %d\n", interface_number); + const libusb_interface *interface = &config->interface[interface_number]; + for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) { + const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting]; + printf(" alternate setting %d\n", interface_desc->bAlternateSetting); + for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) { + const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number]; + printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress); + } + } + } + +#if 0 + rc = libusb_set_configuration(devh, /*configuration=*/1); + if (rc < 0) { + fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + + rc = libusb_claim_interface(devh, 0); + if (rc < 0) { + fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc)); + exit(1); + } + +#if 0 + rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); + if (rc < 0) { + fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + +#if 0 + rc = libusb_claim_interface(devh, 3); + if (rc < 0) { + fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + + // theories: + // 44 is some kind of timer register (first 16 bits count upwards) + // 24 is some sort of watchdog? + // you can seemingly set it to 0x73c60001 and that bit will eventually disappear + // (or will go to 0x73c60010?), also seen 0x73c60100 + // 12 also changes all the time, unclear why + // 16 seems to be autodetected mode somehow + // -- this is e00115e0 after reset? + // ed0115e0 after mode change [to output?] + // 2d0015e0 after more mode change [to input] + // ed0115e0 after more mode change + // 2d0015e0 after more mode change + // + // 390115e0 seems to indicate we have signal + // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while + // + // 200015e0 on startup + // changes to 250115e0 when we sync to the signal + // + // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal? + // + // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels). + // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a). + // + // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect. + // perhaps some of them are related to analog output? + // + // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio), + // but the driver sets it to 0x8036802a at some point. + // + // register 16: + // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal + // + // theories: + // 0x01 - stable signal + // 0x04 - deep color + // 0x08 - unknown (audio??) + // 0x20 - 720p?? + // 0x30 - 576p?? + + struct ctrl { + int endpoint; + int request; + int index; + uint32_t data; + }; + static const ctrl ctrls[] = { + { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, + { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, + + // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit + // capture (v210). + // clearing the 0x08000000 bit seems to change the capture format (other source?) + // 0x10000000 = analog audio instead of embedded audio, it seems + // 0x3a000000 = component video? (analog audio) + // 0x3c000000 = composite video? (analog audio) + // 0x3e000000 = s-video? (analog audio) + { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 }, + //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, + //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, + { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start? + { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, // + }; + + for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) { + uint32_t flipped = htonl(ctrls[req].data); + static uint8_t value[4]; + memcpy(value, &flipped, sizeof(flipped)); + int size = sizeof(value); + //if (ctrls[req].request == 215) size = 0; + rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint, + /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0); + if (rc < 0) { + fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc)); + exit(1); + } + + printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index); + for (int i = 0; i < rc; ++i) { + printf("%02x", value[i]); + } + printf("\n"); + } + + // Alternate setting 1 is output, alternate setting 2 is input. + // Card is reset when switching alternates, so the driver uses + // this “double switch” when it wants to reset. +#if 0 + rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); + if (rc < 0) { + fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/4); + if (rc < 0) { + fprintf(stderr, "Error setting alternate 4: %s\n", libusb_error_name(rc)); + exit(1); + } + +#if 0 + // DEBUG + for ( ;; ) { + static int my_index = 0; + static uint8_t value[4]; + int size = sizeof(value); + rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, + /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0); + if (rc < 0) { + fprintf(stderr, "Error on control\n"); + exit(1); + } + printf("rc=%d index=%d: 0x", rc, my_index); + for (int i = 0; i < rc; ++i) { + printf("%02x", value[i]); + } + printf("\n"); + } +#endif + +#if 0 + // set up an asynchronous transfer of the timer register + static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4]; + static int completed = 0; + + xfr = libusb_alloc_transfer(0); + libusb_fill_control_setup(cmdbuf, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/44, /*length=*/4); + libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0); + xfr->user_data = this; + libusb_submit_transfer(xfr); + + // set up an asynchronous transfer of register 24 + static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4]; + static int completed2 = 0; + + xfr = libusb_alloc_transfer(0); + libusb_fill_control_setup(cmdbuf2, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/24, /*length=*/4); + libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0); + xfr->user_data = this; + libusb_submit_transfer(xfr); +#endif + + // set up an asynchronous transfer of the register dump + static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4]; + static int completed3 = 0; + + xfr = libusb_alloc_transfer(0); + libusb_fill_control_setup(cmdbuf3, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/current_register, /*length=*/4); + libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0); + xfr->user_data = this; + //libusb_submit_transfer(xfr); + + audiofp = fopen("audio.raw", "wb"); + + // set up isochronous transfers for audio and video + for (int e = 3; e <= 4; ++e) { + //int num_transfers = (e == 3) ? 6 : 6; + int num_transfers = 2; + for (int i = 0; i < num_transfers; ++i) { + int num_iso_pack, size; + if (e == 3) { + // Video seems to require isochronous packets scaled with the width; + // seemingly six lines is about right, rounded up to the required 1kB + // multiple. + size = WIDTH * 2 * 6; + // Note that for 10-bit input, you'll need to increase size accordingly. + //size = size * 4 / 3; + if (size % 1024 != 0) { + size &= ~1023; + size += 1024; + } + num_iso_pack = (2 << 20) / size; // 512 kB. + printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size); + } else { + size = 0xc0; + num_iso_pack = 80; + } + int num_bytes = num_iso_pack * size; + uint8_t *buf = new uint8_t[num_bytes]; + + xfr = libusb_alloc_transfer(num_iso_pack); + if (!xfr) { + fprintf(stderr, "oom\n"); + exit(1); + } + + int ep = LIBUSB_ENDPOINT_IN | e; + libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes, + num_iso_pack, cb_xfr, nullptr, 0); + libusb_set_iso_packet_lengths(xfr, size); + xfr->user_data = this; + iso_xfrs.push_back(xfr); + } + } +} + +void BMUSBCapture::start_bm_capture() +{ + printf("starting capture\n"); + int i = 0; + for (libusb_transfer *xfr : iso_xfrs) { + printf("submitting transfer...\n"); + int rc = libusb_submit_transfer(xfr); + ++i; + if (rc < 0) { + //printf("num_bytes=%d\n", num_bytes); + fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n", + xfr->endpoint, i, libusb_error_name(rc)); + exit(1); + } + } + + +#if 0 + libusb_release_interface(devh, 0); +out: + if (devh) + libusb_close(devh); + libusb_exit(nullptr); + return rc; +#endif +} + +void BMUSBCapture::start_bm_thread() +{ + should_quit = false; + usb_thread = thread(&BMUSBCapture::usb_thread_func); +} + +void BMUSBCapture::stop_bm_thread() +{ + should_quit = true; + usb_thread.join(); +} diff --git a/bmusb.h b/bmusb.h new file mode 100644 index 0000000..c881748 --- /dev/null +++ b/bmusb.h @@ -0,0 +1,138 @@ +#ifndef _BMUSB_H +#define _BMUSB_H + +#include +#include +#include +#include +#include +#include +#include + +// An interface for frame allocators; if you do not specify one +// (using set_video_frame_allocator), a default one that pre-allocates +// a freelist of eight frames using new[] will be used. Specifying +// your own can be useful if you have special demands for where you want the +// frame to end up and don't want to spend the extra copy to get it there, for +// instance GPU memory. +class FrameAllocator { + public: + struct Frame { + uint8_t *data = nullptr; + uint8_t *data2 = nullptr; // Only if interleaved == true. + size_t len = 0; // Number of bytes we actually have. + size_t size = 0; // Number of bytes we have room for. + void *userdata = nullptr; + FrameAllocator *owner = nullptr; + + // If set to true, every other byte will go to data and to data2. + // If so, and are still about the number of total bytes + // so if size == 1024, there's 512 bytes in data and 512 in data2. + bool interleaved = false; + }; + + virtual ~FrameAllocator(); + + // Request a video frame. Note that this is called from the + // USB thread, which runs with realtime priority and is + // very sensitive to delays. Thus, you should not do anything + // here that might sleep, including calling malloc(). + // (Taking a mutex is borderline.) + // + // The Frame object will be given to the frame callback, + // which is responsible for releasing the video frame back + // once it is usable for new frames (ie., it will no longer + // be read from). You can use the "userdata" pointer for + // whatever you want to identify this frame if you need to. + // + // Returning a Frame with data==nullptr is allowed; + // if so, the frame in progress will be dropped. + virtual Frame alloc_frame() = 0; + + virtual void release_frame(Frame frame) = 0; +}; + +typedef std::function + frame_callback_t; + +// The actual capturing class, representing capture from a single card. +class BMUSBCapture { + public: + BMUSBCapture(int vid = 0x1edb, int pid = 0xbd3b) + : vid(vid), pid(pid) + { + } + + // Does not take ownership. + void set_video_frame_allocator(FrameAllocator *allocator) + { + video_frame_allocator = allocator; + } + + FrameAllocator *get_video_frame_allocator() + { + return video_frame_allocator; + } + + // Does not take ownership. + void set_audio_frame_allocator(FrameAllocator *allocator) + { + audio_frame_allocator = allocator; + } + + FrameAllocator *get_audio_frame_allocator() + { + return audio_frame_allocator; + } + + void set_frame_callback(frame_callback_t callback) + { + frame_callback = callback; + } + + void configure_card(); + void start_bm_capture(); + + static void start_bm_thread(); + static void stop_bm_thread(); + + private: + struct QueuedFrame { + uint16_t timecode; + uint16_t format; + FrameAllocator::Frame frame; + }; + + void start_new_audio_block(const uint8_t *start); + void start_new_frame(const uint8_t *start); + + void queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, std::deque *q); + void dequeue_thread(); + + static void usb_thread_func(); + static void cb_xfr(struct libusb_transfer *xfr); + + FrameAllocator::Frame current_video_frame; + FrameAllocator::Frame current_audio_frame; + + std::mutex queue_lock; + std::condition_variable queues_not_empty; + std::deque pending_video_frames; + std::deque pending_audio_frames; + + FrameAllocator *video_frame_allocator = nullptr; + FrameAllocator *audio_frame_allocator = nullptr; + frame_callback_t frame_callback = nullptr; + + int current_register = 0; + + static constexpr int NUM_BMUSB_REGISTERS = 60; + uint8_t register_file[NUM_BMUSB_REGISTERS]; + + int vid, pid; + std::vector iso_xfrs; +}; + +#endif diff --git a/context.cpp b/context.cpp new file mode 100644 index 0000000..72275e9 --- /dev/null +++ b/context.cpp @@ -0,0 +1,32 @@ +#include +#include + +#include +#include +#include + +QSurface *create_surface(const QSurfaceFormat &format) +{ + QOffscreenSurface *surface = new QOffscreenSurface; + surface->setFormat(format); +// QWindow *surface = new QWindow; + surface->create(); + if (!surface->isValid()) { + printf("ERROR: surface not valid!\n"); +// abort(); + } + return surface; +} + +QOpenGLContext *create_context() +{ + QOpenGLContext *context = new QOpenGLContext; + context->setShareContext(QOpenGLContext::globalShareContext()); + context->create(); + return context; +} + +bool make_current(QOpenGLContext *context, QSurface *surface) +{ + return context->makeCurrent(surface); +} diff --git a/context.h b/context.h new file mode 100644 index 0000000..5a75766 --- /dev/null +++ b/context.h @@ -0,0 +1,11 @@ + +// Needs to be in its own file because Qt and libepoxy seemingly don't coexist well +// within the same file. + +class QSurface; +class QOpenGLContext; +class QSurfaceFormat; + +QSurface *create_surface(const QSurfaceFormat &format); +QOpenGLContext *create_context(); +bool make_current(QOpenGLContext *context, QSurface *surface); diff --git a/glwidget.cpp b/glwidget.cpp new file mode 100644 index 0000000..cc2b4fd --- /dev/null +++ b/glwidget.cpp @@ -0,0 +1,49 @@ +#include "context.h" +#include "glwidget.h" +#include "mixer.h" +#include +#include +#include +#include +#include +#include +#include + +GLWidget::GLWidget(QWidget *parent) + : QOpenGLWidget(parent) +{ +} + +GLWidget::~GLWidget() +{ +} + +QSize GLWidget::minimumSizeHint() const +{ + return QSize(50, 50); +} + +QSize GLWidget::sizeHint() const +{ + return QSize(400, 400); +} + +void GLWidget::initializeGL() +{ + printf("egl=%p glx=%p\n", eglGetCurrentContext(), glXGetCurrentContext()); + //printf("threads: %p %p\n", QThread::currentThread(), qGuiApp->thread()); + + QSurface *surface = create_surface(format()); + QSurface *surface2 = create_surface(format()); + QSurface *surface3 = create_surface(format()); + QSurface *surface4 = create_surface(format()); + std::thread([surface, surface2, surface3, surface4]{ + mixer_thread(surface, surface2, surface3, surface4); + }).detach(); +} + +void GLWidget::paintGL() +{ + glClearColor(1.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); +} diff --git a/glwidget.h b/glwidget.h new file mode 100644 index 0000000..eb26d38 --- /dev/null +++ b/glwidget.h @@ -0,0 +1,26 @@ +#ifndef GLWIDGET_H +#define GLWIDGET_H + +#include +#include +#include +#include +#include + +class GLWidget : public QOpenGLWidget +{ + Q_OBJECT + +public: + GLWidget(QWidget *parent = 0); + ~GLWidget(); + + QSize minimumSizeHint() const Q_DECL_OVERRIDE; + QSize sizeHint() const Q_DECL_OVERRIDE; + +protected: + void initializeGL() Q_DECL_OVERRIDE; + void paintGL() Q_DECL_OVERRIDE; +}; + +#endif diff --git a/h264encode.cpp b/h264encode.cpp new file mode 100644 index 0000000..d4fc0ad --- /dev/null +++ b/h264encode.cpp @@ -0,0 +1,1949 @@ +//#include "sysdeps.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "h264encode.h" + +#define CHECK_VASTATUS(va_status, func) \ + if (va_status != VA_STATUS_SUCCESS) { \ + fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ + exit(1); \ + } + +//#include "loadsurface.h" + +#define NAL_REF_IDC_NONE 0 +#define NAL_REF_IDC_LOW 1 +#define NAL_REF_IDC_MEDIUM 2 +#define NAL_REF_IDC_HIGH 3 + +#define NAL_NON_IDR 1 +#define NAL_IDR 5 +#define NAL_SPS 7 +#define NAL_PPS 8 +#define NAL_SEI 6 + +#define SLICE_TYPE_P 0 +#define SLICE_TYPE_B 1 +#define SLICE_TYPE_I 2 +#define IS_P_SLICE(type) (SLICE_TYPE_P == (type)) +#define IS_B_SLICE(type) (SLICE_TYPE_B == (type)) +#define IS_I_SLICE(type) (SLICE_TYPE_I == (type)) + + +#define ENTROPY_MODE_CAVLC 0 +#define ENTROPY_MODE_CABAC 1 + +#define PROFILE_IDC_BASELINE 66 +#define PROFILE_IDC_MAIN 77 +#define PROFILE_IDC_HIGH 100 + +#define BITSTREAM_ALLOCATE_STEPPING 4096 + +#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ +static VADisplay va_dpy; +static VAProfile h264_profile = (VAProfile)~0; +static VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; +static int config_attrib_num = 0, enc_packed_header_idx; + +struct GLSurface { + VASurfaceID src_surface, ref_surface; + VABufferID coded_buf; + + VAImage surface_image; + GLuint y_tex, cbcr_tex; + EGLImage y_egl_image, cbcr_egl_image; +}; +GLSurface gl_surfaces[SURFACE_NUM]; + +static VAConfigID config_id; +static VAContextID context_id; +static VAEncSequenceParameterBufferH264 seq_param; +static VAEncPictureParameterBufferH264 pic_param; +static VAEncSliceParameterBufferH264 slice_param; +static VAPictureH264 CurrentCurrPic; +static VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32]; + +static unsigned int MaxFrameNum = (2<<16); +static unsigned int MaxPicOrderCntLsb = (2<<8); +static unsigned int Log2MaxFrameNum = 16; +static unsigned int Log2MaxPicOrderCntLsb = 8; + +static unsigned int num_ref_frames = 2; +static unsigned int numShortTerm = 0; +static int constraint_set_flag = 0; +static int h264_packedheader = 0; /* support pack header? */ +static int h264_maxref = (1<<16|1); +static int h264_entropy_mode = 1; /* cabac */ + +static char *coded_fn = NULL; +static FILE *coded_fp = NULL; + +static int frame_width = 176; +static int frame_height = 144; +static int frame_width_mbaligned; +static int frame_height_mbaligned; +static int frame_rate = 60; +static unsigned int frame_bitrate = 0; +static unsigned int frame_slices = 1; +static double frame_size = 0; +static int initial_qp = 15; +//static int initial_qp = 28; +static int minimal_qp = 0; +static int intra_period = 30; +static int intra_idr_period = 60; +static int ip_period = 1; +static int rc_mode = -1; +static int rc_default_modes[] = { + VA_RC_VBR, + VA_RC_CQP, + VA_RC_VBR_CONSTRAINED, + VA_RC_CBR, + VA_RC_VCM, + VA_RC_NONE, +}; +static unsigned long long current_frame_encoding = 0; +static unsigned long long current_frame_display = 0; +static unsigned long long current_IDR_display = 0; +static unsigned int current_frame_num = 0; +static int current_frame_type; + +static int misc_priv_type = 0; +static int misc_priv_value = 0; + +/* thread to save coded data */ +#define SRC_SURFACE_FREE 0 +#define SRC_SURFACE_IN_ENCODING 1 + +struct __bitstream { + unsigned int *buffer; + int bit_offset; + int max_size_in_dword; +}; +typedef struct __bitstream bitstream; + +using namespace std; + +static unsigned int +va_swap32(unsigned int val) +{ + unsigned char *pval = (unsigned char *)&val; + + return ((pval[0] << 24) | + (pval[1] << 16) | + (pval[2] << 8) | + (pval[3] << 0)); +} + +static void +bitstream_start(bitstream *bs) +{ + bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING; + bs->buffer = (unsigned int *)calloc(bs->max_size_in_dword * sizeof(int), 1); + bs->bit_offset = 0; +} + +static void +bitstream_end(bitstream *bs) +{ + int pos = (bs->bit_offset >> 5); + int bit_offset = (bs->bit_offset & 0x1f); + int bit_left = 32 - bit_offset; + + if (bit_offset) { + bs->buffer[pos] = va_swap32((bs->buffer[pos] << bit_left)); + } +} + +static void +bitstream_put_ui(bitstream *bs, unsigned int val, int size_in_bits) +{ + int pos = (bs->bit_offset >> 5); + int bit_offset = (bs->bit_offset & 0x1f); + int bit_left = 32 - bit_offset; + + if (!size_in_bits) + return; + + bs->bit_offset += size_in_bits; + + if (bit_left > size_in_bits) { + bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val); + } else { + size_in_bits -= bit_left; + bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits); + bs->buffer[pos] = va_swap32(bs->buffer[pos]); + + if (pos + 1 == bs->max_size_in_dword) { + bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING; + bs->buffer = (unsigned int *)realloc(bs->buffer, bs->max_size_in_dword * sizeof(unsigned int)); + } + + bs->buffer[pos + 1] = val; + } +} + +static void +bitstream_put_ue(bitstream *bs, unsigned int val) +{ + int size_in_bits = 0; + int tmp_val = ++val; + + while (tmp_val) { + tmp_val >>= 1; + size_in_bits++; + } + + bitstream_put_ui(bs, 0, size_in_bits - 1); // leading zero + bitstream_put_ui(bs, val, size_in_bits); +} + +static void +bitstream_put_se(bitstream *bs, int val) +{ + unsigned int new_val; + + if (val <= 0) + new_val = -2 * val; + else + new_val = 2 * val - 1; + + bitstream_put_ue(bs, new_val); +} + +static void +bitstream_byte_aligning(bitstream *bs, int bit) +{ + int bit_offset = (bs->bit_offset & 0x7); + int bit_left = 8 - bit_offset; + int new_val; + + if (!bit_offset) + return; + + assert(bit == 0 || bit == 1); + + if (bit) + new_val = (1 << bit_left) - 1; + else + new_val = 0; + + bitstream_put_ui(bs, new_val, bit_left); +} + +static void +rbsp_trailing_bits(bitstream *bs) +{ + bitstream_put_ui(bs, 1, 1); + bitstream_byte_aligning(bs, 0); +} + +static void nal_start_code_prefix(bitstream *bs) +{ + bitstream_put_ui(bs, 0x00000001, 32); +} + +static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type) +{ + bitstream_put_ui(bs, 0, 1); /* forbidden_zero_bit: 0 */ + bitstream_put_ui(bs, nal_ref_idc, 2); + bitstream_put_ui(bs, nal_unit_type, 5); +} + +static void sps_rbsp(bitstream *bs) +{ + int profile_idc = PROFILE_IDC_BASELINE; + + if (h264_profile == VAProfileH264High) + profile_idc = PROFILE_IDC_HIGH; + else if (h264_profile == VAProfileH264Main) + profile_idc = PROFILE_IDC_MAIN; + + bitstream_put_ui(bs, profile_idc, 8); /* profile_idc */ + bitstream_put_ui(bs, !!(constraint_set_flag & 1), 1); /* constraint_set0_flag */ + bitstream_put_ui(bs, !!(constraint_set_flag & 2), 1); /* constraint_set1_flag */ + bitstream_put_ui(bs, !!(constraint_set_flag & 4), 1); /* constraint_set2_flag */ + bitstream_put_ui(bs, !!(constraint_set_flag & 8), 1); /* constraint_set3_flag */ + bitstream_put_ui(bs, 0, 4); /* reserved_zero_4bits */ + bitstream_put_ui(bs, seq_param.level_idc, 8); /* level_idc */ + bitstream_put_ue(bs, seq_param.seq_parameter_set_id); /* seq_parameter_set_id */ + + if ( profile_idc == PROFILE_IDC_HIGH) { + bitstream_put_ue(bs, 1); /* chroma_format_idc = 1, 4:2:0 */ + bitstream_put_ue(bs, 0); /* bit_depth_luma_minus8 */ + bitstream_put_ue(bs, 0); /* bit_depth_chroma_minus8 */ + bitstream_put_ui(bs, 0, 1); /* qpprime_y_zero_transform_bypass_flag */ + bitstream_put_ui(bs, 0, 1); /* seq_scaling_matrix_present_flag */ + } + + bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_frame_num_minus4); /* log2_max_frame_num_minus4 */ + bitstream_put_ue(bs, seq_param.seq_fields.bits.pic_order_cnt_type); /* pic_order_cnt_type */ + + if (seq_param.seq_fields.bits.pic_order_cnt_type == 0) + bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4); /* log2_max_pic_order_cnt_lsb_minus4 */ + else { + assert(0); + } + + bitstream_put_ue(bs, seq_param.max_num_ref_frames); /* num_ref_frames */ + bitstream_put_ui(bs, 0, 1); /* gaps_in_frame_num_value_allowed_flag */ + + bitstream_put_ue(bs, seq_param.picture_width_in_mbs - 1); /* pic_width_in_mbs_minus1 */ + bitstream_put_ue(bs, seq_param.picture_height_in_mbs - 1); /* pic_height_in_map_units_minus1 */ + bitstream_put_ui(bs, seq_param.seq_fields.bits.frame_mbs_only_flag, 1); /* frame_mbs_only_flag */ + + if (!seq_param.seq_fields.bits.frame_mbs_only_flag) { + assert(0); + } + + bitstream_put_ui(bs, seq_param.seq_fields.bits.direct_8x8_inference_flag, 1); /* direct_8x8_inference_flag */ + bitstream_put_ui(bs, seq_param.frame_cropping_flag, 1); /* frame_cropping_flag */ + + if (seq_param.frame_cropping_flag) { + bitstream_put_ue(bs, seq_param.frame_crop_left_offset); /* frame_crop_left_offset */ + bitstream_put_ue(bs, seq_param.frame_crop_right_offset); /* frame_crop_right_offset */ + bitstream_put_ue(bs, seq_param.frame_crop_top_offset); /* frame_crop_top_offset */ + bitstream_put_ue(bs, seq_param.frame_crop_bottom_offset); /* frame_crop_bottom_offset */ + } + + //if ( frame_bit_rate < 0 ) { //TODO EW: the vui header isn't correct + if ( false ) { + bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */ + } else { + bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */ + bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */ + bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */ + bitstream_put_ui(bs, 0, 1); /* video_signal_type_present_flag */ + bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */ + bitstream_put_ui(bs, 1, 1); /* timing_info_present_flag */ + { + bitstream_put_ui(bs, 1, 32); // FPS + bitstream_put_ui(bs, frame_rate * 2, 32); // FPS + bitstream_put_ui(bs, 1, 1); + } + bitstream_put_ui(bs, 1, 1); /* nal_hrd_parameters_present_flag */ + { + // hrd_parameters + bitstream_put_ue(bs, 0); /* cpb_cnt_minus1 */ + bitstream_put_ui(bs, 4, 4); /* bit_rate_scale */ + bitstream_put_ui(bs, 6, 4); /* cpb_size_scale */ + + bitstream_put_ue(bs, frame_bitrate - 1); /* bit_rate_value_minus1[0] */ + bitstream_put_ue(bs, frame_bitrate*8 - 1); /* cpb_size_value_minus1[0] */ + bitstream_put_ui(bs, 1, 1); /* cbr_flag[0] */ + + bitstream_put_ui(bs, 23, 5); /* initial_cpb_removal_delay_length_minus1 */ + bitstream_put_ui(bs, 23, 5); /* cpb_removal_delay_length_minus1 */ + bitstream_put_ui(bs, 23, 5); /* dpb_output_delay_length_minus1 */ + bitstream_put_ui(bs, 23, 5); /* time_offset_length */ + } + bitstream_put_ui(bs, 0, 1); /* vcl_hrd_parameters_present_flag */ + bitstream_put_ui(bs, 0, 1); /* low_delay_hrd_flag */ + + bitstream_put_ui(bs, 0, 1); /* pic_struct_present_flag */ + bitstream_put_ui(bs, 0, 1); /* bitstream_restriction_flag */ + } + + rbsp_trailing_bits(bs); /* rbsp_trailing_bits */ +} + + +static void pps_rbsp(bitstream *bs) +{ + bitstream_put_ue(bs, pic_param.pic_parameter_set_id); /* pic_parameter_set_id */ + bitstream_put_ue(bs, pic_param.seq_parameter_set_id); /* seq_parameter_set_id */ + + bitstream_put_ui(bs, pic_param.pic_fields.bits.entropy_coding_mode_flag, 1); /* entropy_coding_mode_flag */ + + bitstream_put_ui(bs, 0, 1); /* pic_order_present_flag: 0 */ + + bitstream_put_ue(bs, 0); /* num_slice_groups_minus1 */ + + bitstream_put_ue(bs, pic_param.num_ref_idx_l0_active_minus1); /* num_ref_idx_l0_active_minus1 */ + bitstream_put_ue(bs, pic_param.num_ref_idx_l1_active_minus1); /* num_ref_idx_l1_active_minus1 1 */ + + bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_pred_flag, 1); /* weighted_pred_flag: 0 */ + bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_bipred_idc, 2); /* weighted_bipred_idc: 0 */ + + bitstream_put_se(bs, pic_param.pic_init_qp - 26); /* pic_init_qp_minus26 */ + bitstream_put_se(bs, 0); /* pic_init_qs_minus26 */ + bitstream_put_se(bs, 0); /* chroma_qp_index_offset */ + + bitstream_put_ui(bs, pic_param.pic_fields.bits.deblocking_filter_control_present_flag, 1); /* deblocking_filter_control_present_flag */ + bitstream_put_ui(bs, 0, 1); /* constrained_intra_pred_flag */ + bitstream_put_ui(bs, 0, 1); /* redundant_pic_cnt_present_flag */ + + /* more_rbsp_data */ + bitstream_put_ui(bs, pic_param.pic_fields.bits.transform_8x8_mode_flag, 1); /*transform_8x8_mode_flag */ + bitstream_put_ui(bs, 0, 1); /* pic_scaling_matrix_present_flag */ + bitstream_put_se(bs, pic_param.second_chroma_qp_index_offset ); /*second_chroma_qp_index_offset */ + + rbsp_trailing_bits(bs); +} + +static void slice_header(bitstream *bs) +{ + int first_mb_in_slice = slice_param.macroblock_address; + + bitstream_put_ue(bs, first_mb_in_slice); /* first_mb_in_slice: 0 */ + bitstream_put_ue(bs, slice_param.slice_type); /* slice_type */ + bitstream_put_ue(bs, slice_param.pic_parameter_set_id); /* pic_parameter_set_id: 0 */ + bitstream_put_ui(bs, pic_param.frame_num, seq_param.seq_fields.bits.log2_max_frame_num_minus4 + 4); /* frame_num */ + + /* frame_mbs_only_flag == 1 */ + if (!seq_param.seq_fields.bits.frame_mbs_only_flag) { + /* FIXME: */ + assert(0); + } + + if (pic_param.pic_fields.bits.idr_pic_flag) + bitstream_put_ue(bs, slice_param.idr_pic_id); /* idr_pic_id: 0 */ + + if (seq_param.seq_fields.bits.pic_order_cnt_type == 0) { + bitstream_put_ui(bs, pic_param.CurrPic.TopFieldOrderCnt, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 + 4); + /* pic_order_present_flag == 0 */ + } else { + /* FIXME: */ + assert(0); + } + + /* redundant_pic_cnt_present_flag == 0 */ + /* slice type */ + if (IS_P_SLICE(slice_param.slice_type)) { + bitstream_put_ui(bs, slice_param.num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param.num_ref_idx_active_override_flag) + bitstream_put_ue(bs, slice_param.num_ref_idx_l0_active_minus1); + + /* ref_pic_list_reordering */ + bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ + } else if (IS_B_SLICE(slice_param.slice_type)) { + bitstream_put_ui(bs, slice_param.direct_spatial_mv_pred_flag, 1); /* direct_spatial_mv_pred: 1 */ + + bitstream_put_ui(bs, slice_param.num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param.num_ref_idx_active_override_flag) { + bitstream_put_ue(bs, slice_param.num_ref_idx_l0_active_minus1); + bitstream_put_ue(bs, slice_param.num_ref_idx_l1_active_minus1); + } + + /* ref_pic_list_reordering */ + bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ + bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l1: 0 */ + } + + if ((pic_param.pic_fields.bits.weighted_pred_flag && + IS_P_SLICE(slice_param.slice_type)) || + ((pic_param.pic_fields.bits.weighted_bipred_idc == 1) && + IS_B_SLICE(slice_param.slice_type))) { + /* FIXME: fill weight/offset table */ + assert(0); + } + + /* dec_ref_pic_marking */ + if (pic_param.pic_fields.bits.reference_pic_flag) { /* nal_ref_idc != 0 */ + unsigned char no_output_of_prior_pics_flag = 0; + unsigned char long_term_reference_flag = 0; + unsigned char adaptive_ref_pic_marking_mode_flag = 0; + + if (pic_param.pic_fields.bits.idr_pic_flag) { + bitstream_put_ui(bs, no_output_of_prior_pics_flag, 1); /* no_output_of_prior_pics_flag: 0 */ + bitstream_put_ui(bs, long_term_reference_flag, 1); /* long_term_reference_flag: 0 */ + } else { + bitstream_put_ui(bs, adaptive_ref_pic_marking_mode_flag, 1); /* adaptive_ref_pic_marking_mode_flag: 0 */ + } + } + + if (pic_param.pic_fields.bits.entropy_coding_mode_flag && + !IS_I_SLICE(slice_param.slice_type)) + bitstream_put_ue(bs, slice_param.cabac_init_idc); /* cabac_init_idc: 0 */ + + bitstream_put_se(bs, slice_param.slice_qp_delta); /* slice_qp_delta: 0 */ + + /* ignore for SP/SI */ + + if (pic_param.pic_fields.bits.deblocking_filter_control_present_flag) { + bitstream_put_ue(bs, slice_param.disable_deblocking_filter_idc); /* disable_deblocking_filter_idc: 0 */ + + if (slice_param.disable_deblocking_filter_idc != 1) { + bitstream_put_se(bs, slice_param.slice_alpha_c0_offset_div2); /* slice_alpha_c0_offset_div2: 2 */ + bitstream_put_se(bs, slice_param.slice_beta_offset_div2); /* slice_beta_offset_div2: 2 */ + } + } + + if (pic_param.pic_fields.bits.entropy_coding_mode_flag) { + bitstream_byte_aligning(bs, 1); + } +} + +static int +build_packed_pic_buffer(unsigned char **header_buffer) +{ + bitstream bs; + + bitstream_start(&bs); + nal_start_code_prefix(&bs); + nal_header(&bs, NAL_REF_IDC_HIGH, NAL_PPS); + pps_rbsp(&bs); + bitstream_end(&bs); + + *header_buffer = (unsigned char *)bs.buffer; + return bs.bit_offset; +} + +static int +build_packed_seq_buffer(unsigned char **header_buffer) +{ + bitstream bs; + + bitstream_start(&bs); + nal_start_code_prefix(&bs); + nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS); + sps_rbsp(&bs); + bitstream_end(&bs); + + *header_buffer = (unsigned char *)bs.buffer; + return bs.bit_offset; +} + +static int build_packed_slice_buffer(unsigned char **header_buffer) +{ + bitstream bs; + int is_idr = !!pic_param.pic_fields.bits.idr_pic_flag; + int is_ref = !!pic_param.pic_fields.bits.reference_pic_flag; + + bitstream_start(&bs); + nal_start_code_prefix(&bs); + + if (IS_I_SLICE(slice_param.slice_type)) { + nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR); + } else if (IS_P_SLICE(slice_param.slice_type)) { + nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR); + } else { + assert(IS_B_SLICE(slice_param.slice_type)); + nal_header(&bs, is_ref ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR); + } + + slice_header(&bs); + bitstream_end(&bs); + + *header_buffer = (unsigned char *)bs.buffer; + return bs.bit_offset; +} + + +/* + Assume frame sequence is: Frame#0, #1, #2, ..., #M, ..., #X, ... (encoding order) + 1) period between Frame #X and Frame #N = #X - #N + 2) 0 means infinite for intra_period/intra_idr_period, and 0 is invalid for ip_period + 3) intra_idr_period % intra_period (intra_period > 0) and intra_period % ip_period must be 0 + 4) intra_period and intra_idr_period take precedence over ip_period + 5) if ip_period > 1, intra_period and intra_idr_period are not the strict periods + of I/IDR frames, see bellow examples + ------------------------------------------------------------------- + intra_period intra_idr_period ip_period frame sequence (intra_period/intra_idr_period/ip_period) + 0 ignored 1 IDRPPPPPPP ... (No IDR/I any more) + 0 ignored >=2 IDR(PBB)(PBB)... (No IDR/I any more) + 1 0 ignored IDRIIIIIII... (No IDR any more) + 1 1 ignored IDR IDR IDR IDR... + 1 >=2 ignored IDRII IDRII IDR... (1/3/ignore) + >=2 0 1 IDRPPP IPPP I... (3/0/1) + >=2 0 >=2 IDR(PBB)(PBB)(IBB) (6/0/3) + (PBB)(IBB)(PBB)(IBB)... + >=2 >=2 1 IDRPPPPP IPPPPP IPPPPP (6/18/1) + IDRPPPPP IPPPPP IPPPPP... + >=2 >=2 >=2 {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)} (6/18/3) + {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)}... + {IDR(PBB)(PBB)(IBB)(PBB)} (6/12/3) + {IDR(PBB)(PBB)(IBB)(PBB)}... + {IDR(PBB)(PBB)} (6/6/3) + {IDR(PBB)(PBB)}. +*/ + +/* + * Return displaying order with specified periods and encoding order + * displaying_order: displaying order + * frame_type: frame type + */ +#define FRAME_P 0 +#define FRAME_B 1 +#define FRAME_I 2 +#define FRAME_IDR 7 +void encoding2display_order( + unsigned long long encoding_order, int intra_period, + int intra_idr_period, int ip_period, + unsigned long long *displaying_order, + int *frame_type) +{ + int encoding_order_gop = 0; + + if (intra_period == 1) { /* all are I/IDR frames */ + *displaying_order = encoding_order; + if (intra_idr_period == 0) + *frame_type = (encoding_order == 0)?FRAME_IDR:FRAME_I; + else + *frame_type = (encoding_order % intra_idr_period == 0)?FRAME_IDR:FRAME_I; + return; + } + + if (intra_period == 0) + intra_idr_period = 0; + + /* new sequence like + * IDR PPPPP IPPPPP + * IDR (PBB)(PBB)(IBB)(PBB) + */ + encoding_order_gop = (intra_idr_period == 0)? encoding_order: + (encoding_order % (intra_idr_period + ((ip_period == 1)?0:1))); + + if (encoding_order_gop == 0) { /* the first frame */ + *frame_type = FRAME_IDR; + *displaying_order = encoding_order; + } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */ + *frame_type = FRAME_B; + *displaying_order = encoding_order - 1; + } else if ((intra_period != 0) && /* have I frames */ + (encoding_order_gop >= 2) && + ((ip_period == 1 && encoding_order_gop % intra_period == 0) || /* for IDR PPPPP IPPPP */ + /* for IDR (PBB)(PBB)(IBB) */ + (ip_period >= 2 && ((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0))) { + *frame_type = FRAME_I; + *displaying_order = encoding_order + ip_period - 1; + } else { + *frame_type = FRAME_P; + *displaying_order = encoding_order + ip_period - 1; + } +} + + +static const char *rc_to_string(int rcmode) +{ + switch (rc_mode) { + case VA_RC_NONE: + return "NONE"; + case VA_RC_CBR: + return "CBR"; + case VA_RC_VBR: + return "VBR"; + case VA_RC_VCM: + return "VCM"; + case VA_RC_CQP: + return "CQP"; + case VA_RC_VBR_CONSTRAINED: + return "VBR_CONSTRAINED"; + default: + return "Unknown"; + } +} + +#if 0 +static int process_cmdline(int argc, char *argv[]) +{ + char c; + const struct option long_opts[] = { + {"help", no_argument, NULL, 0 }, + {"bitrate", required_argument, NULL, 1 }, + {"minqp", required_argument, NULL, 2 }, + {"initialqp", required_argument, NULL, 3 }, + {"intra_period", required_argument, NULL, 4 }, + {"idr_period", required_argument, NULL, 5 }, + {"ip_period", required_argument, NULL, 6 }, + {"rcmode", required_argument, NULL, 7 }, + {"srcyuv", required_argument, NULL, 9 }, + {"recyuv", required_argument, NULL, 10 }, + {"fourcc", required_argument, NULL, 11 }, + {"syncmode", no_argument, NULL, 12 }, + {"enablePSNR", no_argument, NULL, 13 }, + {"prit", required_argument, NULL, 14 }, + {"priv", required_argument, NULL, 15 }, + {"framecount", required_argument, NULL, 16 }, + {"entropy", required_argument, NULL, 17 }, + {"profile", required_argument, NULL, 18 }, + {NULL, no_argument, NULL, 0 }}; + int long_index; + + while ((c =getopt_long_only(argc, argv, "w:h:n:f:o:?", long_opts, &long_index)) != EOF) { + switch (c) { + case 'w': + frame_width = atoi(optarg); + break; + case 'h': + frame_height = atoi(optarg); + break; + case 'n': + case 'f': + frame_rate = atoi(optarg); + break; + case 'o': + coded_fn = strdup(optarg); + break; + case 0: + print_help(); + exit(0); + case 1: + frame_bitrate = atoi(optarg); + break; + case 2: + minimal_qp = atoi(optarg); + break; + case 3: + initial_qp = atoi(optarg); + break; + case 4: + intra_period = atoi(optarg); + break; + case 5: + intra_idr_period = atoi(optarg); + break; + case 6: + ip_period = atoi(optarg); + break; + case 7: + rc_mode = string_to_rc(optarg); + if (rc_mode < 0) { + print_help(); + exit(1); + } + break; + case 9: + srcyuv_fn = strdup(optarg); + break; + case 11: + srcyuv_fourcc = string_to_fourcc(optarg); + if (srcyuv_fourcc <= 0) { + print_help(); + exit(1); + } + break; + case 13: + calc_psnr = 1; + break; + case 14: + misc_priv_type = strtol(optarg, NULL, 0); + break; + case 15: + misc_priv_value = strtol(optarg, NULL, 0); + break; + case 17: + h264_entropy_mode = atoi(optarg) ? 1: 0; + break; + case 18: + if (strncmp(optarg, "BP", 2) == 0) + h264_profile = VAProfileH264Baseline; + else if (strncmp(optarg, "MP", 2) == 0) + h264_profile = VAProfileH264Main; + else if (strncmp(optarg, "HP", 2) == 0) + h264_profile = VAProfileH264High; + else + h264_profile = (VAProfile)0; + break; + case ':': + case '?': + print_help(); + exit(0); + } + } + + if (ip_period < 1) { + printf(" ip_period must be greater than 0\n"); + exit(0); + } + if (intra_period != 1 && intra_period % ip_period != 0) { + printf(" intra_period must be a multiplier of ip_period\n"); + exit(0); + } + if (intra_period != 0 && intra_idr_period % intra_period != 0) { + printf(" intra_idr_period must be a multiplier of intra_period\n"); + exit(0); + } + + if (frame_bitrate == 0) + frame_bitrate = frame_width * frame_height * 12 * frame_rate / 50; + + if (coded_fn == NULL) { + struct stat buf; + if (stat("/tmp", &buf) == 0) + coded_fn = strdup("/tmp/test.264"); + else if (stat("/sdcard", &buf) == 0) + coded_fn = strdup("/sdcard/test.264"); + else + coded_fn = strdup("./test.264"); + } + + /* store coded data into a file */ + coded_fp = fopen(coded_fn, "w+"); + if (coded_fp == NULL) { + printf("Open file %s failed, exit\n", coded_fn); + exit(1); + } + + frame_width_mbaligned = (frame_width + 15) & (~15); + frame_height_mbaligned = (frame_height + 15) & (~15); + if (frame_width != frame_width_mbaligned || + frame_height != frame_height_mbaligned) { + printf("Source frame is %dx%d and will code clip to %dx%d with crop\n", + frame_width, frame_height, + frame_width_mbaligned, frame_height_mbaligned + ); + } + + return 0; +} +#endif + +static Display *x11_display; +static Window x11_window; + +VADisplay +va_open_display(void) +{ + x11_display = XOpenDisplay(NULL); + if (!x11_display) { + fprintf(stderr, "error: can't connect to X server!\n"); + return NULL; + } + return vaGetDisplay(x11_display); +} + +void +va_close_display(VADisplay va_dpy) +{ + if (!x11_display) + return; + + if (x11_window) { + XUnmapWindow(x11_display, x11_window); + XDestroyWindow(x11_display, x11_window); + x11_window = None; + } + XCloseDisplay(x11_display); + x11_display = NULL; +} + +static int init_va(void) +{ + VAProfile profile_list[]={VAProfileH264High, VAProfileH264Main, VAProfileH264Baseline, VAProfileH264ConstrainedBaseline}; + VAEntrypoint *entrypoints; + int num_entrypoints, slice_entrypoint; + int support_encode = 0; + int major_ver, minor_ver; + VAStatus va_status; + unsigned int i; + + va_dpy = va_open_display(); + va_status = vaInitialize(va_dpy, &major_ver, &minor_ver); + CHECK_VASTATUS(va_status, "vaInitialize"); + + num_entrypoints = vaMaxNumEntrypoints(va_dpy); + entrypoints = (VAEntrypoint *)malloc(num_entrypoints * sizeof(*entrypoints)); + if (!entrypoints) { + fprintf(stderr, "error: failed to initialize VA entrypoints array\n"); + exit(1); + } + + /* use the highest profile */ + for (i = 0; i < sizeof(profile_list)/sizeof(profile_list[0]); i++) { + if ((h264_profile != ~0) && h264_profile != profile_list[i]) + continue; + + h264_profile = profile_list[i]; + vaQueryConfigEntrypoints(va_dpy, h264_profile, entrypoints, &num_entrypoints); + for (slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) { + if (entrypoints[slice_entrypoint] == VAEntrypointEncSlice) { + support_encode = 1; + break; + } + } + if (support_encode == 1) + break; + } + + if (support_encode == 0) { + printf("Can't find VAEntrypointEncSlice for H264 profiles\n"); + exit(1); + } else { + switch (h264_profile) { + case VAProfileH264Baseline: + printf("Use profile VAProfileH264Baseline\n"); + ip_period = 1; + constraint_set_flag |= (1 << 0); /* Annex A.2.1 */ + h264_entropy_mode = 0; + break; + case VAProfileH264ConstrainedBaseline: + printf("Use profile VAProfileH264ConstrainedBaseline\n"); + constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.2 */ + ip_period = 1; + break; + + case VAProfileH264Main: + printf("Use profile VAProfileH264Main\n"); + constraint_set_flag |= (1 << 1); /* Annex A.2.2 */ + break; + + case VAProfileH264High: + constraint_set_flag |= (1 << 3); /* Annex A.2.4 */ + printf("Use profile VAProfileH264High\n"); + break; + default: + printf("unknow profile. Set to Baseline"); + h264_profile = VAProfileH264Baseline; + ip_period = 1; + constraint_set_flag |= (1 << 0); /* Annex A.2.1 */ + break; + } + } + + VAConfigAttrib attrib[VAConfigAttribTypeMax]; + + /* find out the format for the render target, and rate control mode */ + for (i = 0; i < VAConfigAttribTypeMax; i++) + attrib[i].type = (VAConfigAttribType)i; + + va_status = vaGetConfigAttributes(va_dpy, h264_profile, VAEntrypointEncSlice, + &attrib[0], VAConfigAttribTypeMax); + CHECK_VASTATUS(va_status, "vaGetConfigAttributes"); + /* check the interested configattrib */ + if ((attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) { + printf("Not find desired YUV420 RT format\n"); + exit(1); + } else { + config_attrib[config_attrib_num].type = VAConfigAttribRTFormat; + config_attrib[config_attrib_num].value = VA_RT_FORMAT_YUV420; + config_attrib_num++; + } + + if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribRateControl].value; + + printf("Support rate control mode (0x%x):", tmp); + + if (tmp & VA_RC_NONE) + printf("NONE "); + if (tmp & VA_RC_CBR) + printf("CBR "); + if (tmp & VA_RC_VBR) + printf("VBR "); + if (tmp & VA_RC_VCM) + printf("VCM "); + if (tmp & VA_RC_CQP) + printf("CQP "); + if (tmp & VA_RC_VBR_CONSTRAINED) + printf("VBR_CONSTRAINED "); + + printf("\n"); + + if (rc_mode == -1 || !(rc_mode & tmp)) { + if (rc_mode != -1) { + printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode)); + } + + for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) { + if (rc_default_modes[i] & tmp) { + rc_mode = rc_default_modes[i]; + break; + } + } + + printf("RateControl mode: %s\n", rc_to_string(rc_mode)); + } + + config_attrib[config_attrib_num].type = VAConfigAttribRateControl; + config_attrib[config_attrib_num].value = rc_mode; + config_attrib_num++; + } + + + if (attrib[VAConfigAttribEncPackedHeaders].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribEncPackedHeaders].value; + + printf("Support VAConfigAttribEncPackedHeaders\n"); + + h264_packedheader = 1; + config_attrib[config_attrib_num].type = VAConfigAttribEncPackedHeaders; + config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE; + + if (tmp & VA_ENC_PACKED_HEADER_SEQUENCE) { + printf("Support packed sequence headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SEQUENCE; + } + + if (tmp & VA_ENC_PACKED_HEADER_PICTURE) { + printf("Support packed picture headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_PICTURE; + } + + if (tmp & VA_ENC_PACKED_HEADER_SLICE) { + printf("Support packed slice headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SLICE; + } + + if (tmp & VA_ENC_PACKED_HEADER_MISC) { + printf("Support packed misc headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_MISC; + } + + enc_packed_header_idx = config_attrib_num; + config_attrib_num++; + } + + if (attrib[VAConfigAttribEncInterlaced].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribEncInterlaced].value; + + printf("Support VAConfigAttribEncInterlaced\n"); + + if (tmp & VA_ENC_INTERLACED_FRAME) + printf("support VA_ENC_INTERLACED_FRAME\n"); + if (tmp & VA_ENC_INTERLACED_FIELD) + printf("Support VA_ENC_INTERLACED_FIELD\n"); + if (tmp & VA_ENC_INTERLACED_MBAFF) + printf("Support VA_ENC_INTERLACED_MBAFF\n"); + if (tmp & VA_ENC_INTERLACED_PAFF) + printf("Support VA_ENC_INTERLACED_PAFF\n"); + + config_attrib[config_attrib_num].type = VAConfigAttribEncInterlaced; + config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE; + config_attrib_num++; + } + + if (attrib[VAConfigAttribEncMaxRefFrames].value != VA_ATTRIB_NOT_SUPPORTED) { + h264_maxref = attrib[VAConfigAttribEncMaxRefFrames].value; + + printf("Support %d RefPicList0 and %d RefPicList1\n", + h264_maxref & 0xffff, (h264_maxref >> 16) & 0xffff ); + } + + if (attrib[VAConfigAttribEncMaxSlices].value != VA_ATTRIB_NOT_SUPPORTED) + printf("Support %d slices\n", attrib[VAConfigAttribEncMaxSlices].value); + + if (attrib[VAConfigAttribEncSliceStructure].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribEncSliceStructure].value; + + printf("Support VAConfigAttribEncSliceStructure\n"); + + if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS) + printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS\n"); + if (tmp & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) + printf("Support VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS\n"); + if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) + printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS\n"); + } + if (attrib[VAConfigAttribEncMacroblockInfo].value != VA_ATTRIB_NOT_SUPPORTED) { + printf("Support VAConfigAttribEncMacroblockInfo\n"); + } + + free(entrypoints); + return 0; +} + +static int setup_encode() +{ + VAStatus va_status; + VASurfaceID *tmp_surfaceid; + int codedbuf_size, i; + static VASurfaceID src_surface[SURFACE_NUM]; + static VASurfaceID ref_surface[SURFACE_NUM]; + + va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice, + &config_attrib[0], config_attrib_num, &config_id); + CHECK_VASTATUS(va_status, "vaCreateConfig"); + + /* create source surfaces */ + va_status = vaCreateSurfaces(va_dpy, + VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned, + &src_surface[0], SURFACE_NUM, + NULL, 0); + CHECK_VASTATUS(va_status, "vaCreateSurfaces"); + + /* create reference surfaces */ + va_status = vaCreateSurfaces(va_dpy, + VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned, + &ref_surface[0], SURFACE_NUM, + NULL, 0); + CHECK_VASTATUS(va_status, "vaCreateSurfaces"); + + tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID)); + memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID)); + memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID)); + + /* Create a context for this encode pipe */ + va_status = vaCreateContext(va_dpy, config_id, + frame_width_mbaligned, frame_height_mbaligned, + VA_PROGRESSIVE, + tmp_surfaceid, 2 * SURFACE_NUM, + &context_id); + CHECK_VASTATUS(va_status, "vaCreateContext"); + free(tmp_surfaceid); + + codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16); + + for (i = 0; i < SURFACE_NUM; i++) { + /* create coded buffer once for all + * other VA buffers which won't be used again after vaRenderPicture. + * so APP can always vaCreateBuffer for every frame + * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture + * so VA won't maintain the coded buffer + */ + va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType, + codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + } + + /* create OpenGL objects */ + //glGenFramebuffers(SURFACE_NUM, fbos); + + for (i = 0; i < SURFACE_NUM; i++) { + glGenTextures(1, &gl_surfaces[i].y_tex); + glGenTextures(1, &gl_surfaces[i].cbcr_tex); + } + + for (i = 0; i < SURFACE_NUM; i++) { + gl_surfaces[i].src_surface = src_surface[i]; + gl_surfaces[i].ref_surface = ref_surface[i]; + } + + return 0; +} + + + +#define partition(ref, field, key, ascending) \ + while (i <= j) { \ + if (ascending) { \ + while (ref[i].field < key) \ + i++; \ + while (ref[j].field > key) \ + j--; \ + } else { \ + while (ref[i].field > key) \ + i++; \ + while (ref[j].field < key) \ + j--; \ + } \ + if (i <= j) { \ + tmp = ref[i]; \ + ref[i] = ref[j]; \ + ref[j] = tmp; \ + i++; \ + j--; \ + } \ + } \ + +static void sort_one(VAPictureH264 ref[], int left, int right, + int ascending, int frame_idx) +{ + int i = left, j = right; + unsigned int key; + VAPictureH264 tmp; + + if (frame_idx) { + key = ref[(left + right) / 2].frame_idx; + partition(ref, frame_idx, key, ascending); + } else { + key = ref[(left + right) / 2].TopFieldOrderCnt; + partition(ref, TopFieldOrderCnt, (signed int)key, ascending); + } + + /* recursion */ + if (left < j) + sort_one(ref, left, j, ascending, frame_idx); + + if (i < right) + sort_one(ref, i, right, ascending, frame_idx); +} + +static void sort_two(VAPictureH264 ref[], int left, int right, unsigned int key, unsigned int frame_idx, + int partition_ascending, int list0_ascending, int list1_ascending) +{ + int i = left, j = right; + VAPictureH264 tmp; + + if (frame_idx) { + partition(ref, frame_idx, key, partition_ascending); + } else { + partition(ref, TopFieldOrderCnt, (signed int)key, partition_ascending); + } + + + sort_one(ref, left, i-1, list0_ascending, frame_idx); + sort_one(ref, j+1, right, list1_ascending, frame_idx); +} + +static int update_ReferenceFrames(void) +{ + int i; + + if (current_frame_type == FRAME_B) + return 0; + + CurrentCurrPic.flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE; + numShortTerm++; + if (numShortTerm > num_ref_frames) + numShortTerm = num_ref_frames; + for (i=numShortTerm-1; i>0; i--) + ReferenceFrames[i] = ReferenceFrames[i-1]; + ReferenceFrames[0] = CurrentCurrPic; + + if (current_frame_type != FRAME_B) + current_frame_num++; + if (current_frame_num > MaxFrameNum) + current_frame_num = 0; + + return 0; +} + + +static int update_RefPicList(void) +{ + unsigned int current_poc = CurrentCurrPic.TopFieldOrderCnt; + + if (current_frame_type == FRAME_P) { + memcpy(RefPicList0_P, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); + sort_one(RefPicList0_P, 0, numShortTerm-1, 0, 1); + } + + if (current_frame_type == FRAME_B) { + memcpy(RefPicList0_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); + sort_two(RefPicList0_B, 0, numShortTerm-1, current_poc, 0, + 1, 0, 1); + + memcpy(RefPicList1_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); + sort_two(RefPicList1_B, 0, numShortTerm-1, current_poc, 0, + 0, 1, 0); + } + + return 0; +} + + +static int render_sequence(void) +{ + VABufferID seq_param_buf, rc_param_buf, misc_param_tmpbuf, render_id[2]; + VAStatus va_status; + VAEncMiscParameterBuffer *misc_param, *misc_param_tmp; + VAEncMiscParameterRateControl *misc_rate_ctrl; + + seq_param.level_idc = 41 /*SH_LEVEL_3*/; + seq_param.picture_width_in_mbs = frame_width_mbaligned / 16; + seq_param.picture_height_in_mbs = frame_height_mbaligned / 16; + seq_param.bits_per_second = frame_bitrate; + + seq_param.intra_period = intra_period; + seq_param.intra_idr_period = intra_idr_period; + seq_param.ip_period = ip_period; + + seq_param.max_num_ref_frames = num_ref_frames; + seq_param.seq_fields.bits.frame_mbs_only_flag = 1; + seq_param.time_scale = frame_rate * 2; + seq_param.num_units_in_tick = 1; /* Tc = num_units_in_tick / scale */ + seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = Log2MaxPicOrderCntLsb - 4; + seq_param.seq_fields.bits.log2_max_frame_num_minus4 = Log2MaxFrameNum - 4;; + seq_param.seq_fields.bits.frame_mbs_only_flag = 1; + seq_param.seq_fields.bits.chroma_format_idc = 1; + seq_param.seq_fields.bits.direct_8x8_inference_flag = 1; + + if (frame_width != frame_width_mbaligned || + frame_height != frame_height_mbaligned) { + seq_param.frame_cropping_flag = 1; + seq_param.frame_crop_left_offset = 0; + seq_param.frame_crop_right_offset = (frame_width_mbaligned - frame_width)/2; + seq_param.frame_crop_top_offset = 0; + seq_param.frame_crop_bottom_offset = (frame_height_mbaligned - frame_height)/2; + } + + va_status = vaCreateBuffer(va_dpy, context_id, + VAEncSequenceParameterBufferType, + sizeof(seq_param), 1, &seq_param, &seq_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, context_id, + VAEncMiscParameterBufferType, + sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl), + 1, NULL, &rc_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + vaMapBuffer(va_dpy, rc_param_buf, (void **)&misc_param); + misc_param->type = VAEncMiscParameterTypeRateControl; + misc_rate_ctrl = (VAEncMiscParameterRateControl *)misc_param->data; + memset(misc_rate_ctrl, 0, sizeof(*misc_rate_ctrl)); + misc_rate_ctrl->bits_per_second = frame_bitrate; + misc_rate_ctrl->target_percentage = 66; + misc_rate_ctrl->window_size = 1000; + misc_rate_ctrl->initial_qp = initial_qp; + misc_rate_ctrl->min_qp = minimal_qp; + misc_rate_ctrl->basic_unit_size = 0; + vaUnmapBuffer(va_dpy, rc_param_buf); + + render_id[0] = seq_param_buf; + render_id[1] = rc_param_buf; + + va_status = vaRenderPicture(va_dpy, context_id, &render_id[0], 2); + CHECK_VASTATUS(va_status, "vaRenderPicture");; + + if (misc_priv_type != 0) { + va_status = vaCreateBuffer(va_dpy, context_id, + VAEncMiscParameterBufferType, + sizeof(VAEncMiscParameterBuffer), + 1, NULL, &misc_param_tmpbuf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + vaMapBuffer(va_dpy, misc_param_tmpbuf, (void **)&misc_param_tmp); + misc_param_tmp->type = (VAEncMiscParameterType)misc_priv_type; + misc_param_tmp->data[0] = misc_priv_value; + vaUnmapBuffer(va_dpy, misc_param_tmpbuf); + + va_status = vaRenderPicture(va_dpy, context_id, &misc_param_tmpbuf, 1); + } + + return 0; +} + +static int calc_poc(int pic_order_cnt_lsb) +{ + static int PicOrderCntMsb_ref = 0, pic_order_cnt_lsb_ref = 0; + int prevPicOrderCntMsb, prevPicOrderCntLsb; + int PicOrderCntMsb, TopFieldOrderCnt; + + if (current_frame_type == FRAME_IDR) + prevPicOrderCntMsb = prevPicOrderCntLsb = 0; + else { + prevPicOrderCntMsb = PicOrderCntMsb_ref; + prevPicOrderCntLsb = pic_order_cnt_lsb_ref; + } + + if ((pic_order_cnt_lsb < prevPicOrderCntLsb) && + ((prevPicOrderCntLsb - pic_order_cnt_lsb) >= (int)(MaxPicOrderCntLsb / 2))) + PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb; + else if ((pic_order_cnt_lsb > prevPicOrderCntLsb) && + ((pic_order_cnt_lsb - prevPicOrderCntLsb) > (int)(MaxPicOrderCntLsb / 2))) + PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb; + else + PicOrderCntMsb = prevPicOrderCntMsb; + + TopFieldOrderCnt = PicOrderCntMsb + pic_order_cnt_lsb; + + if (current_frame_type != FRAME_B) { + PicOrderCntMsb_ref = PicOrderCntMsb; + pic_order_cnt_lsb_ref = pic_order_cnt_lsb; + } + + return TopFieldOrderCnt; +} + +static int render_picture(void) +{ + VABufferID pic_param_buf; + VAStatus va_status; + int i = 0; + + pic_param.CurrPic.picture_id = gl_surfaces[current_frame_display % SURFACE_NUM].ref_surface; + pic_param.CurrPic.frame_idx = current_frame_num; + pic_param.CurrPic.flags = 0; + pic_param.CurrPic.TopFieldOrderCnt = calc_poc((current_frame_display - current_IDR_display) % MaxPicOrderCntLsb); + pic_param.CurrPic.BottomFieldOrderCnt = pic_param.CurrPic.TopFieldOrderCnt; + CurrentCurrPic = pic_param.CurrPic; + + if (getenv("TO_DEL")) { /* set RefPicList into ReferenceFrames */ + update_RefPicList(); /* calc RefPicList */ + memset(pic_param.ReferenceFrames, 0xff, 16 * sizeof(VAPictureH264)); /* invalid all */ + if (current_frame_type == FRAME_P) { + pic_param.ReferenceFrames[0] = RefPicList0_P[0]; + } else if (current_frame_type == FRAME_B) { + pic_param.ReferenceFrames[0] = RefPicList0_B[0]; + pic_param.ReferenceFrames[1] = RefPicList1_B[0]; + } + } else { + memcpy(pic_param.ReferenceFrames, ReferenceFrames, numShortTerm*sizeof(VAPictureH264)); + for (i = numShortTerm; i < SURFACE_NUM; i++) { + pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE; + pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID; + } + } + + pic_param.pic_fields.bits.idr_pic_flag = (current_frame_type == FRAME_IDR); + pic_param.pic_fields.bits.reference_pic_flag = (current_frame_type != FRAME_B); + pic_param.pic_fields.bits.entropy_coding_mode_flag = h264_entropy_mode; + pic_param.pic_fields.bits.deblocking_filter_control_present_flag = 1; + pic_param.frame_num = current_frame_num; + pic_param.coded_buf = gl_surfaces[current_frame_display % SURFACE_NUM].coded_buf; + pic_param.last_picture = false; // FIXME + pic_param.pic_init_qp = initial_qp; + + va_status = vaCreateBuffer(va_dpy, context_id, VAEncPictureParameterBufferType, + sizeof(pic_param), 1, &pic_param, &pic_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer");; + + va_status = vaRenderPicture(va_dpy, context_id, &pic_param_buf, 1); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + return 0; +} + +static int render_packedsequence(void) +{ + VAEncPackedHeaderParameterBuffer packedheader_param_buffer; + VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2]; + unsigned int length_in_bits; + unsigned char *packedseq_buffer = NULL; + VAStatus va_status; + + length_in_bits = build_packed_seq_buffer(&packedseq_buffer); + + packedheader_param_buffer.type = VAEncPackedHeaderSequence; + + packedheader_param_buffer.bit_length = length_in_bits; /*length_in_bits*/ + packedheader_param_buffer.has_emulation_bytes = 0; + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderParameterBufferType, + sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, + &packedseq_para_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderDataBufferType, + (length_in_bits + 7) / 8, 1, packedseq_buffer, + &packedseq_data_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + render_id[0] = packedseq_para_bufid; + render_id[1] = packedseq_data_bufid; + va_status = vaRenderPicture(va_dpy, context_id, render_id, 2); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + free(packedseq_buffer); + + return 0; +} + + +static int render_packedpicture(void) +{ + VAEncPackedHeaderParameterBuffer packedheader_param_buffer; + VABufferID packedpic_para_bufid, packedpic_data_bufid, render_id[2]; + unsigned int length_in_bits; + unsigned char *packedpic_buffer = NULL; + VAStatus va_status; + + length_in_bits = build_packed_pic_buffer(&packedpic_buffer); + packedheader_param_buffer.type = VAEncPackedHeaderPicture; + packedheader_param_buffer.bit_length = length_in_bits; + packedheader_param_buffer.has_emulation_bytes = 0; + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderParameterBufferType, + sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, + &packedpic_para_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderDataBufferType, + (length_in_bits + 7) / 8, 1, packedpic_buffer, + &packedpic_data_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + render_id[0] = packedpic_para_bufid; + render_id[1] = packedpic_data_bufid; + va_status = vaRenderPicture(va_dpy, context_id, render_id, 2); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + free(packedpic_buffer); + + return 0; +} + +static void render_packedslice() +{ + VAEncPackedHeaderParameterBuffer packedheader_param_buffer; + VABufferID packedslice_para_bufid, packedslice_data_bufid, render_id[2]; + unsigned int length_in_bits; + unsigned char *packedslice_buffer = NULL; + VAStatus va_status; + + length_in_bits = build_packed_slice_buffer(&packedslice_buffer); + packedheader_param_buffer.type = VAEncPackedHeaderSlice; + packedheader_param_buffer.bit_length = length_in_bits; + packedheader_param_buffer.has_emulation_bytes = 0; + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderParameterBufferType, + sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, + &packedslice_para_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderDataBufferType, + (length_in_bits + 7) / 8, 1, packedslice_buffer, + &packedslice_data_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + render_id[0] = packedslice_para_bufid; + render_id[1] = packedslice_data_bufid; + va_status = vaRenderPicture(va_dpy, context_id, render_id, 2); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + free(packedslice_buffer); +} + +static int render_slice(void) +{ + VABufferID slice_param_buf; + VAStatus va_status; + int i; + + update_RefPicList(); + + /* one frame, one slice */ + slice_param.macroblock_address = 0; + slice_param.num_macroblocks = frame_width_mbaligned * frame_height_mbaligned/(16*16); /* Measured by MB */ + slice_param.slice_type = (current_frame_type == FRAME_IDR)?2:current_frame_type; + if (current_frame_type == FRAME_IDR) { + if (current_frame_encoding != 0) + ++slice_param.idr_pic_id; + } else if (current_frame_type == FRAME_P) { + int refpiclist0_max = h264_maxref & 0xffff; + memcpy(slice_param.RefPicList0, RefPicList0_P, refpiclist0_max*sizeof(VAPictureH264)); + + for (i = refpiclist0_max; i < 32; i++) { + slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE; + slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID; + } + } else if (current_frame_type == FRAME_B) { + int refpiclist0_max = h264_maxref & 0xffff; + int refpiclist1_max = (h264_maxref >> 16) & 0xffff; + + memcpy(slice_param.RefPicList0, RefPicList0_B, refpiclist0_max*sizeof(VAPictureH264)); + for (i = refpiclist0_max; i < 32; i++) { + slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE; + slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID; + } + + memcpy(slice_param.RefPicList1, RefPicList1_B, refpiclist1_max*sizeof(VAPictureH264)); + for (i = refpiclist1_max; i < 32; i++) { + slice_param.RefPicList1[i].picture_id = VA_INVALID_SURFACE; + slice_param.RefPicList1[i].flags = VA_PICTURE_H264_INVALID; + } + } + + slice_param.slice_alpha_c0_offset_div2 = 0; + slice_param.slice_beta_offset_div2 = 0; + slice_param.direct_spatial_mv_pred_flag = 1; + slice_param.pic_order_cnt_lsb = (current_frame_display - current_IDR_display) % MaxPicOrderCntLsb; + + + if (h264_packedheader && + config_attrib[enc_packed_header_idx].value & VA_ENC_PACKED_HEADER_SLICE) + render_packedslice(); + + va_status = vaCreateBuffer(va_dpy, context_id, VAEncSliceParameterBufferType, + sizeof(slice_param), 1, &slice_param, &slice_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer");; + + va_status = vaRenderPicture(va_dpy, context_id, &slice_param_buf, 1); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + return 0; +} + + + +int H264Encoder::save_codeddata(unsigned long long display_order, unsigned long long encode_order, int frame_type) +{ + VACodedBufferSegment *buf_list = NULL; + VAStatus va_status; + unsigned int coded_size = 0; + + string data; + + va_status = vaMapBuffer(va_dpy, gl_surfaces[display_order % SURFACE_NUM].coded_buf, (void **)(&buf_list)); + CHECK_VASTATUS(va_status, "vaMapBuffer"); + while (buf_list != NULL) { + data.append(reinterpret_cast(buf_list->buf), buf_list->size); + if (coded_fp != nullptr) + coded_size += fwrite(buf_list->buf, 1, buf_list->size, coded_fp); + buf_list = (VACodedBufferSegment *) buf_list->next; + + frame_size += coded_size; + } + vaUnmapBuffer(va_dpy, gl_surfaces[display_order % SURFACE_NUM].coded_buf); + + AVPacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.buf = nullptr; + pkt.pts = av_rescale_q(display_order, AVRational{1, frame_rate}, avstream->time_base); + pkt.dts = av_rescale_q(encode_order, AVRational{1, frame_rate}, avstream->time_base); + pkt.data = reinterpret_cast(&data[0]); + pkt.size = data.size(); + pkt.stream_index = 0; + if (frame_type == FRAME_IDR || frame_type == FRAME_I) { + pkt.flags = AV_PKT_FLAG_KEY; + } else { + pkt.flags = 0; + } + pkt.duration = 1; + av_interleaved_write_frame(avctx, &pkt); + +#if 0 + printf("\r "); /* return back to startpoint */ + switch (encode_order % 4) { + case 0: + printf("|"); + break; + case 1: + printf("/"); + break; + case 2: + printf("-"); + break; + case 3: + printf("\\"); + break; + } + printf("%08lld", encode_order); + printf("(%06d bytes coded)", coded_size); +#endif + + return 0; +} + + +// this is weird. but it seems to put a new frame onto the queue +void H264Encoder::storage_task_enqueue(unsigned long long display_order, unsigned long long encode_order, int frame_type) +{ + std::unique_lock lock(storage_task_queue_mutex); + + storage_task tmp; + tmp.display_order = display_order; + tmp.encode_order = encode_order; + tmp.frame_type = frame_type; + storage_task_queue.push(tmp); + srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING; + + storage_task_queue_changed.notify_all(); +} + +void H264Encoder::storage_task_thread() +{ + for ( ;; ) { + storage_task current; + { + // wait until there's an encoded frame + std::unique_lock lock(storage_task_queue_mutex); + storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || !storage_task_queue.empty(); }); + if (storage_thread_should_quit) return; + current = storage_task_queue.front(); + storage_task_queue.pop(); + } + + VAStatus va_status; + + // waits for data, then saves it to disk. + va_status = vaSyncSurface(va_dpy, gl_surfaces[current.display_order % SURFACE_NUM].src_surface); + CHECK_VASTATUS(va_status, "vaSyncSurface"); + save_codeddata(current.display_order, current.encode_order, current.frame_type); + + { + std::unique_lock lock(storage_task_queue_mutex); + srcsurface_status[current.display_order % SURFACE_NUM] = SRC_SURFACE_FREE; + storage_task_queue_changed.notify_all(); + } + } +} + +static int release_encode() +{ + int i; + + for (i = 0; i < SURFACE_NUM; i++) { + vaDestroyBuffer(va_dpy, gl_surfaces[i].coded_buf); + vaDestroySurfaces(va_dpy, &gl_surfaces[i].src_surface, 1); + vaDestroySurfaces(va_dpy, &gl_surfaces[i].ref_surface, 1); + } + + vaDestroyContext(va_dpy, context_id); + vaDestroyConfig(va_dpy, config_id); + + return 0; +} + +static int deinit_va() +{ + vaTerminate(va_dpy); + + va_close_display(va_dpy); + + return 0; +} + + +static int print_input() +{ + printf("\n\nINPUT:Try to encode H264...\n"); + if (rc_mode != -1) + printf("INPUT: RateControl : %s\n", rc_to_string(rc_mode)); + printf("INPUT: Resolution : %dx%dframes\n", frame_width, frame_height); + printf("INPUT: FrameRate : %d\n", frame_rate); + printf("INPUT: Bitrate : %d\n", frame_bitrate); + printf("INPUT: Slieces : %d\n", frame_slices); + printf("INPUT: IntraPeriod : %d\n", intra_period); + printf("INPUT: IDRPeriod : %d\n", intra_idr_period); + printf("INPUT: IpPeriod : %d\n", ip_period); + printf("INPUT: Initial QP : %d\n", initial_qp); + printf("INPUT: Min QP : %d\n", minimal_qp); + printf("INPUT: Coded Clip : %s\n", coded_fn); + + printf("\n\n"); /* return back to startpoint */ + + return 0; +} + + +//H264Encoder::H264Encoder(SDL_Window *window, SDL_GLContext context, int width, int height, const char *output_filename) +H264Encoder::H264Encoder(QSurface *surface, int width, int height, const char *output_filename) + : current_storage_frame(0), surface(surface) + //: width(width), height(height), current_encoding_frame(0) +{ + av_register_all(); + avctx = avformat_alloc_context(); + avctx->oformat = av_guess_format(NULL, output_filename, NULL); + strcpy(avctx->filename, output_filename); + if (avio_open2(&avctx->pb, output_filename, AVIO_FLAG_WRITE, &avctx->interrupt_callback, NULL) < 0) { + fprintf(stderr, "%s: avio_open2() failed\n", output_filename); + exit(1); + } + AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_H264); + avstream = avformat_new_stream(avctx, codec); + if (avstream == nullptr) { + fprintf(stderr, "%s: avformat_new_stream() failed\n", output_filename); + exit(1); + } + avstream->time_base = AVRational{1, frame_rate}; // TODO + avstream->codec->width = width; + avstream->codec->height = height; + //avstream->codec->time_base = AVRational{1, 60}; // TODO + avstream->codec->time_base = AVRational{1, frame_rate}; + avstream->codec->ticks_per_frame = 1; // or 2? + + if (avformat_write_header(avctx, NULL) < 0) { + fprintf(stderr, "%s: avformat_write_header() failed\n", output_filename); + exit(1); + } + + coded_fp = fopen("dump.h264", "wb"); + assert(coded_fp != NULL); + + frame_width = width; + frame_height = height; + frame_width_mbaligned = (frame_width + 15) & (~15); + frame_height_mbaligned = (frame_height + 15) & (~15); + frame_bitrate = 15000000; // / 60; + current_frame_encoding = 0; + + print_input(); + + init_va(); + setup_encode(); + + // No frames are ready yet. + memset(srcsurface_status, SRC_SURFACE_FREE, sizeof(srcsurface_status)); + + memset(&seq_param, 0, sizeof(seq_param)); + memset(&pic_param, 0, sizeof(pic_param)); + memset(&slice_param, 0, sizeof(slice_param)); + + storage_thread = std::thread(&H264Encoder::storage_task_thread, this); + + copy_thread = std::thread([this]{ + //SDL_GL_MakeCurrent(window, context); + QOpenGLContext *context = create_context(); + eglBindAPI(EGL_OPENGL_API); + if (!make_current(context, this->surface)) { + printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(), + eglGetError()); + exit(1); + } + copy_thread_func(); + }); +} + +H264Encoder::~H264Encoder() +{ + { + unique_lock lock(storage_task_queue_mutex); + storage_thread_should_quit = true; + storage_task_queue_changed.notify_all(); + } + { + unique_lock lock(frame_queue_mutex); + copy_thread_should_quit = true; + frame_queue_nonempty.notify_one(); + } + storage_thread.join(); + copy_thread.join(); + + release_encode(); + deinit_va(); + + av_write_trailer(avctx); + avformat_free_context(avctx); +} + +bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex) +{ + { + // Wait until this frame slot is done encoding. + std::unique_lock lock(storage_task_queue_mutex); + storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || (srcsurface_status[current_storage_frame % SURFACE_NUM] == SRC_SURFACE_FREE); }); + if (storage_thread_should_quit) return false; + } + + //*fbo = fbos[current_storage_frame % SURFACE_NUM]; + GLSurface *surf = &gl_surfaces[current_storage_frame % SURFACE_NUM]; + *y_tex = surf->y_tex; + *cbcr_tex = surf->cbcr_tex; + + VASurfaceID surface = surf->src_surface; + VAStatus va_status = vaDeriveImage(va_dpy, surface, &surf->surface_image); + CHECK_VASTATUS(va_status, "vaDeriveImage"); + + VABufferInfo buf_info; + buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM? + va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info); + CHECK_VASTATUS(va_status, "vaAcquireBufferHandle"); + + // Create Y image. + surf->y_egl_image = EGL_NO_IMAGE_KHR; + EGLint y_attribs[] = { + EGL_WIDTH, frame_width, + EGL_HEIGHT, frame_height, + EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '), + EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), + EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]), + EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]), + EGL_NONE + }; + + surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs); + assert(surf->y_egl_image != EGL_NO_IMAGE_KHR); + + // Associate Y image to a texture. + glBindTexture(GL_TEXTURE_2D, *y_tex); + glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image); + + // Create CbCr image. + surf->cbcr_egl_image = EGL_NO_IMAGE_KHR; + EGLint cbcr_attribs[] = { + EGL_WIDTH, frame_width, + EGL_HEIGHT, frame_height, + EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'), + EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), + EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]), + EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]), + EGL_NONE + }; + + surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs); + assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR); + + // Associate CbCr image to a texture. + glBindTexture(GL_TEXTURE_2D, *cbcr_tex); + glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image); + + return true; +} + +void H264Encoder::end_frame(GLsync fence) +{ + { + unique_lock lock(frame_queue_mutex); + pending_frames[current_storage_frame++] = fence; + } + frame_queue_nonempty.notify_one(); +} + +void H264Encoder::copy_thread_func() +{ + for ( ;; ) { + GLsync fence; + encoding2display_order(current_frame_encoding, intra_period, intra_idr_period, ip_period, + ¤t_frame_display, ¤t_frame_type); + if (current_frame_type == FRAME_IDR) { + numShortTerm = 0; + current_frame_num = 0; + current_IDR_display = current_frame_display; + } + + { + unique_lock lock(frame_queue_mutex); + frame_queue_nonempty.wait(lock, [this]{ return copy_thread_should_quit || pending_frames.count(current_frame_display) != 0; }); + if (copy_thread_should_quit) return; + fence = pending_frames[current_frame_display]; + pending_frames.erase(current_frame_display); + } + + // Wait for the GPU to be done with the frame. + glClientWaitSync(fence, 0, 0); + glDeleteSync(fence); + + // Unmap the image. + GLSurface *surf = &gl_surfaces[current_frame_display % SURFACE_NUM]; + eglDestroyImageKHR(eglGetCurrentDisplay(), surf->y_egl_image); + eglDestroyImageKHR(eglGetCurrentDisplay(), surf->cbcr_egl_image); + VAStatus va_status = vaReleaseBufferHandle(va_dpy, surf->surface_image.buf); + CHECK_VASTATUS(va_status, "vaReleaseBufferHandle"); + va_status = vaDestroyImage(va_dpy, surf->surface_image.image_id); + CHECK_VASTATUS(va_status, "vaDestroyImage"); + + VASurfaceID surface = surf->src_surface; + + // Schedule the frame for encoding. + va_status = vaBeginPicture(va_dpy, context_id, surface); + CHECK_VASTATUS(va_status, "vaBeginPicture"); + + if (current_frame_type == FRAME_IDR) { + render_sequence(); + render_picture(); + if (h264_packedheader) { + render_packedsequence(); + render_packedpicture(); + } + } else { + //render_sequence(); + render_picture(); + } + render_slice(); + + va_status = vaEndPicture(va_dpy, context_id); + CHECK_VASTATUS(va_status, "vaEndPicture"); + + // so now the data is done encoding (well, async job kicked off)... + // we send that to the storage thread + storage_task_enqueue(current_frame_display, current_frame_encoding, current_frame_type); + + update_ReferenceFrames(); + ++current_frame_encoding; + } +} diff --git a/h264encode.h b/h264encode.h new file mode 100644 index 0000000..f355116 --- /dev/null +++ b/h264encode.h @@ -0,0 +1,105 @@ +// Hardware H.264 encoding via VAAPI. Heavily modified based on example +// code by Intel. Intel's original copyright and license is reproduced below: +// +// Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sub license, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice (including the +// next paragraph) shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +// IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +// ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +#ifndef _H264ENCODE_H +#define _H264ENCODE_H + +extern "C" { +#include +} +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pbo_frame_allocator.h" +#include "context.h" + +#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ + +class H264Encoder { +public: + H264Encoder(QSurface *surface, int width, int height, const char *output_filename); + ~H264Encoder(); + //void add_frame(FrameAllocator::Frame frame, GLsync fence); + +#if 0 + struct Frame { + public: + GLuint fbo; + GLuint y_tex, cbcr_tex; + + private: + //int surface_subnum; + }; + void +#endif + bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); + void end_frame(GLsync fence); + +private: + struct storage_task { + unsigned long long display_order; + unsigned long long encode_order; + int frame_type; + }; + + void copy_thread_func(); + void storage_task_thread(); + void storage_task_enqueue(unsigned long long display_order, unsigned long long encode_order, int frame_type); + int save_codeddata(unsigned long long display_order, unsigned long long encode_order, int frame_type); + + std::thread copy_thread, storage_thread; + + std::mutex storage_task_queue_mutex; + std::condition_variable storage_task_queue_changed; + int srcsurface_status[SURFACE_NUM]; // protected by storage_task_queue_mutex + std::queue storage_task_queue; // protected by storage_task_queue_mutex + bool storage_thread_should_quit = false; // protected by storage_task_queue_mutex + + std::mutex frame_queue_mutex; + std::condition_variable frame_queue_nonempty; + bool copy_thread_should_quit = false; // under frame_queue_mutex + + //int frame_width, frame_height; + //int ; + int current_storage_frame; +#if 0 + std::map> pending_frames; +#endif + std::map pending_frames; + QSurface *surface; + + AVFormatContext *avctx; + AVStream *avstream; +}; + +#endif diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..34a1c49 --- /dev/null +++ b/main.cpp @@ -0,0 +1,28 @@ +#include +#include +#include +#include + +#include "mainwindow.h" + +int main(int argc, char *argv[]) +{ + setenv("QT_XCB_GL_INTEGRATION", "xcb_egl", 0); + + QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts, true); + QApplication app(argc, argv); + + QSurfaceFormat fmt; + fmt.setDepthBufferSize(0); + fmt.setStencilBufferSize(0); + fmt.setProfile(QSurfaceFormat::CoreProfile); + fmt.setMajorVersion(3); + fmt.setMinorVersion(1); + QSurfaceFormat::setDefaultFormat(fmt); + + MainWindow mainWindow; + mainWindow.resize(mainWindow.sizeHint()); + mainWindow.show(); + + return app.exec(); +} diff --git a/mainwindow.cpp b/mainwindow.cpp new file mode 100644 index 0000000..4804c08 --- /dev/null +++ b/mainwindow.cpp @@ -0,0 +1,13 @@ +#include "mainwindow.h" +#include "window.h" +#include + +#include "context.h" +#include "mixer.h" + +using std::thread; + +MainWindow::MainWindow() +{ + setCentralWidget(new Window(this)); +} diff --git a/mainwindow.h b/mainwindow.h new file mode 100644 index 0000000..fd60640 --- /dev/null +++ b/mainwindow.h @@ -0,0 +1,14 @@ +#ifndef MAINWINDOW_H +#define MAINWINDOW_H + +#include + +class MainWindow : public QMainWindow +{ + Q_OBJECT + +public: + MainWindow(); +}; + +#endif diff --git a/mixer.cpp b/mixer.cpp new file mode 100644 index 0000000..b8cdbe6 --- /dev/null +++ b/mixer.cpp @@ -0,0 +1,654 @@ +#define GL_GLEXT_PROTOTYPES 1 +#define NO_SDL_GLEXT 1 +#define NUM_CARDS 2 + +#define WIDTH 1280 +#define HEIGHT 720 + +#include +#include + +#undef Success + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "h264encode.h" +#include "context.h" +#include "bmusb.h" +#include "pbo_frame_allocator.h" + +using namespace movit; +using namespace std; +using namespace std::placeholders; + +// shared between all EGL contexts +EGLDisplay egl_display; +EGLSurface egl_surface; +EGLConfig ecfg; +EGLint ctxattr[] = { + EGL_CONTEXT_CLIENT_VERSION, 2, + EGL_CONTEXT_MAJOR_VERSION_KHR, 3, + EGL_CONTEXT_MINOR_VERSION_KHR, 1, + //EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR, + EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, + EGL_NONE +}; + +EGLConfig pbuffer_ecfg; + +std::mutex bmusb_mutex; // protects + +struct CaptureCard { + BMUSBCapture *usb; + + // Threading stuff + bool thread_initialized; + QSurface *surface; + QOpenGLContext *context; + + bool new_data_ready; // Whether new_frame contains anything. + PBOFrameAllocator::Frame new_frame; + GLsync new_data_ready_fence; // Whether new_frame is ready for rendering. + std::condition_variable new_data_ready_changed; // Set whenever new_data_ready is changed. +}; +CaptureCard cards[NUM_CARDS]; + +void bm_frame(int card_index, uint16_t timecode, + FrameAllocator::Frame video_frame, size_t video_offset, uint16_t video_format, + FrameAllocator::Frame audio_frame, size_t audio_offset, uint16_t audio_format) +{ + CaptureCard *card = &cards[card_index]; + if (!card->thread_initialized) { + printf("initializing context for bmusb thread %d\n", card_index); + eglBindAPI(EGL_OPENGL_API); + card->context = create_context(); + if (!make_current(card->context, card->surface)) { + printf("failed to create bmusb context\n"); + exit(1); + } + card->thread_initialized = true; + } + + if (video_frame.len - video_offset != 1280 * 750 * 2) { + printf("dropping frame with wrong length (%ld)\n", video_frame.len - video_offset); + FILE *fp = fopen("frame.raw", "wb"); + fwrite(video_frame.data, video_frame.len, 1, fp); + fclose(fp); + //exit(1); + card->usb->get_video_frame_allocator()->release_frame(video_frame); + card->usb->get_audio_frame_allocator()->release_frame(audio_frame); + return; + } + { + // Wait until the previous frame was consumed. + std::unique_lock lock(bmusb_mutex); + card->new_data_ready_changed.wait(lock, [card]{ return !card->new_data_ready; }); + } + GLuint pbo = (GLint)(intptr_t)video_frame.userdata; + check_error(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); + check_error(); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, video_frame.size); + check_error(); + //glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); + //check_error(); + GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); + check_error(); + assert(fence != nullptr); + { + std::unique_lock lock(bmusb_mutex); + card->new_data_ready = true; + card->new_frame = video_frame; + card->new_data_ready_fence = fence; + card->new_data_ready_changed.notify_all(); + } + + // Video frame will be released later. + card->usb->get_audio_frame_allocator()->release_frame(audio_frame); +} + +void place_rectangle(Effect *resample_effect, Effect *padding_effect, float x0, float y0, float x1, float y1) +{ + float srcx0 = 0.0f; + float srcx1 = 1.0f; + float srcy0 = 0.0f; + float srcy1 = 1.0f; + + // Cull. + if (x0 > 1280.0 || x1 < 0.0 || y0 > 720.0 || y1 < 0.0) { + CHECK(resample_effect->set_int("width", 1)); + CHECK(resample_effect->set_int("height", 1)); + CHECK(resample_effect->set_float("zoom_x", 1280.0)); + CHECK(resample_effect->set_float("zoom_y", 720.0)); + CHECK(padding_effect->set_int("left", 2000)); + CHECK(padding_effect->set_int("top", 2000)); + return; + } + + // Clip. (TODO: Clip on upper/left sides, too.) + if (x1 > 1280.0) { + srcx1 = (1280.0 - x0) / (x1 - x0); + x1 = 1280.0; + } + if (y1 > 720.0) { + srcy1 = (720.0 - y0) / (y1 - y0); + y1 = 720.0; + } + + float x_subpixel_offset = x0 - floor(x0); + float y_subpixel_offset = y0 - floor(y0); + + // Resampling must be to an integral number of pixels. Round up, + // and then add an extra pixel so we have some leeway for the border. + int width = int(ceil(x1 - x0)) + 1; + int height = int(ceil(y1 - y0)) + 1; + CHECK(resample_effect->set_int("width", width)); + CHECK(resample_effect->set_int("height", height)); + + // Correct the discrepancy with zoom. (This will leave a small + // excess edge of pixels and subpixels, which we'll correct for soon.) + float zoom_x = (x1 - x0) / (width * (srcx1 - srcx0)); + float zoom_y = (y1 - y0) / (height * (srcy1 - srcy0)); + CHECK(resample_effect->set_float("zoom_x", zoom_x)); + CHECK(resample_effect->set_float("zoom_y", zoom_y)); + CHECK(resample_effect->set_float("zoom_center_x", 0.0f)); + CHECK(resample_effect->set_float("zoom_center_y", 0.0f)); + + // Padding must also be to a whole-pixel offset. + CHECK(padding_effect->set_int("left", floor(x0))); + CHECK(padding_effect->set_int("top", floor(y0))); + + // Correct _that_ discrepancy by subpixel offset in the resampling. + CHECK(resample_effect->set_float("left", -x_subpixel_offset / zoom_x)); + CHECK(resample_effect->set_float("top", -y_subpixel_offset / zoom_y)); + + // Finally, adjust the border so it is exactly where we want it. + CHECK(padding_effect->set_float("border_offset_left", x_subpixel_offset)); + CHECK(padding_effect->set_float("border_offset_right", x1 - (floor(x0) + width))); + CHECK(padding_effect->set_float("border_offset_top", y_subpixel_offset)); + CHECK(padding_effect->set_float("border_offset_bottom", y1 - (floor(y0) + height))); +} + +void mixer_thread(QSurface *surface, QSurface *surface2, QSurface *surface3, QSurface *surface4) +{ + bool quit = false; + + cards[0].surface = surface3; +#if NUM_CARDS == 2 + cards[1].surface = surface4; +#endif + + eglBindAPI(EGL_OPENGL_API); + //QSurface *surface = create_surface(); + QOpenGLContext *context = create_context(); + if (!make_current(context, surface)) { + printf("oops\n"); + exit(1); + } + printf("egl=%p\n", eglGetCurrentContext()); + + CHECK(init_movit("/usr/share/movit", MOVIT_DEBUG_ON)); + printf("GPU texture subpixel precision: about %.1f bits\n", + log2(1.0f / movit_texel_subpixel_precision)); + printf("Wrongly rounded x+0.48 or x+0.52 values: %d/510\n", + movit_num_wrongly_rounded); + if (movit_num_wrongly_rounded > 0) { + if (movit_shader_rounding_supported) { + printf("Rounding off in the shader to compensate.\n"); + } else { + printf("No shader roundoff available; cannot compensate.\n"); + } + } + + //printf("egl_api=%d EGL_OPENGL_API=%d\n", epoxy_egl_get_current_gl_context_api(), EGL_OPENGL_API); + //exit(0); + + check_error(); + + EffectChain chain(WIDTH, HEIGHT); + check_error(); +#if 0 + glViewport(0, 0, WIDTH, HEIGHT); + check_error(); + + glMatrixMode(GL_PROJECTION); + check_error(); + glLoadIdentity(); + check_error(); + glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + check_error(); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); +#endif + + ImageFormat inout_format; + inout_format.color_space = COLORSPACE_sRGB; + inout_format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.chroma_subsampling_x = 2; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.0; + ycbcr_format.cr_x_position = 0.0; + ycbcr_format.cb_y_position = 0.5; + ycbcr_format.cr_y_position = 0.5; + ycbcr_format.luma_coefficients = YCBCR_REC_601; + ycbcr_format.full_range = false; + + YCbCrInput *input[NUM_CARDS]; + + input[0] = new YCbCrInput(inout_format, ycbcr_format, WIDTH, HEIGHT, YCBCR_INPUT_SPLIT_Y_AND_CBCR); + chain.add_input(input[0]); + //if (NUM_CARDS == 2) { + input[1] = new YCbCrInput(inout_format, ycbcr_format, WIDTH, HEIGHT, YCBCR_INPUT_SPLIT_Y_AND_CBCR); + chain.add_input(input[1]); + //} + //YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(inout_format, ycbcr_format, WIDTH, HEIGHT); + //YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(inout_format, ycbcr_format, 2, 1); + Effect *resample_effect = chain.add_effect(new ResampleEffect(), input[0]); + Effect *padding_effect = chain.add_effect(new IntegralPaddingEffect()); + float border_color[] = { 0.0f, 0.0f, 0.0f, 1.0f }; + CHECK(padding_effect->set_vec4("border_color", border_color)); + + //Effect *resample2_effect = chain.add_effect(new ResampleEffect(), input[1 % NUM_CARDS]); + Effect *resample2_effect = chain.add_effect(new ResampleEffect(), input[1]); + Effect *saturation_effect = chain.add_effect(new SaturationEffect()); + CHECK(saturation_effect->set_float("saturation", 0.3f)); + Effect *wb_effect = chain.add_effect(new WhiteBalanceEffect()); + CHECK(wb_effect->set_float("output_color_temperature", 3500.0)); + Effect *padding2_effect = chain.add_effect(new IntegralPaddingEffect()); + + chain.add_effect(new OverlayEffect(), padding_effect, padding2_effect); + + ycbcr_format.chroma_subsampling_x = 1; + + chain.add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR); + chain.set_dither_bits(8); + chain.set_output_origin(OUTPUT_ORIGIN_TOP_LEFT); + chain.finalize(); + +#if 0 + // generate a PBO to hold the data we read back with glReadPixels() + // (Intel/DRI goes into a slow path if we don't read to PBO) + GLuint pbo; + glGenBuffers(1, &pbo); + glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); + glBufferData(GL_PIXEL_PACK_BUFFER_ARB, WIDTH * HEIGHT * 4, NULL, GL_STREAM_READ); +#endif + + //make_hsv_wheel_texture(); + +// QSurface *create_surface(const QSurfaceFormat &format); + H264Encoder h264_encoder(surface2, WIDTH, HEIGHT, "test.mp4"); + + printf("Configuring first card...\n"); + cards[0].usb = new BMUSBCapture(0x1edb, 0xbd3b); // 0xbd4f + //cards[0].usb = new BMUSBCapture(0x1edb, 0xbd4f); + cards[0].usb->set_frame_callback(std::bind(bm_frame, 0, _1, _2, _3, _4, _5, _6, _7)); + std::unique_ptr pbo_allocator1(new PBOFrameAllocator(1280 * 750 * 2 + 44)); + cards[0].usb->set_video_frame_allocator(pbo_allocator1.get()); + cards[0].usb->configure_card(); + + std::unique_ptr pbo_allocator2(new PBOFrameAllocator(1280 * 750 * 2 + 44)); + if (NUM_CARDS == 2) { + printf("Configuring second card...\n"); + cards[1].usb = new BMUSBCapture(0x1edb, 0xbd4f); + cards[1].usb->set_frame_callback(std::bind(bm_frame, 1, _1, _2, _3, _4, _5, _6, _7)); + cards[1].usb->set_video_frame_allocator(pbo_allocator2.get()); + cards[1].usb->configure_card(); + } + + BMUSBCapture::start_bm_thread(); + + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + cards[card_index].usb->start_bm_capture(); + } + + int frame = 0; +#if _POSIX_C_SOURCE >= 199309L + struct timespec start, now; + clock_gettime(CLOCK_MONOTONIC, &start); +#else + struct timeval start, now; + gettimeofday(&start, NULL); +#endif + + PBOFrameAllocator::Frame bmusb_current_rendering_frame[NUM_CARDS]; + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + bmusb_current_rendering_frame[card_index] = + cards[card_index].usb->get_video_frame_allocator()->alloc_frame(); + GLint input_tex_pbo = (GLint)(intptr_t)bmusb_current_rendering_frame[card_index].userdata; + input[card_index]->set_pixel_data(0, nullptr, input_tex_pbo); + input[card_index]->set_pixel_data(1, nullptr, input_tex_pbo); + check_error(); + } + + //chain.enable_phase_timing(true); + + // Set up stuff for NV12 conversion. +#if 0 + PBOFrameAllocator nv12_frame_pool(WIDTH * HEIGHT * 3 / 2, /*num_frames=*/24, + GL_PIXEL_PACK_BUFFER_ARB, GL_MAP_READ_BIT | GL_MAP_COHERENT_BIT, 0); +#endif + ResourcePool *resource_pool = chain.get_resource_pool(); + //GLuint ycbcr_tex = resource_pool->create_2d_texture(GL_RGBA8, WIDTH, HEIGHT); + GLuint chroma_tex = resource_pool->create_2d_texture(GL_RG8, WIDTH, HEIGHT); + +#if 0 + // Y shader. + string y_vert_shader = read_version_dependent_file("vs-y", "vert"); + string y_frag_shader = + "#version 130 \n" + "in vec2 tc; \n" + "uniform sampler2D ycbcr_tex; \n" + "void main() { \n" + " gl_FragColor = texture2D(ycbcr_tex, tc); \n" + "} \n"; + GLuint y_program_num = resource_pool->compile_glsl_program(y_vert_shader, y_frag_shader); +#endif + +#if 1 + // Cb/Cr shader. + string cbcr_vert_shader = read_version_dependent_file("vs-cbcr", "vert"); + string cbcr_frag_shader = + "#version 130 \n" + "in vec2 tc0; \n" +// "in vec2 tc1; \n" + "uniform sampler2D cbcr_tex; \n" + "void main() { \n" + " gl_FragColor = texture2D(cbcr_tex, tc0); \n" +// " gl_FragColor.ba = texture2D(cbcr_tex, tc1).gb; \n" + "} \n"; + GLuint cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader); +#endif + + GLuint vao; + glGenVertexArrays(1, &vao); + check_error(); + + while (!quit) { + ++frame; + + //int width0 = lrintf(848 * (1.0 + 0.2 * sin(frame * 0.02))); + int width0 = 848; + int height0 = lrintf(width0 * 9.0 / 16.0); + + //float top0 = 96 + 48 * sin(frame * 0.005); + //float left0 = 96 + 48 * cos(frame * 0.006); + float top0 = 48; + float left0 = 16; + float bottom0 = top0 + height0; + float right0 = left0 + width0; + + int width1 = 384; + int height1 = 216; + + float bottom1 = 720 - 48; + float right1 = 1280 - 16; + float top1 = bottom1 - height1; + float left1 = right1 - width1; + + float t = 0.5 + 0.5 * cos(frame * 0.006); + //float t = 0.0; + float scale0 = 1.0 + t * (1280.0 / 848.0 - 1.0); + float tx0 = 0.0 + t * (-16.0 * scale0); + float ty0 = 0.0 + t * (-48.0 * scale0); + + top0 = top0 * scale0 + ty0; + bottom0 = bottom0 * scale0 + ty0; + left0 = left0 * scale0 + tx0; + right0 = right0 * scale0 + tx0; + + top1 = top1 * scale0 + ty0; + bottom1 = bottom1 * scale0 + ty0; + left1 = left1 * scale0 + tx0; + right1 = right1 * scale0 + tx0; + + place_rectangle(resample_effect, padding_effect, left0, top0, right0, bottom0); + place_rectangle(resample2_effect, padding2_effect, left1, top1, right1, bottom1); + + CaptureCard card_copy[NUM_CARDS]; + + { + std::unique_lock lock(bmusb_mutex); + + // The first card is the master timer, so wait for it to have a new frame. + // TODO: Make configurable, and with a timeout. + cards[0].new_data_ready_changed.wait(lock, []{ return cards[0].new_data_ready; }); + + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + CaptureCard *card = &cards[card_index]; + card_copy[card_index].usb = card->usb; + card_copy[card_index].new_data_ready = card->new_data_ready; + card_copy[card_index].new_frame = card->new_frame; + card_copy[card_index].new_data_ready_fence = card->new_data_ready_fence; + card->new_data_ready = false; + card->new_data_ready_changed.notify_all(); + } + } + + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + CaptureCard *card = &card_copy[card_index]; + if (!card->new_data_ready) + continue; + + // FIXME: We could still be rendering from it! + card->usb->get_video_frame_allocator()->release_frame(bmusb_current_rendering_frame[card_index]); + bmusb_current_rendering_frame[card_index] = card->new_frame; + + // The new texture might still be uploaded, + // tell the GPU to wait until it's there. + if (card->new_data_ready_fence) + glWaitSync(card->new_data_ready_fence, /*flags=*/0, GL_TIMEOUT_IGNORED); + check_error(); + glDeleteSync(card->new_data_ready_fence); + check_error(); + GLint input_tex_pbo = (GLint)(intptr_t)bmusb_current_rendering_frame[card_index].userdata; + input[card_index]->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET((1280 * 750 * 2 + 44) / 2 + 1280 * 25 + 22), input_tex_pbo); + input[card_index]->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(1280 * 25 + 22), input_tex_pbo); + + if (NUM_CARDS == 1) { + // Set to the other one, too. + input[1]->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET((1280 * 750 * 2 + 44) / 2 + 1280 * 25 + 22), input_tex_pbo); + input[1]->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(1280 * 25 + 22), input_tex_pbo); + } + } + + GLuint y_tex, cbcr_tex; + bool got_frame = h264_encoder.begin_frame(&y_tex, &cbcr_tex); + assert(got_frame); + + // Render chain. + { + GLuint ycbcr_fbo = resource_pool->create_fbo(y_tex, chroma_tex); + chain.render_to_fbo(ycbcr_fbo, WIDTH, HEIGHT); + resource_pool->release_fbo(ycbcr_fbo); + } + if (false) { + glViewport(0, 0, WIDTH, HEIGHT); + chain.render_to_screen(); + } + +#if 0 + PBOFrameAllocator::Frame nv12_frame = nv12_frame_pool.alloc_frame(); + assert(nv12_frame.data != nullptr); // should never happen... maybe? + GLuint pbo = (GLuint)(intptr_t)nv12_frame.userdata; + glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); + check_error(); +#endif + + // Set up for extraction. + float vertices[] = { + 0.0f, 2.0f, + 0.0f, 0.0f, + 2.0f, 0.0f + }; + + glBindVertexArray(vao); + check_error(); + +#if 0 + // Extract Y. + GLuint y_fbo = resource_pool->create_fbo(y_tex); + glBindFramebuffer(GL_FRAMEBUFFER, y_fbo); + glViewport(0, 0, WIDTH, HEIGHT); + check_error(); + { + glUseProgram(y_program_num); + check_error(); + glActiveTexture(GL_TEXTURE0); + check_error(); + glBindTexture(GL_TEXTURE_2D, ycbcr_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + + GLuint position_vbo = fill_vertex_attribute(y_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); + GLuint texcoord_vbo = fill_vertex_attribute(y_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. + + glDrawArrays(GL_TRIANGLES, 0, 3); + check_error(); + + cleanup_vertex_attribute(y_program_num, "position", position_vbo); + check_error(); + cleanup_vertex_attribute(y_program_num, "texcoord", texcoord_vbo); + check_error(); + + resource_pool->release_fbo(y_fbo); + } +#endif + + // Extract Cb/Cr. + GLuint cbcr_fbo = resource_pool->create_fbo(cbcr_tex); + glBindFramebuffer(GL_FRAMEBUFFER, cbcr_fbo); + glViewport(0, 0, WIDTH/2, HEIGHT/2); + check_error(); + GLsync fence; + { + glUseProgram(cbcr_program_num); + check_error(); + + glActiveTexture(GL_TEXTURE0); + check_error(); + glBindTexture(GL_TEXTURE_2D, chroma_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + + float chroma_offset_0[] = { -0.5f / WIDTH, 0.0f }; +// float chroma_offset_1[] = { +0.5f / WIDTH, 0.0f }; + set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0); +// set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1); + + GLuint position_vbo = fill_vertex_attribute(cbcr_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); + GLuint texcoord_vbo = fill_vertex_attribute(cbcr_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. + + glDrawArrays(GL_TRIANGLES, 0, 3); + check_error(); + + cleanup_vertex_attribute(cbcr_program_num, "position", position_vbo); + cleanup_vertex_attribute(cbcr_program_num, "texcoord", texcoord_vbo); + + glUseProgram(0); + check_error(); + +#if 0 + glReadPixels(0, 0, WIDTH/4, HEIGHT/2, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(WIDTH * HEIGHT)); + check_error(); +#endif + + fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); + check_error(); + + resource_pool->release_fbo(cbcr_fbo); + } + +#if 0 + glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); +#endif + +#if 1 + h264_encoder.end_frame(fence); +#else + nv12_frame_pool.release_frame(nv12_frame); +#endif + + //eglSwapBuffers(egl_display, egl_surface); + + +#if 1 +#if _POSIX_C_SOURCE >= 199309L + clock_gettime(CLOCK_MONOTONIC, &now); + double elapsed = now.tv_sec - start.tv_sec + + 1e-9 * (now.tv_nsec - start.tv_nsec); +#else + gettimeofday(&now, NULL); + double elapsed = now.tv_sec - start.tv_sec + + 1e-6 * (now.tv_usec - start.tv_usec); +#endif + if (frame % 100 == 0) { + printf("%d frames in %.3f seconds = %.1f fps (%.1f ms/frame)\n", + frame, elapsed, frame / elapsed, + 1e3 * elapsed / frame); + // chain.print_phase_timing(); + } + + // Reset every 100 frames, so that local variations in frame times + // (especially for the first few frames, when the shaders are + // compiled etc.) don't make it hard to measure for the entire + // remaining duration of the program. + if (frame == 10000) { + frame = 0; + start = now; + } +#endif + } + glDeleteVertexArrays(1, &vao); + //resource_pool->release_glsl_program(y_program_num); + resource_pool->release_glsl_program(cbcr_program_num); + resource_pool->release_2d_texture(chroma_tex); + BMUSBCapture::stop_bm_thread(); +} diff --git a/mixer.h b/mixer.h new file mode 100644 index 0000000..ef7701f --- /dev/null +++ b/mixer.h @@ -0,0 +1,2 @@ +class QSurface; +void mixer_thread(QSurface *surface, QSurface *surface2, QSurface *surface3, QSurface *surface4); diff --git a/pbo_frame_allocator.cpp b/pbo_frame_allocator.cpp new file mode 100644 index 0000000..59013bf --- /dev/null +++ b/pbo_frame_allocator.cpp @@ -0,0 +1,70 @@ +#include "pbo_frame_allocator.h" +#include "util.h" + +using namespace std; + +PBOFrameAllocator::PBOFrameAllocator(size_t frame_size, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits) + : frame_size(frame_size), buffer(buffer) +{ + for (size_t i = 0; i < num_queued_frames; ++i) { + GLuint pbo; + glGenBuffers(1, &pbo); + check_error(); + glBindBuffer(buffer, pbo); + check_error(); + glBufferStorage(buffer, frame_size, NULL, permissions | GL_MAP_PERSISTENT_BIT); + check_error(); + + Frame frame; + frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT); + frame.data2 = frame.data + frame_size / 2; + check_error(); + frame.size = frame_size; + frame.userdata = (void *)(intptr_t)pbo; + frame.owner = this; + frame.interleaved = true; + freelist.push(frame); + } + glBindBuffer(buffer, 0); + check_error(); +} + +PBOFrameAllocator::~PBOFrameAllocator() +{ + while (!freelist.empty()) { + Frame frame = freelist.front(); + freelist.pop(); + GLuint pbo = (intptr_t)frame.userdata; + glBindBuffer(buffer, pbo); + check_error(); + glUnmapBuffer(buffer); + check_error(); + glBindBuffer(buffer, 0); + check_error(); + glDeleteBuffers(1, &pbo); + } +} +//static int sumsum = 0; + +FrameAllocator::Frame PBOFrameAllocator::alloc_frame() +{ + Frame vf; + + std::unique_lock lock(freelist_mutex); // Meh. + if (freelist.empty()) { + printf("Frame overrun (no more spare PBO frames), dropping frame!\n"); + } else { + //fprintf(stderr, "freelist has %d allocated\n", ++sumsum); + vf = freelist.front(); + freelist.pop(); // Meh. + } + vf.len = 0; + return vf; +} + +void PBOFrameAllocator::release_frame(Frame frame) +{ + std::unique_lock lock(freelist_mutex); + freelist.push(frame); + //--sumsum; +} diff --git a/pbo_frame_allocator.h b/pbo_frame_allocator.h new file mode 100644 index 0000000..1155d20 --- /dev/null +++ b/pbo_frame_allocator.h @@ -0,0 +1,34 @@ +#ifndef _PBO_FRAME_ALLOCATOR +#define _PBO_FRAME_ALLOCATOR 1 + +#include +#include +#include + +#include "bmusb.h" + +// An allocator that allocates straight into OpenGL pinned memory. +// Meant for video frames only. We use a queue rather than a stack, +// since we want to maximize pipelineability. +class PBOFrameAllocator : public FrameAllocator { +public: + // Note: You need to have an OpenGL context when calling + // the constructor. + PBOFrameAllocator(size_t frame_size, + size_t num_queued_frames = 16, // FIXME: should be 6 + GLenum buffer = GL_PIXEL_UNPACK_BUFFER_ARB, + GLenum permissions = GL_MAP_WRITE_BIT, + GLenum map_bits = GL_MAP_FLUSH_EXPLICIT_BIT); + ~PBOFrameAllocator() override; + Frame alloc_frame() override; + void release_frame(Frame frame) override; + +private: + size_t frame_size; + + std::mutex freelist_mutex; + std::queue freelist; + GLenum buffer; +}; + +#endif // !defined(_PBO_FRAME_ALLOCATOR) diff --git a/vs-cbcr.130.vert b/vs-cbcr.130.vert new file mode 100644 index 0000000..3c639ce --- /dev/null +++ b/vs-cbcr.130.vert @@ -0,0 +1,23 @@ +#version 130 + +in vec2 position; +in vec2 texcoord; +out vec2 tc0; +//out vec2 tc1; +uniform vec2 foo_chroma_offset_0; +//uniform vec2 foo_chroma_offset_1; + +void main() +{ + // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: + // + // 2.000 0.000 0.000 -1.000 + // 0.000 2.000 0.000 -1.000 + // 0.000 0.000 -2.000 -1.000 + // 0.000 0.000 0.000 1.000 + gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); + vec2 flipped_tc = texcoord; +// flipped_tc.y = 1.0 - flipped_tc.y; + tc0 = flipped_tc + foo_chroma_offset_0; +// tc1 = flipped_tc + foo_chroma_offset_1; +} diff --git a/window.cpp b/window.cpp new file mode 100644 index 0000000..b8eced7 --- /dev/null +++ b/window.cpp @@ -0,0 +1,23 @@ +#include "glwidget.h" +#include "window.h" +#include "mainwindow.h" +#include +#include + +Window::Window(MainWindow *mw) + : main_window(mw) +{ + gl_widget = new GLWidget; + + QVBoxLayout *mainLayout = new QVBoxLayout; + QHBoxLayout *container = new QHBoxLayout; + container->addWidget(gl_widget); + + QWidget *w = new QWidget; + w->setLayout(container); + mainLayout->addWidget(w); + + setLayout(mainLayout); + + setWindowTitle(tr("Nageru")); +} diff --git a/window.h b/window.h new file mode 100644 index 0000000..ff883f4 --- /dev/null +++ b/window.h @@ -0,0 +1,21 @@ +#ifndef WINDOW_H +#define WINDOW_H + +#include + +class GLWidget; +class MainWindow; + +class Window : public QWidget +{ + Q_OBJECT + +public: + Window(MainWindow *mw); + +private: + GLWidget *gl_widget; + MainWindow *main_window; +}; + +#endif