From e18d9bad93d5bf766d52d0cb66db4c3d3f8a711b Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 3 Oct 2015 02:23:14 +0200 Subject: [PATCH] Initial checkin. --- Makefile | 20 + bmusb.cpp | 939 +++++++++++++++++++ bmusb.h | 138 +++ context.cpp | 32 + context.h | 11 + glwidget.cpp | 49 + glwidget.h | 26 + h264encode.cpp | 1949 +++++++++++++++++++++++++++++++++++++++ h264encode.h | 105 +++ main.cpp | 28 + mainwindow.cpp | 13 + mainwindow.h | 14 + mixer.cpp | 654 +++++++++++++ mixer.h | 2 + pbo_frame_allocator.cpp | 70 ++ pbo_frame_allocator.h | 34 + vs-cbcr.130.vert | 23 + window.cpp | 23 + window.h | 21 + 19 files changed, 4151 insertions(+) create mode 100644 Makefile create mode 100644 bmusb.cpp create mode 100644 bmusb.h create mode 100644 context.cpp create mode 100644 context.h create mode 100644 glwidget.cpp create mode 100644 glwidget.h create mode 100644 h264encode.cpp create mode 100644 h264encode.h create mode 100644 main.cpp create mode 100644 mainwindow.cpp create mode 100644 mainwindow.h create mode 100644 mixer.cpp create mode 100644 mixer.h create mode 100644 pbo_frame_allocator.cpp create mode 100644 pbo_frame_allocator.h create mode 100644 vs-cbcr.130.vert create mode 100644 window.cpp create mode 100644 window.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9958450 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +CXX=g++ +CXXFLAGS := -O2 -march=native -g -std=gnu++11 -Wall -Wno-deprecated-declarations -fPIC $(shell pkg-config --cflags Qt5Core Qt5Widgets Qt5OpenGLExtensions libusb-1.0 movit) -pthread +LDFLAGS=$(shell pkg-config --libs Qt5Core Qt5Widgets Qt5OpenGLExtensions libusb-1.0 movit) -lEGL -lGL -pthread -lva -lva-drm -lva-x11 -lX11 -lavformat -lavcodec -lavutil + +# Qt objects +OBJS=glwidget.o main.o mainwindow.o window.o +OBJS += glwidget.moc.o mainwindow.moc.o window.moc.o + +# Mixer objects +OBJS += h264encode.o mixer.o bmusb.o pbo_frame_allocator.o context.o + +%.moc.cpp: %.h + moc $< -o $@ + +all: nageru + +nageru: $(OBJS) + $(CXX) $(LDFLAGS) -o $@ $^ + + diff --git a/bmusb.cpp b/bmusb.cpp new file mode 100644 index 0000000..207d632 --- /dev/null +++ b/bmusb.cpp @@ -0,0 +1,939 @@ +// TODO: Replace with linking to upstream bmusb. + +// Intensity Shuttle USB3 prototype capture driver, v0.3 +// Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable +// (can do captures for hours at a time with no drops), except during startup +// 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation) +// Audio comes out as 8-channel 24-bit raw audio. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef __SSE2__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bmusb.h" + +using namespace std; +using namespace std::placeholders; + +#define WIDTH 1280 +#define HEIGHT 750 /* 30 lines ancillary data? */ +//#define WIDTH 1920 +//#define HEIGHT 1125 /* ??? lines ancillary data? */ +#define HEADER_SIZE 44 +//#define HEADER_SIZE 0 +#define AUDIO_HEADER_SIZE 4 + +//#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE) // UYVY +//#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE) // v210 +#define FRAME_SIZE (8 << 20) + +FILE *audiofp; + +thread usb_thread; +atomic should_quit; + +FrameAllocator::~FrameAllocator() {} + +#define NUM_QUEUED_FRAMES 16 +class MallocFrameAllocator : public FrameAllocator { +public: + MallocFrameAllocator(size_t frame_size); + Frame alloc_frame() override; + void release_frame(Frame frame) override; + +private: + size_t frame_size; + + mutex freelist_mutex; + stack> freelist; // All of size . +}; + +MallocFrameAllocator::MallocFrameAllocator(size_t frame_size) + : frame_size(frame_size) +{ + for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) { + freelist.push(unique_ptr(new uint8_t[frame_size])); + } +} + +FrameAllocator::Frame MallocFrameAllocator::alloc_frame() +{ + Frame vf; + vf.owner = this; + + unique_lock lock(freelist_mutex); // Meh. + if (freelist.empty()) { + printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n", + frame_size); + } else { + vf.data = freelist.top().release(); + vf.size = frame_size; + freelist.pop(); // Meh. + } + return vf; +} + +void MallocFrameAllocator::release_frame(Frame frame) +{ + unique_lock lock(freelist_mutex); + freelist.push(unique_ptr(frame.data)); +} + +bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b) +{ + if (a == b) { + return false; + } else if (a < b) { + return (b - a < 0x8000); + } else { + int wrap_b = 0x10000 + int(b); + return (wrap_b - a < 0x8000); + } +} + +void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque *q) +{ + if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) { + printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n", + q->back().timecode, timecode); + frame.owner->release_frame(frame); + return; + } + + QueuedFrame qf; + qf.format = format; + qf.timecode = timecode; + qf.frame = frame; + + { + unique_lock lock(queue_lock); + q->push_back(move(qf)); + } + queues_not_empty.notify_one(); // might be spurious +} + +void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len) +{ + FILE *fp = fopen(filename, "wb"); + if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) { + printf("short write!\n"); + } + fclose(fp); +} + +void dump_audio_block(uint8_t *audio_start, size_t audio_len) +{ + fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp); +} + +void BMUSBCapture::dequeue_thread() +{ + for ( ;; ) { + unique_lock lock(queue_lock); + queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); }); + + uint16_t video_timecode = pending_video_frames.front().timecode; + uint16_t audio_timecode = pending_audio_frames.front().timecode; + if (video_timecode < audio_timecode) { + printf("Video block 0x%04x without corresponding audio block, dropping.\n", + video_timecode); + video_frame_allocator->release_frame(pending_video_frames.front().frame); + pending_video_frames.pop_front(); + } else if (audio_timecode < video_timecode) { + printf("Audio block 0x%04x without corresponding video block, dropping.\n", + audio_timecode); + audio_frame_allocator->release_frame(pending_audio_frames.front().frame); + pending_audio_frames.pop_front(); + } else { + QueuedFrame video_frame = pending_video_frames.front(); + QueuedFrame audio_frame = pending_audio_frames.front(); + pending_audio_frames.pop_front(); + pending_video_frames.pop_front(); + lock.unlock(); + +#if 0 + char filename[255]; + snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode); + dump_frame(filename, video_frame.frame.data, video_frame.data_len); + dump_audio_block(audio_frame.frame.data, audio_frame.data_len); +#endif + + frame_callback(video_timecode, + video_frame.frame, HEADER_SIZE, video_frame.format, + audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format); + } + } +} + +void BMUSBCapture::start_new_frame(const uint8_t *start) +{ + uint16_t format = (start[3] << 8) | start[2]; + uint16_t timecode = (start[1] << 8) | start[0]; + + if (current_video_frame.len > 0) { + //dump_frame(); + queue_frame(format, timecode, current_video_frame, &pending_video_frames); + } + //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n", + // format, timecode, + // //start[7], start[6], start[5], start[4], + // read_current_frame, FRAME_SIZE); + + current_video_frame = video_frame_allocator->alloc_frame(); + //if (current_video_frame.data == nullptr) { + // read_current_frame = -1; + //} else { + // read_current_frame = 0; + //} +} + +void BMUSBCapture::start_new_audio_block(const uint8_t *start) +{ + uint16_t format = (start[3] << 8) | start[2]; + uint16_t timecode = (start[1] << 8) | start[0]; + if (current_audio_frame.len > 0) { + //dump_audio_block(); + queue_frame(format, timecode, current_audio_frame, &pending_audio_frames); + } + //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n", + // format, timecode, read_current_audio_block); + current_audio_frame = audio_frame_allocator->alloc_frame(); +} + +#if 0 +static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack) +{ + // printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset); + for (unsigned j = 0; j < pack->actual_length; j++) { + //for (int j = 0; j < min(pack->actual_length, 16u); j++) { + printf("%02x", xfr->buffer[j + offset]); + if ((j % 16) == 15) + printf("\n"); + else if ((j % 8) == 7) + printf(" "); + else + printf(" "); + } +} +#endif + +void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n) +{ + assert(n % 2 == 0); + uint8_t *dptr1 = dest1; + uint8_t *dptr2 = dest2; + + for (size_t i = 0; i < n; i += 2) { + *dptr1++ = *src++; + *dptr2++ = *src++; + } +} + +void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end) +{ + if (current_frame->data == nullptr || + current_frame->len > current_frame->size || + start == end) { + return; + } + + int bytes = end - start; + if (current_frame->len + bytes > current_frame->size) { + printf("%d bytes overflow after last %s frame\n", + int(current_frame->len + bytes - current_frame->size), frame_type_name); + //dump_frame(); + } else { + if (current_frame->interleaved) { + uint8_t *data = current_frame->data + current_frame->len / 2; + uint8_t *data2 = current_frame->data2 + current_frame->len / 2; + if (current_frame->len % 2 == 1) { + ++data; + swap(data, data2); + } + if (bytes % 2 == 1) { + *data++ = *start++; + swap(data, data2); + ++current_frame->len; + --bytes; + } + memcpy_interleaved(data, data2, start, bytes); + current_frame->len += bytes; + } else { + memcpy(current_frame->data + current_frame->len, start, bytes); + current_frame->len += bytes; + } + } +} + +#ifdef __SSE2__ + +#if 0 +void dump(const char *name, __m256i n) +{ + printf("%-10s:", name); + printf(" %02x", _mm256_extract_epi8(n, 0)); + printf(" %02x", _mm256_extract_epi8(n, 1)); + printf(" %02x", _mm256_extract_epi8(n, 2)); + printf(" %02x", _mm256_extract_epi8(n, 3)); + printf(" %02x", _mm256_extract_epi8(n, 4)); + printf(" %02x", _mm256_extract_epi8(n, 5)); + printf(" %02x", _mm256_extract_epi8(n, 6)); + printf(" %02x", _mm256_extract_epi8(n, 7)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 8)); + printf(" %02x", _mm256_extract_epi8(n, 9)); + printf(" %02x", _mm256_extract_epi8(n, 10)); + printf(" %02x", _mm256_extract_epi8(n, 11)); + printf(" %02x", _mm256_extract_epi8(n, 12)); + printf(" %02x", _mm256_extract_epi8(n, 13)); + printf(" %02x", _mm256_extract_epi8(n, 14)); + printf(" %02x", _mm256_extract_epi8(n, 15)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 16)); + printf(" %02x", _mm256_extract_epi8(n, 17)); + printf(" %02x", _mm256_extract_epi8(n, 18)); + printf(" %02x", _mm256_extract_epi8(n, 19)); + printf(" %02x", _mm256_extract_epi8(n, 20)); + printf(" %02x", _mm256_extract_epi8(n, 21)); + printf(" %02x", _mm256_extract_epi8(n, 22)); + printf(" %02x", _mm256_extract_epi8(n, 23)); + printf(" "); + printf(" %02x", _mm256_extract_epi8(n, 24)); + printf(" %02x", _mm256_extract_epi8(n, 25)); + printf(" %02x", _mm256_extract_epi8(n, 26)); + printf(" %02x", _mm256_extract_epi8(n, 27)); + printf(" %02x", _mm256_extract_epi8(n, 28)); + printf(" %02x", _mm256_extract_epi8(n, 29)); + printf(" %02x", _mm256_extract_epi8(n, 30)); + printf(" %02x", _mm256_extract_epi8(n, 31)); + printf("\n"); +} +#endif + +// Does a memcpy and memchr in one to reduce processing time. +// Note that the benefit is somewhat limited if your L3 cache is small, +// as you'll (unfortunately) spend most of the time loading the data +// from main memory. +// +// Complicated cases are left to the slow path; it basically stops copying +// up until the first instance of "sync_char" (usually a bit before, actually). +// This is fine, since 0x00 bytes shouldn't really show up in normal picture +// data, and what we really need this for is the 00 00 ff ff marker in video data. +const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char) +{ + if (current_frame->data == nullptr || + current_frame->len > current_frame->size || + start == limit) { + return start; + } + size_t orig_bytes = limit - start; + if (orig_bytes < 128) { + // Don't bother. + return start; + } + + // Don't read more bytes than we can write. + limit = min(limit, start + (current_frame->size - current_frame->len)); + + // Align end to 32 bytes. + limit = (const uint8_t *)(intptr_t(limit) & ~31); + + if (start >= limit) { + return start; + } + + // Process [0,31] bytes, such that start gets aligned to 32 bytes. + const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31); + if (aligned_start != start) { + const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start); + if (sync_start == nullptr) { + add_to_frame(current_frame, "", start, aligned_start); + } else { + add_to_frame(current_frame, "", start, sync_start); + return sync_start; + } + } + + // Make the length a multiple of 64. + if (current_frame->interleaved) { + if (((limit - aligned_start) % 64) != 0) { + limit -= 32; + } + assert(((limit - aligned_start) % 64) == 0); + } + +#if __AVX2__ + const __m256i needle = _mm256_set1_epi8(sync_char); + + const __restrict __m256i *in = (const __m256i *)aligned_start; + if (current_frame->interleaved) { + __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2); + __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2); + if (current_frame->len % 2 == 1) { + swap(out1, out2); + } + + __m256i shuffle_cw = _mm256_set_epi8( + 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0, + 15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); + while (in < (const __m256i *)limit) { + // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128). + __m256i data1 = _mm256_stream_load_si256(in); // AaBbCcDd EeFfGgHh + __m256i data2 = _mm256_stream_load_si256(in + 1); // IiJjKkLl MmNnOoPp + + __m256i found1 = _mm256_cmpeq_epi8(data1, needle); + __m256i found2 = _mm256_cmpeq_epi8(data2, needle); + __m256i found = _mm256_or_si256(found1, found2); + + data1 = _mm256_shuffle_epi8(data1, shuffle_cw); // ABCDabcd EFGHefgh + data2 = _mm256_shuffle_epi8(data2, shuffle_cw); // IJKLijkl MNOPmnop + + data1 = _mm256_permute4x64_epi64(data1, 0b11011000); // ABCDEFGH abcdefgh + data2 = _mm256_permute4x64_epi64(data2, 0b11011000); // IJKLMNOP ijklmnop + + __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000); + __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001); + + _mm256_storeu_si256(out1, lo); // Store as early as possible, even if the data isn't used. + _mm256_storeu_si256(out2, hi); + + if (!_mm256_testz_si256(found, found)) { + break; + } + + in += 2; + ++out1; + ++out2; + } + current_frame->len += (uint8_t *)in - aligned_start; + } else { + __m256i *out = (__m256i *)(current_frame->data + current_frame->len); + while (in < (const __m256i *)limit) { + __m256i data = _mm256_load_si256(in); + _mm256_storeu_si256(out, data); // Store as early as possible, even if the data isn't used. + __m256i found = _mm256_cmpeq_epi8(data, needle); + if (!_mm256_testz_si256(found, found)) { + break; + } + + ++in; + ++out; + } + current_frame->len = (uint8_t *)out - current_frame->data; + } +#else + const __m128i needle = _mm_set1_epi8(sync_char); + + const __m128i *in = (const __m128i *)aligned_start; + if (current_frame->interleaved) { + __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2); + __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2); + if (current_frame->len % 2 == 1) { + swap(out1, out2); + } + + __m128i mask_lower_byte = _mm_set1_epi16(0x00ff); + while (in < (const __m128i *)limit) { + __m128i data1 = _mm_load_si128(in); + __m128i data2 = _mm_load_si128(in + 1); + __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte); + __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte); + __m128i data1_hi = _mm_srli_epi16(data1, 8); + __m128i data2_hi = _mm_srli_epi16(data2, 8); + __m128i lo = _mm_packus_epi16(data1_lo, data2_lo); + _mm_storeu_si128(out1, lo); // Store as early as possible, even if the data isn't used. + __m128i hi = _mm_packus_epi16(data1_hi, data2_hi); + _mm_storeu_si128(out2, hi); + __m128i found1 = _mm_cmpeq_epi8(data1, needle); + __m128i found2 = _mm_cmpeq_epi8(data2, needle); + if (!_mm_testz_si128(found1, found1) || + !_mm_testz_si128(found2, found2)) { + break; + } + + in += 2; + ++out1; + ++out2; + } + current_frame->len += (uint8_t *)in - aligned_start; + } else { + __m128i *out = (__m128i *)(current_frame->data + current_frame->len); + while (in < (const __m128i *)limit) { + __m128i data = _mm_load_si128(in); + _mm_storeu_si128(out, data); // Store as early as possible, even if the data isn't used. + __m128i found = _mm_cmpeq_epi8(data, needle); + if (!_mm_testz_si128(found, found)) { + break; + } + + ++in; + ++out; + } + current_frame->len = (uint8_t *)out - current_frame->data; + } +#endif + + //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes); + + return (const uint8_t *)in; +} +#endif + +void decode_packs(const libusb_transfer *xfr, + const char *sync_pattern, + int sync_length, + FrameAllocator::Frame *current_frame, + const char *frame_type_name, + function start_callback) +{ + int offset = 0; + for (int i = 0; i < xfr->num_iso_packets; i++) { + const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i]; + + if (pack->status != LIBUSB_TRANSFER_COMPLETED) { + fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status); + continue; +//exit(5); + } + + const uint8_t *start = xfr->buffer + offset; + const uint8_t *limit = start + pack->actual_length; + while (start < limit) { // Usually runs only one iteration. +#ifdef __SSE2__ + start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]); + if (start == limit) break; + assert(start < limit); +#endif + + const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length); + if (start_next_frame == nullptr) { + // add the rest of the buffer + add_to_frame(current_frame, frame_type_name, start, limit); + break; + } else { + add_to_frame(current_frame, frame_type_name, start, start_next_frame); + start = start_next_frame + sync_length; // skip sync + start_callback(start); + } + } +#if 0 + dump_pack(xfr, offset, pack); +#endif + offset += pack->length; + } +} + +void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr) +{ + if (xfr->status != LIBUSB_TRANSFER_COMPLETED) { + fprintf(stderr, "transfer status %d\n", xfr->status); + libusb_free_transfer(xfr); + exit(3); + } + + assert(xfr->user_data != nullptr); + BMUSBCapture *usb = static_cast(xfr->user_data); + + if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) { + if (xfr->endpoint == 0x84) { + decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1)); + } else { + decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1)); + } + } + if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) { + //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr); + uint8_t *buf = libusb_control_transfer_get_data(xfr); +#if 0 + if (setup->wIndex == 44) { + printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]); + } else { + printf("read register %2d: 0x%02x%02x%02x%02x\n", + setup->wIndex, buf[0], buf[1], buf[2], buf[3]); + } +#else + memcpy(usb->register_file + usb->current_register, buf, 4); + usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS; + if (usb->current_register == 0) { + // read through all of them + printf("register dump:"); + for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) { + printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]); + } + printf("\n"); + } + libusb_fill_control_setup(xfr->buffer, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/usb->current_register, /*length=*/4); +#endif + } + +#if 0 + printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length); + for (i = 0; i < xfr->actual_length; i++) { + printf("%02x", xfr->buffer[i]); + if (i % 16) + printf("\n"); + else if (i % 8) + printf(" "); + else + printf(" "); + } +#endif + + if (libusb_submit_transfer(xfr) < 0) { + fprintf(stderr, "error re-submitting URB\n"); + exit(1); + } +} + +void BMUSBCapture::usb_thread_func() +{ + sched_param param; + memset(¶m, 0, sizeof(param)); + param.sched_priority = 1; + if (sched_setscheduler(0, SCHED_RR, ¶m) == -1) { + printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno)); + } + while (!should_quit) { + int rc = libusb_handle_events(nullptr); + if (rc != LIBUSB_SUCCESS) + break; + } +} + +void BMUSBCapture::configure_card() +{ + if (video_frame_allocator == nullptr) { + set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE)); // FIXME: leak. + } + if (audio_frame_allocator == nullptr) { + set_audio_frame_allocator(new MallocFrameAllocator(65536)); // FIXME: leak. + } + thread(&BMUSBCapture::dequeue_thread, this).detach(); + + int rc; + struct libusb_transfer *xfr; + + rc = libusb_init(nullptr); + if (rc < 0) { + fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc)); + exit(1); + } + + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b); + //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f); + struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid); + if (!devh) { + fprintf(stderr, "Error finding USB device\n"); + exit(1); + } + + libusb_config_descriptor *config; + rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config); + if (rc < 0) { + fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc)); + exit(1); + } + printf("%d interface\n", config->bNumInterfaces); + for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) { + printf(" interface %d\n", interface_number); + const libusb_interface *interface = &config->interface[interface_number]; + for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) { + const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting]; + printf(" alternate setting %d\n", interface_desc->bAlternateSetting); + for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) { + const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number]; + printf(" endpoint address 0x%02x\n", endpoint->bEndpointAddress); + } + } + } + +#if 0 + rc = libusb_set_configuration(devh, /*configuration=*/1); + if (rc < 0) { + fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + + rc = libusb_claim_interface(devh, 0); + if (rc < 0) { + fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc)); + exit(1); + } + +#if 0 + rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); + if (rc < 0) { + fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + +#if 0 + rc = libusb_claim_interface(devh, 3); + if (rc < 0) { + fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + + // theories: + // 44 is some kind of timer register (first 16 bits count upwards) + // 24 is some sort of watchdog? + // you can seemingly set it to 0x73c60001 and that bit will eventually disappear + // (or will go to 0x73c60010?), also seen 0x73c60100 + // 12 also changes all the time, unclear why + // 16 seems to be autodetected mode somehow + // -- this is e00115e0 after reset? + // ed0115e0 after mode change [to output?] + // 2d0015e0 after more mode change [to input] + // ed0115e0 after more mode change + // 2d0015e0 after more mode change + // + // 390115e0 seems to indicate we have signal + // changes to 200115e0 when resolution changes/we lose signal, driver resets after a while + // + // 200015e0 on startup + // changes to 250115e0 when we sync to the signal + // + // so only first 16 bits count, and 0x0100 is a mask for ok/stable signal? + // + // 28 and 32 seems to be analog audio input levels (one byte for each of the eight channels). + // however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a). + // + // 4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect. + // perhaps some of them are related to analog output? + // + // 36 can be set to 0 with no apparent effect (all of this tested on both video and audio), + // but the driver sets it to 0x8036802a at some point. + // + // register 16: + // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal + // + // theories: + // 0x01 - stable signal + // 0x04 - deep color + // 0x08 - unknown (audio??) + // 0x20 - 720p?? + // 0x30 - 576p?? + + struct ctrl { + int endpoint; + int request; + int index; + uint32_t data; + }; + static const ctrl ctrls[] = { + { LIBUSB_ENDPOINT_IN, 214, 16, 0 }, + { LIBUSB_ENDPOINT_IN, 214, 0, 0 }, + + // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit + // capture (v210). + // clearing the 0x08000000 bit seems to change the capture format (other source?) + // 0x10000000 = analog audio instead of embedded audio, it seems + // 0x3a000000 = component video? (analog audio) + // 0x3c000000 = composite video? (analog audio) + // 0x3e000000 = s-video? (analog audio) + { LIBUSB_ENDPOINT_OUT, 215, 0, 0x29000000 }, + //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x80000100 }, + //{ LIBUSB_ENDPOINT_OUT, 215, 0, 0x09000000 }, + { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 }, // latch for frame start? + { LIBUSB_ENDPOINT_IN, 214, 24, 0 }, // + }; + + for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) { + uint32_t flipped = htonl(ctrls[req].data); + static uint8_t value[4]; + memcpy(value, &flipped, sizeof(flipped)); + int size = sizeof(value); + //if (ctrls[req].request == 215) size = 0; + rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint, + /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0); + if (rc < 0) { + fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc)); + exit(1); + } + + printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index); + for (int i = 0; i < rc; ++i) { + printf("%02x", value[i]); + } + printf("\n"); + } + + // Alternate setting 1 is output, alternate setting 2 is input. + // Card is reset when switching alternates, so the driver uses + // this “double switch” when it wants to reset. +#if 0 + rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1); + if (rc < 0) { + fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc)); + exit(1); + } +#endif + rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/4); + if (rc < 0) { + fprintf(stderr, "Error setting alternate 4: %s\n", libusb_error_name(rc)); + exit(1); + } + +#if 0 + // DEBUG + for ( ;; ) { + static int my_index = 0; + static uint8_t value[4]; + int size = sizeof(value); + rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, + /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0); + if (rc < 0) { + fprintf(stderr, "Error on control\n"); + exit(1); + } + printf("rc=%d index=%d: 0x", rc, my_index); + for (int i = 0; i < rc; ++i) { + printf("%02x", value[i]); + } + printf("\n"); + } +#endif + +#if 0 + // set up an asynchronous transfer of the timer register + static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4]; + static int completed = 0; + + xfr = libusb_alloc_transfer(0); + libusb_fill_control_setup(cmdbuf, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/44, /*length=*/4); + libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0); + xfr->user_data = this; + libusb_submit_transfer(xfr); + + // set up an asynchronous transfer of register 24 + static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4]; + static int completed2 = 0; + + xfr = libusb_alloc_transfer(0); + libusb_fill_control_setup(cmdbuf2, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/24, /*length=*/4); + libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0); + xfr->user_data = this; + libusb_submit_transfer(xfr); +#endif + + // set up an asynchronous transfer of the register dump + static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4]; + static int completed3 = 0; + + xfr = libusb_alloc_transfer(0); + libusb_fill_control_setup(cmdbuf3, + LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0, + /*index=*/current_register, /*length=*/4); + libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0); + xfr->user_data = this; + //libusb_submit_transfer(xfr); + + audiofp = fopen("audio.raw", "wb"); + + // set up isochronous transfers for audio and video + for (int e = 3; e <= 4; ++e) { + //int num_transfers = (e == 3) ? 6 : 6; + int num_transfers = 2; + for (int i = 0; i < num_transfers; ++i) { + int num_iso_pack, size; + if (e == 3) { + // Video seems to require isochronous packets scaled with the width; + // seemingly six lines is about right, rounded up to the required 1kB + // multiple. + size = WIDTH * 2 * 6; + // Note that for 10-bit input, you'll need to increase size accordingly. + //size = size * 4 / 3; + if (size % 1024 != 0) { + size &= ~1023; + size += 1024; + } + num_iso_pack = (2 << 20) / size; // 512 kB. + printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size); + } else { + size = 0xc0; + num_iso_pack = 80; + } + int num_bytes = num_iso_pack * size; + uint8_t *buf = new uint8_t[num_bytes]; + + xfr = libusb_alloc_transfer(num_iso_pack); + if (!xfr) { + fprintf(stderr, "oom\n"); + exit(1); + } + + int ep = LIBUSB_ENDPOINT_IN | e; + libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes, + num_iso_pack, cb_xfr, nullptr, 0); + libusb_set_iso_packet_lengths(xfr, size); + xfr->user_data = this; + iso_xfrs.push_back(xfr); + } + } +} + +void BMUSBCapture::start_bm_capture() +{ + printf("starting capture\n"); + int i = 0; + for (libusb_transfer *xfr : iso_xfrs) { + printf("submitting transfer...\n"); + int rc = libusb_submit_transfer(xfr); + ++i; + if (rc < 0) { + //printf("num_bytes=%d\n", num_bytes); + fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n", + xfr->endpoint, i, libusb_error_name(rc)); + exit(1); + } + } + + +#if 0 + libusb_release_interface(devh, 0); +out: + if (devh) + libusb_close(devh); + libusb_exit(nullptr); + return rc; +#endif +} + +void BMUSBCapture::start_bm_thread() +{ + should_quit = false; + usb_thread = thread(&BMUSBCapture::usb_thread_func); +} + +void BMUSBCapture::stop_bm_thread() +{ + should_quit = true; + usb_thread.join(); +} diff --git a/bmusb.h b/bmusb.h new file mode 100644 index 0000000..c881748 --- /dev/null +++ b/bmusb.h @@ -0,0 +1,138 @@ +#ifndef _BMUSB_H +#define _BMUSB_H + +#include +#include +#include +#include +#include +#include +#include + +// An interface for frame allocators; if you do not specify one +// (using set_video_frame_allocator), a default one that pre-allocates +// a freelist of eight frames using new[] will be used. Specifying +// your own can be useful if you have special demands for where you want the +// frame to end up and don't want to spend the extra copy to get it there, for +// instance GPU memory. +class FrameAllocator { + public: + struct Frame { + uint8_t *data = nullptr; + uint8_t *data2 = nullptr; // Only if interleaved == true. + size_t len = 0; // Number of bytes we actually have. + size_t size = 0; // Number of bytes we have room for. + void *userdata = nullptr; + FrameAllocator *owner = nullptr; + + // If set to true, every other byte will go to data and to data2. + // If so, and are still about the number of total bytes + // so if size == 1024, there's 512 bytes in data and 512 in data2. + bool interleaved = false; + }; + + virtual ~FrameAllocator(); + + // Request a video frame. Note that this is called from the + // USB thread, which runs with realtime priority and is + // very sensitive to delays. Thus, you should not do anything + // here that might sleep, including calling malloc(). + // (Taking a mutex is borderline.) + // + // The Frame object will be given to the frame callback, + // which is responsible for releasing the video frame back + // once it is usable for new frames (ie., it will no longer + // be read from). You can use the "userdata" pointer for + // whatever you want to identify this frame if you need to. + // + // Returning a Frame with data==nullptr is allowed; + // if so, the frame in progress will be dropped. + virtual Frame alloc_frame() = 0; + + virtual void release_frame(Frame frame) = 0; +}; + +typedef std::function + frame_callback_t; + +// The actual capturing class, representing capture from a single card. +class BMUSBCapture { + public: + BMUSBCapture(int vid = 0x1edb, int pid = 0xbd3b) + : vid(vid), pid(pid) + { + } + + // Does not take ownership. + void set_video_frame_allocator(FrameAllocator *allocator) + { + video_frame_allocator = allocator; + } + + FrameAllocator *get_video_frame_allocator() + { + return video_frame_allocator; + } + + // Does not take ownership. + void set_audio_frame_allocator(FrameAllocator *allocator) + { + audio_frame_allocator = allocator; + } + + FrameAllocator *get_audio_frame_allocator() + { + return audio_frame_allocator; + } + + void set_frame_callback(frame_callback_t callback) + { + frame_callback = callback; + } + + void configure_card(); + void start_bm_capture(); + + static void start_bm_thread(); + static void stop_bm_thread(); + + private: + struct QueuedFrame { + uint16_t timecode; + uint16_t format; + FrameAllocator::Frame frame; + }; + + void start_new_audio_block(const uint8_t *start); + void start_new_frame(const uint8_t *start); + + void queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, std::deque *q); + void dequeue_thread(); + + static void usb_thread_func(); + static void cb_xfr(struct libusb_transfer *xfr); + + FrameAllocator::Frame current_video_frame; + FrameAllocator::Frame current_audio_frame; + + std::mutex queue_lock; + std::condition_variable queues_not_empty; + std::deque pending_video_frames; + std::deque pending_audio_frames; + + FrameAllocator *video_frame_allocator = nullptr; + FrameAllocator *audio_frame_allocator = nullptr; + frame_callback_t frame_callback = nullptr; + + int current_register = 0; + + static constexpr int NUM_BMUSB_REGISTERS = 60; + uint8_t register_file[NUM_BMUSB_REGISTERS]; + + int vid, pid; + std::vector iso_xfrs; +}; + +#endif diff --git a/context.cpp b/context.cpp new file mode 100644 index 0000000..72275e9 --- /dev/null +++ b/context.cpp @@ -0,0 +1,32 @@ +#include +#include + +#include +#include +#include + +QSurface *create_surface(const QSurfaceFormat &format) +{ + QOffscreenSurface *surface = new QOffscreenSurface; + surface->setFormat(format); +// QWindow *surface = new QWindow; + surface->create(); + if (!surface->isValid()) { + printf("ERROR: surface not valid!\n"); +// abort(); + } + return surface; +} + +QOpenGLContext *create_context() +{ + QOpenGLContext *context = new QOpenGLContext; + context->setShareContext(QOpenGLContext::globalShareContext()); + context->create(); + return context; +} + +bool make_current(QOpenGLContext *context, QSurface *surface) +{ + return context->makeCurrent(surface); +} diff --git a/context.h b/context.h new file mode 100644 index 0000000..5a75766 --- /dev/null +++ b/context.h @@ -0,0 +1,11 @@ + +// Needs to be in its own file because Qt and libepoxy seemingly don't coexist well +// within the same file. + +class QSurface; +class QOpenGLContext; +class QSurfaceFormat; + +QSurface *create_surface(const QSurfaceFormat &format); +QOpenGLContext *create_context(); +bool make_current(QOpenGLContext *context, QSurface *surface); diff --git a/glwidget.cpp b/glwidget.cpp new file mode 100644 index 0000000..cc2b4fd --- /dev/null +++ b/glwidget.cpp @@ -0,0 +1,49 @@ +#include "context.h" +#include "glwidget.h" +#include "mixer.h" +#include +#include +#include +#include +#include +#include +#include + +GLWidget::GLWidget(QWidget *parent) + : QOpenGLWidget(parent) +{ +} + +GLWidget::~GLWidget() +{ +} + +QSize GLWidget::minimumSizeHint() const +{ + return QSize(50, 50); +} + +QSize GLWidget::sizeHint() const +{ + return QSize(400, 400); +} + +void GLWidget::initializeGL() +{ + printf("egl=%p glx=%p\n", eglGetCurrentContext(), glXGetCurrentContext()); + //printf("threads: %p %p\n", QThread::currentThread(), qGuiApp->thread()); + + QSurface *surface = create_surface(format()); + QSurface *surface2 = create_surface(format()); + QSurface *surface3 = create_surface(format()); + QSurface *surface4 = create_surface(format()); + std::thread([surface, surface2, surface3, surface4]{ + mixer_thread(surface, surface2, surface3, surface4); + }).detach(); +} + +void GLWidget::paintGL() +{ + glClearColor(1.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); +} diff --git a/glwidget.h b/glwidget.h new file mode 100644 index 0000000..eb26d38 --- /dev/null +++ b/glwidget.h @@ -0,0 +1,26 @@ +#ifndef GLWIDGET_H +#define GLWIDGET_H + +#include +#include +#include +#include +#include + +class GLWidget : public QOpenGLWidget +{ + Q_OBJECT + +public: + GLWidget(QWidget *parent = 0); + ~GLWidget(); + + QSize minimumSizeHint() const Q_DECL_OVERRIDE; + QSize sizeHint() const Q_DECL_OVERRIDE; + +protected: + void initializeGL() Q_DECL_OVERRIDE; + void paintGL() Q_DECL_OVERRIDE; +}; + +#endif diff --git a/h264encode.cpp b/h264encode.cpp new file mode 100644 index 0000000..d4fc0ad --- /dev/null +++ b/h264encode.cpp @@ -0,0 +1,1949 @@ +//#include "sysdeps.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "h264encode.h" + +#define CHECK_VASTATUS(va_status, func) \ + if (va_status != VA_STATUS_SUCCESS) { \ + fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \ + exit(1); \ + } + +//#include "loadsurface.h" + +#define NAL_REF_IDC_NONE 0 +#define NAL_REF_IDC_LOW 1 +#define NAL_REF_IDC_MEDIUM 2 +#define NAL_REF_IDC_HIGH 3 + +#define NAL_NON_IDR 1 +#define NAL_IDR 5 +#define NAL_SPS 7 +#define NAL_PPS 8 +#define NAL_SEI 6 + +#define SLICE_TYPE_P 0 +#define SLICE_TYPE_B 1 +#define SLICE_TYPE_I 2 +#define IS_P_SLICE(type) (SLICE_TYPE_P == (type)) +#define IS_B_SLICE(type) (SLICE_TYPE_B == (type)) +#define IS_I_SLICE(type) (SLICE_TYPE_I == (type)) + + +#define ENTROPY_MODE_CAVLC 0 +#define ENTROPY_MODE_CABAC 1 + +#define PROFILE_IDC_BASELINE 66 +#define PROFILE_IDC_MAIN 77 +#define PROFILE_IDC_HIGH 100 + +#define BITSTREAM_ALLOCATE_STEPPING 4096 + +#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ +static VADisplay va_dpy; +static VAProfile h264_profile = (VAProfile)~0; +static VAConfigAttrib config_attrib[VAConfigAttribTypeMax]; +static int config_attrib_num = 0, enc_packed_header_idx; + +struct GLSurface { + VASurfaceID src_surface, ref_surface; + VABufferID coded_buf; + + VAImage surface_image; + GLuint y_tex, cbcr_tex; + EGLImage y_egl_image, cbcr_egl_image; +}; +GLSurface gl_surfaces[SURFACE_NUM]; + +static VAConfigID config_id; +static VAContextID context_id; +static VAEncSequenceParameterBufferH264 seq_param; +static VAEncPictureParameterBufferH264 pic_param; +static VAEncSliceParameterBufferH264 slice_param; +static VAPictureH264 CurrentCurrPic; +static VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32]; + +static unsigned int MaxFrameNum = (2<<16); +static unsigned int MaxPicOrderCntLsb = (2<<8); +static unsigned int Log2MaxFrameNum = 16; +static unsigned int Log2MaxPicOrderCntLsb = 8; + +static unsigned int num_ref_frames = 2; +static unsigned int numShortTerm = 0; +static int constraint_set_flag = 0; +static int h264_packedheader = 0; /* support pack header? */ +static int h264_maxref = (1<<16|1); +static int h264_entropy_mode = 1; /* cabac */ + +static char *coded_fn = NULL; +static FILE *coded_fp = NULL; + +static int frame_width = 176; +static int frame_height = 144; +static int frame_width_mbaligned; +static int frame_height_mbaligned; +static int frame_rate = 60; +static unsigned int frame_bitrate = 0; +static unsigned int frame_slices = 1; +static double frame_size = 0; +static int initial_qp = 15; +//static int initial_qp = 28; +static int minimal_qp = 0; +static int intra_period = 30; +static int intra_idr_period = 60; +static int ip_period = 1; +static int rc_mode = -1; +static int rc_default_modes[] = { + VA_RC_VBR, + VA_RC_CQP, + VA_RC_VBR_CONSTRAINED, + VA_RC_CBR, + VA_RC_VCM, + VA_RC_NONE, +}; +static unsigned long long current_frame_encoding = 0; +static unsigned long long current_frame_display = 0; +static unsigned long long current_IDR_display = 0; +static unsigned int current_frame_num = 0; +static int current_frame_type; + +static int misc_priv_type = 0; +static int misc_priv_value = 0; + +/* thread to save coded data */ +#define SRC_SURFACE_FREE 0 +#define SRC_SURFACE_IN_ENCODING 1 + +struct __bitstream { + unsigned int *buffer; + int bit_offset; + int max_size_in_dword; +}; +typedef struct __bitstream bitstream; + +using namespace std; + +static unsigned int +va_swap32(unsigned int val) +{ + unsigned char *pval = (unsigned char *)&val; + + return ((pval[0] << 24) | + (pval[1] << 16) | + (pval[2] << 8) | + (pval[3] << 0)); +} + +static void +bitstream_start(bitstream *bs) +{ + bs->max_size_in_dword = BITSTREAM_ALLOCATE_STEPPING; + bs->buffer = (unsigned int *)calloc(bs->max_size_in_dword * sizeof(int), 1); + bs->bit_offset = 0; +} + +static void +bitstream_end(bitstream *bs) +{ + int pos = (bs->bit_offset >> 5); + int bit_offset = (bs->bit_offset & 0x1f); + int bit_left = 32 - bit_offset; + + if (bit_offset) { + bs->buffer[pos] = va_swap32((bs->buffer[pos] << bit_left)); + } +} + +static void +bitstream_put_ui(bitstream *bs, unsigned int val, int size_in_bits) +{ + int pos = (bs->bit_offset >> 5); + int bit_offset = (bs->bit_offset & 0x1f); + int bit_left = 32 - bit_offset; + + if (!size_in_bits) + return; + + bs->bit_offset += size_in_bits; + + if (bit_left > size_in_bits) { + bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val); + } else { + size_in_bits -= bit_left; + bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits); + bs->buffer[pos] = va_swap32(bs->buffer[pos]); + + if (pos + 1 == bs->max_size_in_dword) { + bs->max_size_in_dword += BITSTREAM_ALLOCATE_STEPPING; + bs->buffer = (unsigned int *)realloc(bs->buffer, bs->max_size_in_dword * sizeof(unsigned int)); + } + + bs->buffer[pos + 1] = val; + } +} + +static void +bitstream_put_ue(bitstream *bs, unsigned int val) +{ + int size_in_bits = 0; + int tmp_val = ++val; + + while (tmp_val) { + tmp_val >>= 1; + size_in_bits++; + } + + bitstream_put_ui(bs, 0, size_in_bits - 1); // leading zero + bitstream_put_ui(bs, val, size_in_bits); +} + +static void +bitstream_put_se(bitstream *bs, int val) +{ + unsigned int new_val; + + if (val <= 0) + new_val = -2 * val; + else + new_val = 2 * val - 1; + + bitstream_put_ue(bs, new_val); +} + +static void +bitstream_byte_aligning(bitstream *bs, int bit) +{ + int bit_offset = (bs->bit_offset & 0x7); + int bit_left = 8 - bit_offset; + int new_val; + + if (!bit_offset) + return; + + assert(bit == 0 || bit == 1); + + if (bit) + new_val = (1 << bit_left) - 1; + else + new_val = 0; + + bitstream_put_ui(bs, new_val, bit_left); +} + +static void +rbsp_trailing_bits(bitstream *bs) +{ + bitstream_put_ui(bs, 1, 1); + bitstream_byte_aligning(bs, 0); +} + +static void nal_start_code_prefix(bitstream *bs) +{ + bitstream_put_ui(bs, 0x00000001, 32); +} + +static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type) +{ + bitstream_put_ui(bs, 0, 1); /* forbidden_zero_bit: 0 */ + bitstream_put_ui(bs, nal_ref_idc, 2); + bitstream_put_ui(bs, nal_unit_type, 5); +} + +static void sps_rbsp(bitstream *bs) +{ + int profile_idc = PROFILE_IDC_BASELINE; + + if (h264_profile == VAProfileH264High) + profile_idc = PROFILE_IDC_HIGH; + else if (h264_profile == VAProfileH264Main) + profile_idc = PROFILE_IDC_MAIN; + + bitstream_put_ui(bs, profile_idc, 8); /* profile_idc */ + bitstream_put_ui(bs, !!(constraint_set_flag & 1), 1); /* constraint_set0_flag */ + bitstream_put_ui(bs, !!(constraint_set_flag & 2), 1); /* constraint_set1_flag */ + bitstream_put_ui(bs, !!(constraint_set_flag & 4), 1); /* constraint_set2_flag */ + bitstream_put_ui(bs, !!(constraint_set_flag & 8), 1); /* constraint_set3_flag */ + bitstream_put_ui(bs, 0, 4); /* reserved_zero_4bits */ + bitstream_put_ui(bs, seq_param.level_idc, 8); /* level_idc */ + bitstream_put_ue(bs, seq_param.seq_parameter_set_id); /* seq_parameter_set_id */ + + if ( profile_idc == PROFILE_IDC_HIGH) { + bitstream_put_ue(bs, 1); /* chroma_format_idc = 1, 4:2:0 */ + bitstream_put_ue(bs, 0); /* bit_depth_luma_minus8 */ + bitstream_put_ue(bs, 0); /* bit_depth_chroma_minus8 */ + bitstream_put_ui(bs, 0, 1); /* qpprime_y_zero_transform_bypass_flag */ + bitstream_put_ui(bs, 0, 1); /* seq_scaling_matrix_present_flag */ + } + + bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_frame_num_minus4); /* log2_max_frame_num_minus4 */ + bitstream_put_ue(bs, seq_param.seq_fields.bits.pic_order_cnt_type); /* pic_order_cnt_type */ + + if (seq_param.seq_fields.bits.pic_order_cnt_type == 0) + bitstream_put_ue(bs, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4); /* log2_max_pic_order_cnt_lsb_minus4 */ + else { + assert(0); + } + + bitstream_put_ue(bs, seq_param.max_num_ref_frames); /* num_ref_frames */ + bitstream_put_ui(bs, 0, 1); /* gaps_in_frame_num_value_allowed_flag */ + + bitstream_put_ue(bs, seq_param.picture_width_in_mbs - 1); /* pic_width_in_mbs_minus1 */ + bitstream_put_ue(bs, seq_param.picture_height_in_mbs - 1); /* pic_height_in_map_units_minus1 */ + bitstream_put_ui(bs, seq_param.seq_fields.bits.frame_mbs_only_flag, 1); /* frame_mbs_only_flag */ + + if (!seq_param.seq_fields.bits.frame_mbs_only_flag) { + assert(0); + } + + bitstream_put_ui(bs, seq_param.seq_fields.bits.direct_8x8_inference_flag, 1); /* direct_8x8_inference_flag */ + bitstream_put_ui(bs, seq_param.frame_cropping_flag, 1); /* frame_cropping_flag */ + + if (seq_param.frame_cropping_flag) { + bitstream_put_ue(bs, seq_param.frame_crop_left_offset); /* frame_crop_left_offset */ + bitstream_put_ue(bs, seq_param.frame_crop_right_offset); /* frame_crop_right_offset */ + bitstream_put_ue(bs, seq_param.frame_crop_top_offset); /* frame_crop_top_offset */ + bitstream_put_ue(bs, seq_param.frame_crop_bottom_offset); /* frame_crop_bottom_offset */ + } + + //if ( frame_bit_rate < 0 ) { //TODO EW: the vui header isn't correct + if ( false ) { + bitstream_put_ui(bs, 0, 1); /* vui_parameters_present_flag */ + } else { + bitstream_put_ui(bs, 1, 1); /* vui_parameters_present_flag */ + bitstream_put_ui(bs, 0, 1); /* aspect_ratio_info_present_flag */ + bitstream_put_ui(bs, 0, 1); /* overscan_info_present_flag */ + bitstream_put_ui(bs, 0, 1); /* video_signal_type_present_flag */ + bitstream_put_ui(bs, 0, 1); /* chroma_loc_info_present_flag */ + bitstream_put_ui(bs, 1, 1); /* timing_info_present_flag */ + { + bitstream_put_ui(bs, 1, 32); // FPS + bitstream_put_ui(bs, frame_rate * 2, 32); // FPS + bitstream_put_ui(bs, 1, 1); + } + bitstream_put_ui(bs, 1, 1); /* nal_hrd_parameters_present_flag */ + { + // hrd_parameters + bitstream_put_ue(bs, 0); /* cpb_cnt_minus1 */ + bitstream_put_ui(bs, 4, 4); /* bit_rate_scale */ + bitstream_put_ui(bs, 6, 4); /* cpb_size_scale */ + + bitstream_put_ue(bs, frame_bitrate - 1); /* bit_rate_value_minus1[0] */ + bitstream_put_ue(bs, frame_bitrate*8 - 1); /* cpb_size_value_minus1[0] */ + bitstream_put_ui(bs, 1, 1); /* cbr_flag[0] */ + + bitstream_put_ui(bs, 23, 5); /* initial_cpb_removal_delay_length_minus1 */ + bitstream_put_ui(bs, 23, 5); /* cpb_removal_delay_length_minus1 */ + bitstream_put_ui(bs, 23, 5); /* dpb_output_delay_length_minus1 */ + bitstream_put_ui(bs, 23, 5); /* time_offset_length */ + } + bitstream_put_ui(bs, 0, 1); /* vcl_hrd_parameters_present_flag */ + bitstream_put_ui(bs, 0, 1); /* low_delay_hrd_flag */ + + bitstream_put_ui(bs, 0, 1); /* pic_struct_present_flag */ + bitstream_put_ui(bs, 0, 1); /* bitstream_restriction_flag */ + } + + rbsp_trailing_bits(bs); /* rbsp_trailing_bits */ +} + + +static void pps_rbsp(bitstream *bs) +{ + bitstream_put_ue(bs, pic_param.pic_parameter_set_id); /* pic_parameter_set_id */ + bitstream_put_ue(bs, pic_param.seq_parameter_set_id); /* seq_parameter_set_id */ + + bitstream_put_ui(bs, pic_param.pic_fields.bits.entropy_coding_mode_flag, 1); /* entropy_coding_mode_flag */ + + bitstream_put_ui(bs, 0, 1); /* pic_order_present_flag: 0 */ + + bitstream_put_ue(bs, 0); /* num_slice_groups_minus1 */ + + bitstream_put_ue(bs, pic_param.num_ref_idx_l0_active_minus1); /* num_ref_idx_l0_active_minus1 */ + bitstream_put_ue(bs, pic_param.num_ref_idx_l1_active_minus1); /* num_ref_idx_l1_active_minus1 1 */ + + bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_pred_flag, 1); /* weighted_pred_flag: 0 */ + bitstream_put_ui(bs, pic_param.pic_fields.bits.weighted_bipred_idc, 2); /* weighted_bipred_idc: 0 */ + + bitstream_put_se(bs, pic_param.pic_init_qp - 26); /* pic_init_qp_minus26 */ + bitstream_put_se(bs, 0); /* pic_init_qs_minus26 */ + bitstream_put_se(bs, 0); /* chroma_qp_index_offset */ + + bitstream_put_ui(bs, pic_param.pic_fields.bits.deblocking_filter_control_present_flag, 1); /* deblocking_filter_control_present_flag */ + bitstream_put_ui(bs, 0, 1); /* constrained_intra_pred_flag */ + bitstream_put_ui(bs, 0, 1); /* redundant_pic_cnt_present_flag */ + + /* more_rbsp_data */ + bitstream_put_ui(bs, pic_param.pic_fields.bits.transform_8x8_mode_flag, 1); /*transform_8x8_mode_flag */ + bitstream_put_ui(bs, 0, 1); /* pic_scaling_matrix_present_flag */ + bitstream_put_se(bs, pic_param.second_chroma_qp_index_offset ); /*second_chroma_qp_index_offset */ + + rbsp_trailing_bits(bs); +} + +static void slice_header(bitstream *bs) +{ + int first_mb_in_slice = slice_param.macroblock_address; + + bitstream_put_ue(bs, first_mb_in_slice); /* first_mb_in_slice: 0 */ + bitstream_put_ue(bs, slice_param.slice_type); /* slice_type */ + bitstream_put_ue(bs, slice_param.pic_parameter_set_id); /* pic_parameter_set_id: 0 */ + bitstream_put_ui(bs, pic_param.frame_num, seq_param.seq_fields.bits.log2_max_frame_num_minus4 + 4); /* frame_num */ + + /* frame_mbs_only_flag == 1 */ + if (!seq_param.seq_fields.bits.frame_mbs_only_flag) { + /* FIXME: */ + assert(0); + } + + if (pic_param.pic_fields.bits.idr_pic_flag) + bitstream_put_ue(bs, slice_param.idr_pic_id); /* idr_pic_id: 0 */ + + if (seq_param.seq_fields.bits.pic_order_cnt_type == 0) { + bitstream_put_ui(bs, pic_param.CurrPic.TopFieldOrderCnt, seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 + 4); + /* pic_order_present_flag == 0 */ + } else { + /* FIXME: */ + assert(0); + } + + /* redundant_pic_cnt_present_flag == 0 */ + /* slice type */ + if (IS_P_SLICE(slice_param.slice_type)) { + bitstream_put_ui(bs, slice_param.num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param.num_ref_idx_active_override_flag) + bitstream_put_ue(bs, slice_param.num_ref_idx_l0_active_minus1); + + /* ref_pic_list_reordering */ + bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ + } else if (IS_B_SLICE(slice_param.slice_type)) { + bitstream_put_ui(bs, slice_param.direct_spatial_mv_pred_flag, 1); /* direct_spatial_mv_pred: 1 */ + + bitstream_put_ui(bs, slice_param.num_ref_idx_active_override_flag, 1); /* num_ref_idx_active_override_flag: */ + + if (slice_param.num_ref_idx_active_override_flag) { + bitstream_put_ue(bs, slice_param.num_ref_idx_l0_active_minus1); + bitstream_put_ue(bs, slice_param.num_ref_idx_l1_active_minus1); + } + + /* ref_pic_list_reordering */ + bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l0: 0 */ + bitstream_put_ui(bs, 0, 1); /* ref_pic_list_reordering_flag_l1: 0 */ + } + + if ((pic_param.pic_fields.bits.weighted_pred_flag && + IS_P_SLICE(slice_param.slice_type)) || + ((pic_param.pic_fields.bits.weighted_bipred_idc == 1) && + IS_B_SLICE(slice_param.slice_type))) { + /* FIXME: fill weight/offset table */ + assert(0); + } + + /* dec_ref_pic_marking */ + if (pic_param.pic_fields.bits.reference_pic_flag) { /* nal_ref_idc != 0 */ + unsigned char no_output_of_prior_pics_flag = 0; + unsigned char long_term_reference_flag = 0; + unsigned char adaptive_ref_pic_marking_mode_flag = 0; + + if (pic_param.pic_fields.bits.idr_pic_flag) { + bitstream_put_ui(bs, no_output_of_prior_pics_flag, 1); /* no_output_of_prior_pics_flag: 0 */ + bitstream_put_ui(bs, long_term_reference_flag, 1); /* long_term_reference_flag: 0 */ + } else { + bitstream_put_ui(bs, adaptive_ref_pic_marking_mode_flag, 1); /* adaptive_ref_pic_marking_mode_flag: 0 */ + } + } + + if (pic_param.pic_fields.bits.entropy_coding_mode_flag && + !IS_I_SLICE(slice_param.slice_type)) + bitstream_put_ue(bs, slice_param.cabac_init_idc); /* cabac_init_idc: 0 */ + + bitstream_put_se(bs, slice_param.slice_qp_delta); /* slice_qp_delta: 0 */ + + /* ignore for SP/SI */ + + if (pic_param.pic_fields.bits.deblocking_filter_control_present_flag) { + bitstream_put_ue(bs, slice_param.disable_deblocking_filter_idc); /* disable_deblocking_filter_idc: 0 */ + + if (slice_param.disable_deblocking_filter_idc != 1) { + bitstream_put_se(bs, slice_param.slice_alpha_c0_offset_div2); /* slice_alpha_c0_offset_div2: 2 */ + bitstream_put_se(bs, slice_param.slice_beta_offset_div2); /* slice_beta_offset_div2: 2 */ + } + } + + if (pic_param.pic_fields.bits.entropy_coding_mode_flag) { + bitstream_byte_aligning(bs, 1); + } +} + +static int +build_packed_pic_buffer(unsigned char **header_buffer) +{ + bitstream bs; + + bitstream_start(&bs); + nal_start_code_prefix(&bs); + nal_header(&bs, NAL_REF_IDC_HIGH, NAL_PPS); + pps_rbsp(&bs); + bitstream_end(&bs); + + *header_buffer = (unsigned char *)bs.buffer; + return bs.bit_offset; +} + +static int +build_packed_seq_buffer(unsigned char **header_buffer) +{ + bitstream bs; + + bitstream_start(&bs); + nal_start_code_prefix(&bs); + nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS); + sps_rbsp(&bs); + bitstream_end(&bs); + + *header_buffer = (unsigned char *)bs.buffer; + return bs.bit_offset; +} + +static int build_packed_slice_buffer(unsigned char **header_buffer) +{ + bitstream bs; + int is_idr = !!pic_param.pic_fields.bits.idr_pic_flag; + int is_ref = !!pic_param.pic_fields.bits.reference_pic_flag; + + bitstream_start(&bs); + nal_start_code_prefix(&bs); + + if (IS_I_SLICE(slice_param.slice_type)) { + nal_header(&bs, NAL_REF_IDC_HIGH, is_idr ? NAL_IDR : NAL_NON_IDR); + } else if (IS_P_SLICE(slice_param.slice_type)) { + nal_header(&bs, NAL_REF_IDC_MEDIUM, NAL_NON_IDR); + } else { + assert(IS_B_SLICE(slice_param.slice_type)); + nal_header(&bs, is_ref ? NAL_REF_IDC_LOW : NAL_REF_IDC_NONE, NAL_NON_IDR); + } + + slice_header(&bs); + bitstream_end(&bs); + + *header_buffer = (unsigned char *)bs.buffer; + return bs.bit_offset; +} + + +/* + Assume frame sequence is: Frame#0, #1, #2, ..., #M, ..., #X, ... (encoding order) + 1) period between Frame #X and Frame #N = #X - #N + 2) 0 means infinite for intra_period/intra_idr_period, and 0 is invalid for ip_period + 3) intra_idr_period % intra_period (intra_period > 0) and intra_period % ip_period must be 0 + 4) intra_period and intra_idr_period take precedence over ip_period + 5) if ip_period > 1, intra_period and intra_idr_period are not the strict periods + of I/IDR frames, see bellow examples + ------------------------------------------------------------------- + intra_period intra_idr_period ip_period frame sequence (intra_period/intra_idr_period/ip_period) + 0 ignored 1 IDRPPPPPPP ... (No IDR/I any more) + 0 ignored >=2 IDR(PBB)(PBB)... (No IDR/I any more) + 1 0 ignored IDRIIIIIII... (No IDR any more) + 1 1 ignored IDR IDR IDR IDR... + 1 >=2 ignored IDRII IDRII IDR... (1/3/ignore) + >=2 0 1 IDRPPP IPPP I... (3/0/1) + >=2 0 >=2 IDR(PBB)(PBB)(IBB) (6/0/3) + (PBB)(IBB)(PBB)(IBB)... + >=2 >=2 1 IDRPPPPP IPPPPP IPPPPP (6/18/1) + IDRPPPPP IPPPPP IPPPPP... + >=2 >=2 >=2 {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)} (6/18/3) + {IDR(PBB)(PBB)(IBB)(PBB)(IBB)(PBB)}... + {IDR(PBB)(PBB)(IBB)(PBB)} (6/12/3) + {IDR(PBB)(PBB)(IBB)(PBB)}... + {IDR(PBB)(PBB)} (6/6/3) + {IDR(PBB)(PBB)}. +*/ + +/* + * Return displaying order with specified periods and encoding order + * displaying_order: displaying order + * frame_type: frame type + */ +#define FRAME_P 0 +#define FRAME_B 1 +#define FRAME_I 2 +#define FRAME_IDR 7 +void encoding2display_order( + unsigned long long encoding_order, int intra_period, + int intra_idr_period, int ip_period, + unsigned long long *displaying_order, + int *frame_type) +{ + int encoding_order_gop = 0; + + if (intra_period == 1) { /* all are I/IDR frames */ + *displaying_order = encoding_order; + if (intra_idr_period == 0) + *frame_type = (encoding_order == 0)?FRAME_IDR:FRAME_I; + else + *frame_type = (encoding_order % intra_idr_period == 0)?FRAME_IDR:FRAME_I; + return; + } + + if (intra_period == 0) + intra_idr_period = 0; + + /* new sequence like + * IDR PPPPP IPPPPP + * IDR (PBB)(PBB)(IBB)(PBB) + */ + encoding_order_gop = (intra_idr_period == 0)? encoding_order: + (encoding_order % (intra_idr_period + ((ip_period == 1)?0:1))); + + if (encoding_order_gop == 0) { /* the first frame */ + *frame_type = FRAME_IDR; + *displaying_order = encoding_order; + } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */ + *frame_type = FRAME_B; + *displaying_order = encoding_order - 1; + } else if ((intra_period != 0) && /* have I frames */ + (encoding_order_gop >= 2) && + ((ip_period == 1 && encoding_order_gop % intra_period == 0) || /* for IDR PPPPP IPPPP */ + /* for IDR (PBB)(PBB)(IBB) */ + (ip_period >= 2 && ((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0))) { + *frame_type = FRAME_I; + *displaying_order = encoding_order + ip_period - 1; + } else { + *frame_type = FRAME_P; + *displaying_order = encoding_order + ip_period - 1; + } +} + + +static const char *rc_to_string(int rcmode) +{ + switch (rc_mode) { + case VA_RC_NONE: + return "NONE"; + case VA_RC_CBR: + return "CBR"; + case VA_RC_VBR: + return "VBR"; + case VA_RC_VCM: + return "VCM"; + case VA_RC_CQP: + return "CQP"; + case VA_RC_VBR_CONSTRAINED: + return "VBR_CONSTRAINED"; + default: + return "Unknown"; + } +} + +#if 0 +static int process_cmdline(int argc, char *argv[]) +{ + char c; + const struct option long_opts[] = { + {"help", no_argument, NULL, 0 }, + {"bitrate", required_argument, NULL, 1 }, + {"minqp", required_argument, NULL, 2 }, + {"initialqp", required_argument, NULL, 3 }, + {"intra_period", required_argument, NULL, 4 }, + {"idr_period", required_argument, NULL, 5 }, + {"ip_period", required_argument, NULL, 6 }, + {"rcmode", required_argument, NULL, 7 }, + {"srcyuv", required_argument, NULL, 9 }, + {"recyuv", required_argument, NULL, 10 }, + {"fourcc", required_argument, NULL, 11 }, + {"syncmode", no_argument, NULL, 12 }, + {"enablePSNR", no_argument, NULL, 13 }, + {"prit", required_argument, NULL, 14 }, + {"priv", required_argument, NULL, 15 }, + {"framecount", required_argument, NULL, 16 }, + {"entropy", required_argument, NULL, 17 }, + {"profile", required_argument, NULL, 18 }, + {NULL, no_argument, NULL, 0 }}; + int long_index; + + while ((c =getopt_long_only(argc, argv, "w:h:n:f:o:?", long_opts, &long_index)) != EOF) { + switch (c) { + case 'w': + frame_width = atoi(optarg); + break; + case 'h': + frame_height = atoi(optarg); + break; + case 'n': + case 'f': + frame_rate = atoi(optarg); + break; + case 'o': + coded_fn = strdup(optarg); + break; + case 0: + print_help(); + exit(0); + case 1: + frame_bitrate = atoi(optarg); + break; + case 2: + minimal_qp = atoi(optarg); + break; + case 3: + initial_qp = atoi(optarg); + break; + case 4: + intra_period = atoi(optarg); + break; + case 5: + intra_idr_period = atoi(optarg); + break; + case 6: + ip_period = atoi(optarg); + break; + case 7: + rc_mode = string_to_rc(optarg); + if (rc_mode < 0) { + print_help(); + exit(1); + } + break; + case 9: + srcyuv_fn = strdup(optarg); + break; + case 11: + srcyuv_fourcc = string_to_fourcc(optarg); + if (srcyuv_fourcc <= 0) { + print_help(); + exit(1); + } + break; + case 13: + calc_psnr = 1; + break; + case 14: + misc_priv_type = strtol(optarg, NULL, 0); + break; + case 15: + misc_priv_value = strtol(optarg, NULL, 0); + break; + case 17: + h264_entropy_mode = atoi(optarg) ? 1: 0; + break; + case 18: + if (strncmp(optarg, "BP", 2) == 0) + h264_profile = VAProfileH264Baseline; + else if (strncmp(optarg, "MP", 2) == 0) + h264_profile = VAProfileH264Main; + else if (strncmp(optarg, "HP", 2) == 0) + h264_profile = VAProfileH264High; + else + h264_profile = (VAProfile)0; + break; + case ':': + case '?': + print_help(); + exit(0); + } + } + + if (ip_period < 1) { + printf(" ip_period must be greater than 0\n"); + exit(0); + } + if (intra_period != 1 && intra_period % ip_period != 0) { + printf(" intra_period must be a multiplier of ip_period\n"); + exit(0); + } + if (intra_period != 0 && intra_idr_period % intra_period != 0) { + printf(" intra_idr_period must be a multiplier of intra_period\n"); + exit(0); + } + + if (frame_bitrate == 0) + frame_bitrate = frame_width * frame_height * 12 * frame_rate / 50; + + if (coded_fn == NULL) { + struct stat buf; + if (stat("/tmp", &buf) == 0) + coded_fn = strdup("/tmp/test.264"); + else if (stat("/sdcard", &buf) == 0) + coded_fn = strdup("/sdcard/test.264"); + else + coded_fn = strdup("./test.264"); + } + + /* store coded data into a file */ + coded_fp = fopen(coded_fn, "w+"); + if (coded_fp == NULL) { + printf("Open file %s failed, exit\n", coded_fn); + exit(1); + } + + frame_width_mbaligned = (frame_width + 15) & (~15); + frame_height_mbaligned = (frame_height + 15) & (~15); + if (frame_width != frame_width_mbaligned || + frame_height != frame_height_mbaligned) { + printf("Source frame is %dx%d and will code clip to %dx%d with crop\n", + frame_width, frame_height, + frame_width_mbaligned, frame_height_mbaligned + ); + } + + return 0; +} +#endif + +static Display *x11_display; +static Window x11_window; + +VADisplay +va_open_display(void) +{ + x11_display = XOpenDisplay(NULL); + if (!x11_display) { + fprintf(stderr, "error: can't connect to X server!\n"); + return NULL; + } + return vaGetDisplay(x11_display); +} + +void +va_close_display(VADisplay va_dpy) +{ + if (!x11_display) + return; + + if (x11_window) { + XUnmapWindow(x11_display, x11_window); + XDestroyWindow(x11_display, x11_window); + x11_window = None; + } + XCloseDisplay(x11_display); + x11_display = NULL; +} + +static int init_va(void) +{ + VAProfile profile_list[]={VAProfileH264High, VAProfileH264Main, VAProfileH264Baseline, VAProfileH264ConstrainedBaseline}; + VAEntrypoint *entrypoints; + int num_entrypoints, slice_entrypoint; + int support_encode = 0; + int major_ver, minor_ver; + VAStatus va_status; + unsigned int i; + + va_dpy = va_open_display(); + va_status = vaInitialize(va_dpy, &major_ver, &minor_ver); + CHECK_VASTATUS(va_status, "vaInitialize"); + + num_entrypoints = vaMaxNumEntrypoints(va_dpy); + entrypoints = (VAEntrypoint *)malloc(num_entrypoints * sizeof(*entrypoints)); + if (!entrypoints) { + fprintf(stderr, "error: failed to initialize VA entrypoints array\n"); + exit(1); + } + + /* use the highest profile */ + for (i = 0; i < sizeof(profile_list)/sizeof(profile_list[0]); i++) { + if ((h264_profile != ~0) && h264_profile != profile_list[i]) + continue; + + h264_profile = profile_list[i]; + vaQueryConfigEntrypoints(va_dpy, h264_profile, entrypoints, &num_entrypoints); + for (slice_entrypoint = 0; slice_entrypoint < num_entrypoints; slice_entrypoint++) { + if (entrypoints[slice_entrypoint] == VAEntrypointEncSlice) { + support_encode = 1; + break; + } + } + if (support_encode == 1) + break; + } + + if (support_encode == 0) { + printf("Can't find VAEntrypointEncSlice for H264 profiles\n"); + exit(1); + } else { + switch (h264_profile) { + case VAProfileH264Baseline: + printf("Use profile VAProfileH264Baseline\n"); + ip_period = 1; + constraint_set_flag |= (1 << 0); /* Annex A.2.1 */ + h264_entropy_mode = 0; + break; + case VAProfileH264ConstrainedBaseline: + printf("Use profile VAProfileH264ConstrainedBaseline\n"); + constraint_set_flag |= (1 << 0 | 1 << 1); /* Annex A.2.2 */ + ip_period = 1; + break; + + case VAProfileH264Main: + printf("Use profile VAProfileH264Main\n"); + constraint_set_flag |= (1 << 1); /* Annex A.2.2 */ + break; + + case VAProfileH264High: + constraint_set_flag |= (1 << 3); /* Annex A.2.4 */ + printf("Use profile VAProfileH264High\n"); + break; + default: + printf("unknow profile. Set to Baseline"); + h264_profile = VAProfileH264Baseline; + ip_period = 1; + constraint_set_flag |= (1 << 0); /* Annex A.2.1 */ + break; + } + } + + VAConfigAttrib attrib[VAConfigAttribTypeMax]; + + /* find out the format for the render target, and rate control mode */ + for (i = 0; i < VAConfigAttribTypeMax; i++) + attrib[i].type = (VAConfigAttribType)i; + + va_status = vaGetConfigAttributes(va_dpy, h264_profile, VAEntrypointEncSlice, + &attrib[0], VAConfigAttribTypeMax); + CHECK_VASTATUS(va_status, "vaGetConfigAttributes"); + /* check the interested configattrib */ + if ((attrib[VAConfigAttribRTFormat].value & VA_RT_FORMAT_YUV420) == 0) { + printf("Not find desired YUV420 RT format\n"); + exit(1); + } else { + config_attrib[config_attrib_num].type = VAConfigAttribRTFormat; + config_attrib[config_attrib_num].value = VA_RT_FORMAT_YUV420; + config_attrib_num++; + } + + if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribRateControl].value; + + printf("Support rate control mode (0x%x):", tmp); + + if (tmp & VA_RC_NONE) + printf("NONE "); + if (tmp & VA_RC_CBR) + printf("CBR "); + if (tmp & VA_RC_VBR) + printf("VBR "); + if (tmp & VA_RC_VCM) + printf("VCM "); + if (tmp & VA_RC_CQP) + printf("CQP "); + if (tmp & VA_RC_VBR_CONSTRAINED) + printf("VBR_CONSTRAINED "); + + printf("\n"); + + if (rc_mode == -1 || !(rc_mode & tmp)) { + if (rc_mode != -1) { + printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode)); + } + + for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) { + if (rc_default_modes[i] & tmp) { + rc_mode = rc_default_modes[i]; + break; + } + } + + printf("RateControl mode: %s\n", rc_to_string(rc_mode)); + } + + config_attrib[config_attrib_num].type = VAConfigAttribRateControl; + config_attrib[config_attrib_num].value = rc_mode; + config_attrib_num++; + } + + + if (attrib[VAConfigAttribEncPackedHeaders].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribEncPackedHeaders].value; + + printf("Support VAConfigAttribEncPackedHeaders\n"); + + h264_packedheader = 1; + config_attrib[config_attrib_num].type = VAConfigAttribEncPackedHeaders; + config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE; + + if (tmp & VA_ENC_PACKED_HEADER_SEQUENCE) { + printf("Support packed sequence headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SEQUENCE; + } + + if (tmp & VA_ENC_PACKED_HEADER_PICTURE) { + printf("Support packed picture headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_PICTURE; + } + + if (tmp & VA_ENC_PACKED_HEADER_SLICE) { + printf("Support packed slice headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_SLICE; + } + + if (tmp & VA_ENC_PACKED_HEADER_MISC) { + printf("Support packed misc headers\n"); + config_attrib[config_attrib_num].value |= VA_ENC_PACKED_HEADER_MISC; + } + + enc_packed_header_idx = config_attrib_num; + config_attrib_num++; + } + + if (attrib[VAConfigAttribEncInterlaced].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribEncInterlaced].value; + + printf("Support VAConfigAttribEncInterlaced\n"); + + if (tmp & VA_ENC_INTERLACED_FRAME) + printf("support VA_ENC_INTERLACED_FRAME\n"); + if (tmp & VA_ENC_INTERLACED_FIELD) + printf("Support VA_ENC_INTERLACED_FIELD\n"); + if (tmp & VA_ENC_INTERLACED_MBAFF) + printf("Support VA_ENC_INTERLACED_MBAFF\n"); + if (tmp & VA_ENC_INTERLACED_PAFF) + printf("Support VA_ENC_INTERLACED_PAFF\n"); + + config_attrib[config_attrib_num].type = VAConfigAttribEncInterlaced; + config_attrib[config_attrib_num].value = VA_ENC_PACKED_HEADER_NONE; + config_attrib_num++; + } + + if (attrib[VAConfigAttribEncMaxRefFrames].value != VA_ATTRIB_NOT_SUPPORTED) { + h264_maxref = attrib[VAConfigAttribEncMaxRefFrames].value; + + printf("Support %d RefPicList0 and %d RefPicList1\n", + h264_maxref & 0xffff, (h264_maxref >> 16) & 0xffff ); + } + + if (attrib[VAConfigAttribEncMaxSlices].value != VA_ATTRIB_NOT_SUPPORTED) + printf("Support %d slices\n", attrib[VAConfigAttribEncMaxSlices].value); + + if (attrib[VAConfigAttribEncSliceStructure].value != VA_ATTRIB_NOT_SUPPORTED) { + int tmp = attrib[VAConfigAttribEncSliceStructure].value; + + printf("Support VAConfigAttribEncSliceStructure\n"); + + if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS) + printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS\n"); + if (tmp & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) + printf("Support VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS\n"); + if (tmp & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) + printf("Support VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS\n"); + } + if (attrib[VAConfigAttribEncMacroblockInfo].value != VA_ATTRIB_NOT_SUPPORTED) { + printf("Support VAConfigAttribEncMacroblockInfo\n"); + } + + free(entrypoints); + return 0; +} + +static int setup_encode() +{ + VAStatus va_status; + VASurfaceID *tmp_surfaceid; + int codedbuf_size, i; + static VASurfaceID src_surface[SURFACE_NUM]; + static VASurfaceID ref_surface[SURFACE_NUM]; + + va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice, + &config_attrib[0], config_attrib_num, &config_id); + CHECK_VASTATUS(va_status, "vaCreateConfig"); + + /* create source surfaces */ + va_status = vaCreateSurfaces(va_dpy, + VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned, + &src_surface[0], SURFACE_NUM, + NULL, 0); + CHECK_VASTATUS(va_status, "vaCreateSurfaces"); + + /* create reference surfaces */ + va_status = vaCreateSurfaces(va_dpy, + VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned, + &ref_surface[0], SURFACE_NUM, + NULL, 0); + CHECK_VASTATUS(va_status, "vaCreateSurfaces"); + + tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID)); + memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID)); + memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID)); + + /* Create a context for this encode pipe */ + va_status = vaCreateContext(va_dpy, config_id, + frame_width_mbaligned, frame_height_mbaligned, + VA_PROGRESSIVE, + tmp_surfaceid, 2 * SURFACE_NUM, + &context_id); + CHECK_VASTATUS(va_status, "vaCreateContext"); + free(tmp_surfaceid); + + codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16); + + for (i = 0; i < SURFACE_NUM; i++) { + /* create coded buffer once for all + * other VA buffers which won't be used again after vaRenderPicture. + * so APP can always vaCreateBuffer for every frame + * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture + * so VA won't maintain the coded buffer + */ + va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType, + codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + } + + /* create OpenGL objects */ + //glGenFramebuffers(SURFACE_NUM, fbos); + + for (i = 0; i < SURFACE_NUM; i++) { + glGenTextures(1, &gl_surfaces[i].y_tex); + glGenTextures(1, &gl_surfaces[i].cbcr_tex); + } + + for (i = 0; i < SURFACE_NUM; i++) { + gl_surfaces[i].src_surface = src_surface[i]; + gl_surfaces[i].ref_surface = ref_surface[i]; + } + + return 0; +} + + + +#define partition(ref, field, key, ascending) \ + while (i <= j) { \ + if (ascending) { \ + while (ref[i].field < key) \ + i++; \ + while (ref[j].field > key) \ + j--; \ + } else { \ + while (ref[i].field > key) \ + i++; \ + while (ref[j].field < key) \ + j--; \ + } \ + if (i <= j) { \ + tmp = ref[i]; \ + ref[i] = ref[j]; \ + ref[j] = tmp; \ + i++; \ + j--; \ + } \ + } \ + +static void sort_one(VAPictureH264 ref[], int left, int right, + int ascending, int frame_idx) +{ + int i = left, j = right; + unsigned int key; + VAPictureH264 tmp; + + if (frame_idx) { + key = ref[(left + right) / 2].frame_idx; + partition(ref, frame_idx, key, ascending); + } else { + key = ref[(left + right) / 2].TopFieldOrderCnt; + partition(ref, TopFieldOrderCnt, (signed int)key, ascending); + } + + /* recursion */ + if (left < j) + sort_one(ref, left, j, ascending, frame_idx); + + if (i < right) + sort_one(ref, i, right, ascending, frame_idx); +} + +static void sort_two(VAPictureH264 ref[], int left, int right, unsigned int key, unsigned int frame_idx, + int partition_ascending, int list0_ascending, int list1_ascending) +{ + int i = left, j = right; + VAPictureH264 tmp; + + if (frame_idx) { + partition(ref, frame_idx, key, partition_ascending); + } else { + partition(ref, TopFieldOrderCnt, (signed int)key, partition_ascending); + } + + + sort_one(ref, left, i-1, list0_ascending, frame_idx); + sort_one(ref, j+1, right, list1_ascending, frame_idx); +} + +static int update_ReferenceFrames(void) +{ + int i; + + if (current_frame_type == FRAME_B) + return 0; + + CurrentCurrPic.flags = VA_PICTURE_H264_SHORT_TERM_REFERENCE; + numShortTerm++; + if (numShortTerm > num_ref_frames) + numShortTerm = num_ref_frames; + for (i=numShortTerm-1; i>0; i--) + ReferenceFrames[i] = ReferenceFrames[i-1]; + ReferenceFrames[0] = CurrentCurrPic; + + if (current_frame_type != FRAME_B) + current_frame_num++; + if (current_frame_num > MaxFrameNum) + current_frame_num = 0; + + return 0; +} + + +static int update_RefPicList(void) +{ + unsigned int current_poc = CurrentCurrPic.TopFieldOrderCnt; + + if (current_frame_type == FRAME_P) { + memcpy(RefPicList0_P, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); + sort_one(RefPicList0_P, 0, numShortTerm-1, 0, 1); + } + + if (current_frame_type == FRAME_B) { + memcpy(RefPicList0_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); + sort_two(RefPicList0_B, 0, numShortTerm-1, current_poc, 0, + 1, 0, 1); + + memcpy(RefPicList1_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264)); + sort_two(RefPicList1_B, 0, numShortTerm-1, current_poc, 0, + 0, 1, 0); + } + + return 0; +} + + +static int render_sequence(void) +{ + VABufferID seq_param_buf, rc_param_buf, misc_param_tmpbuf, render_id[2]; + VAStatus va_status; + VAEncMiscParameterBuffer *misc_param, *misc_param_tmp; + VAEncMiscParameterRateControl *misc_rate_ctrl; + + seq_param.level_idc = 41 /*SH_LEVEL_3*/; + seq_param.picture_width_in_mbs = frame_width_mbaligned / 16; + seq_param.picture_height_in_mbs = frame_height_mbaligned / 16; + seq_param.bits_per_second = frame_bitrate; + + seq_param.intra_period = intra_period; + seq_param.intra_idr_period = intra_idr_period; + seq_param.ip_period = ip_period; + + seq_param.max_num_ref_frames = num_ref_frames; + seq_param.seq_fields.bits.frame_mbs_only_flag = 1; + seq_param.time_scale = frame_rate * 2; + seq_param.num_units_in_tick = 1; /* Tc = num_units_in_tick / scale */ + seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = Log2MaxPicOrderCntLsb - 4; + seq_param.seq_fields.bits.log2_max_frame_num_minus4 = Log2MaxFrameNum - 4;; + seq_param.seq_fields.bits.frame_mbs_only_flag = 1; + seq_param.seq_fields.bits.chroma_format_idc = 1; + seq_param.seq_fields.bits.direct_8x8_inference_flag = 1; + + if (frame_width != frame_width_mbaligned || + frame_height != frame_height_mbaligned) { + seq_param.frame_cropping_flag = 1; + seq_param.frame_crop_left_offset = 0; + seq_param.frame_crop_right_offset = (frame_width_mbaligned - frame_width)/2; + seq_param.frame_crop_top_offset = 0; + seq_param.frame_crop_bottom_offset = (frame_height_mbaligned - frame_height)/2; + } + + va_status = vaCreateBuffer(va_dpy, context_id, + VAEncSequenceParameterBufferType, + sizeof(seq_param), 1, &seq_param, &seq_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, context_id, + VAEncMiscParameterBufferType, + sizeof(VAEncMiscParameterBuffer) + sizeof(VAEncMiscParameterRateControl), + 1, NULL, &rc_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + vaMapBuffer(va_dpy, rc_param_buf, (void **)&misc_param); + misc_param->type = VAEncMiscParameterTypeRateControl; + misc_rate_ctrl = (VAEncMiscParameterRateControl *)misc_param->data; + memset(misc_rate_ctrl, 0, sizeof(*misc_rate_ctrl)); + misc_rate_ctrl->bits_per_second = frame_bitrate; + misc_rate_ctrl->target_percentage = 66; + misc_rate_ctrl->window_size = 1000; + misc_rate_ctrl->initial_qp = initial_qp; + misc_rate_ctrl->min_qp = minimal_qp; + misc_rate_ctrl->basic_unit_size = 0; + vaUnmapBuffer(va_dpy, rc_param_buf); + + render_id[0] = seq_param_buf; + render_id[1] = rc_param_buf; + + va_status = vaRenderPicture(va_dpy, context_id, &render_id[0], 2); + CHECK_VASTATUS(va_status, "vaRenderPicture");; + + if (misc_priv_type != 0) { + va_status = vaCreateBuffer(va_dpy, context_id, + VAEncMiscParameterBufferType, + sizeof(VAEncMiscParameterBuffer), + 1, NULL, &misc_param_tmpbuf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + vaMapBuffer(va_dpy, misc_param_tmpbuf, (void **)&misc_param_tmp); + misc_param_tmp->type = (VAEncMiscParameterType)misc_priv_type; + misc_param_tmp->data[0] = misc_priv_value; + vaUnmapBuffer(va_dpy, misc_param_tmpbuf); + + va_status = vaRenderPicture(va_dpy, context_id, &misc_param_tmpbuf, 1); + } + + return 0; +} + +static int calc_poc(int pic_order_cnt_lsb) +{ + static int PicOrderCntMsb_ref = 0, pic_order_cnt_lsb_ref = 0; + int prevPicOrderCntMsb, prevPicOrderCntLsb; + int PicOrderCntMsb, TopFieldOrderCnt; + + if (current_frame_type == FRAME_IDR) + prevPicOrderCntMsb = prevPicOrderCntLsb = 0; + else { + prevPicOrderCntMsb = PicOrderCntMsb_ref; + prevPicOrderCntLsb = pic_order_cnt_lsb_ref; + } + + if ((pic_order_cnt_lsb < prevPicOrderCntLsb) && + ((prevPicOrderCntLsb - pic_order_cnt_lsb) >= (int)(MaxPicOrderCntLsb / 2))) + PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb; + else if ((pic_order_cnt_lsb > prevPicOrderCntLsb) && + ((pic_order_cnt_lsb - prevPicOrderCntLsb) > (int)(MaxPicOrderCntLsb / 2))) + PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb; + else + PicOrderCntMsb = prevPicOrderCntMsb; + + TopFieldOrderCnt = PicOrderCntMsb + pic_order_cnt_lsb; + + if (current_frame_type != FRAME_B) { + PicOrderCntMsb_ref = PicOrderCntMsb; + pic_order_cnt_lsb_ref = pic_order_cnt_lsb; + } + + return TopFieldOrderCnt; +} + +static int render_picture(void) +{ + VABufferID pic_param_buf; + VAStatus va_status; + int i = 0; + + pic_param.CurrPic.picture_id = gl_surfaces[current_frame_display % SURFACE_NUM].ref_surface; + pic_param.CurrPic.frame_idx = current_frame_num; + pic_param.CurrPic.flags = 0; + pic_param.CurrPic.TopFieldOrderCnt = calc_poc((current_frame_display - current_IDR_display) % MaxPicOrderCntLsb); + pic_param.CurrPic.BottomFieldOrderCnt = pic_param.CurrPic.TopFieldOrderCnt; + CurrentCurrPic = pic_param.CurrPic; + + if (getenv("TO_DEL")) { /* set RefPicList into ReferenceFrames */ + update_RefPicList(); /* calc RefPicList */ + memset(pic_param.ReferenceFrames, 0xff, 16 * sizeof(VAPictureH264)); /* invalid all */ + if (current_frame_type == FRAME_P) { + pic_param.ReferenceFrames[0] = RefPicList0_P[0]; + } else if (current_frame_type == FRAME_B) { + pic_param.ReferenceFrames[0] = RefPicList0_B[0]; + pic_param.ReferenceFrames[1] = RefPicList1_B[0]; + } + } else { + memcpy(pic_param.ReferenceFrames, ReferenceFrames, numShortTerm*sizeof(VAPictureH264)); + for (i = numShortTerm; i < SURFACE_NUM; i++) { + pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE; + pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID; + } + } + + pic_param.pic_fields.bits.idr_pic_flag = (current_frame_type == FRAME_IDR); + pic_param.pic_fields.bits.reference_pic_flag = (current_frame_type != FRAME_B); + pic_param.pic_fields.bits.entropy_coding_mode_flag = h264_entropy_mode; + pic_param.pic_fields.bits.deblocking_filter_control_present_flag = 1; + pic_param.frame_num = current_frame_num; + pic_param.coded_buf = gl_surfaces[current_frame_display % SURFACE_NUM].coded_buf; + pic_param.last_picture = false; // FIXME + pic_param.pic_init_qp = initial_qp; + + va_status = vaCreateBuffer(va_dpy, context_id, VAEncPictureParameterBufferType, + sizeof(pic_param), 1, &pic_param, &pic_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer");; + + va_status = vaRenderPicture(va_dpy, context_id, &pic_param_buf, 1); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + return 0; +} + +static int render_packedsequence(void) +{ + VAEncPackedHeaderParameterBuffer packedheader_param_buffer; + VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2]; + unsigned int length_in_bits; + unsigned char *packedseq_buffer = NULL; + VAStatus va_status; + + length_in_bits = build_packed_seq_buffer(&packedseq_buffer); + + packedheader_param_buffer.type = VAEncPackedHeaderSequence; + + packedheader_param_buffer.bit_length = length_in_bits; /*length_in_bits*/ + packedheader_param_buffer.has_emulation_bytes = 0; + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderParameterBufferType, + sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, + &packedseq_para_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderDataBufferType, + (length_in_bits + 7) / 8, 1, packedseq_buffer, + &packedseq_data_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + render_id[0] = packedseq_para_bufid; + render_id[1] = packedseq_data_bufid; + va_status = vaRenderPicture(va_dpy, context_id, render_id, 2); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + free(packedseq_buffer); + + return 0; +} + + +static int render_packedpicture(void) +{ + VAEncPackedHeaderParameterBuffer packedheader_param_buffer; + VABufferID packedpic_para_bufid, packedpic_data_bufid, render_id[2]; + unsigned int length_in_bits; + unsigned char *packedpic_buffer = NULL; + VAStatus va_status; + + length_in_bits = build_packed_pic_buffer(&packedpic_buffer); + packedheader_param_buffer.type = VAEncPackedHeaderPicture; + packedheader_param_buffer.bit_length = length_in_bits; + packedheader_param_buffer.has_emulation_bytes = 0; + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderParameterBufferType, + sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, + &packedpic_para_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderDataBufferType, + (length_in_bits + 7) / 8, 1, packedpic_buffer, + &packedpic_data_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + render_id[0] = packedpic_para_bufid; + render_id[1] = packedpic_data_bufid; + va_status = vaRenderPicture(va_dpy, context_id, render_id, 2); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + free(packedpic_buffer); + + return 0; +} + +static void render_packedslice() +{ + VAEncPackedHeaderParameterBuffer packedheader_param_buffer; + VABufferID packedslice_para_bufid, packedslice_data_bufid, render_id[2]; + unsigned int length_in_bits; + unsigned char *packedslice_buffer = NULL; + VAStatus va_status; + + length_in_bits = build_packed_slice_buffer(&packedslice_buffer); + packedheader_param_buffer.type = VAEncPackedHeaderSlice; + packedheader_param_buffer.bit_length = length_in_bits; + packedheader_param_buffer.has_emulation_bytes = 0; + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderParameterBufferType, + sizeof(packedheader_param_buffer), 1, &packedheader_param_buffer, + &packedslice_para_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + va_status = vaCreateBuffer(va_dpy, + context_id, + VAEncPackedHeaderDataBufferType, + (length_in_bits + 7) / 8, 1, packedslice_buffer, + &packedslice_data_bufid); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + + render_id[0] = packedslice_para_bufid; + render_id[1] = packedslice_data_bufid; + va_status = vaRenderPicture(va_dpy, context_id, render_id, 2); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + free(packedslice_buffer); +} + +static int render_slice(void) +{ + VABufferID slice_param_buf; + VAStatus va_status; + int i; + + update_RefPicList(); + + /* one frame, one slice */ + slice_param.macroblock_address = 0; + slice_param.num_macroblocks = frame_width_mbaligned * frame_height_mbaligned/(16*16); /* Measured by MB */ + slice_param.slice_type = (current_frame_type == FRAME_IDR)?2:current_frame_type; + if (current_frame_type == FRAME_IDR) { + if (current_frame_encoding != 0) + ++slice_param.idr_pic_id; + } else if (current_frame_type == FRAME_P) { + int refpiclist0_max = h264_maxref & 0xffff; + memcpy(slice_param.RefPicList0, RefPicList0_P, refpiclist0_max*sizeof(VAPictureH264)); + + for (i = refpiclist0_max; i < 32; i++) { + slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE; + slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID; + } + } else if (current_frame_type == FRAME_B) { + int refpiclist0_max = h264_maxref & 0xffff; + int refpiclist1_max = (h264_maxref >> 16) & 0xffff; + + memcpy(slice_param.RefPicList0, RefPicList0_B, refpiclist0_max*sizeof(VAPictureH264)); + for (i = refpiclist0_max; i < 32; i++) { + slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE; + slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID; + } + + memcpy(slice_param.RefPicList1, RefPicList1_B, refpiclist1_max*sizeof(VAPictureH264)); + for (i = refpiclist1_max; i < 32; i++) { + slice_param.RefPicList1[i].picture_id = VA_INVALID_SURFACE; + slice_param.RefPicList1[i].flags = VA_PICTURE_H264_INVALID; + } + } + + slice_param.slice_alpha_c0_offset_div2 = 0; + slice_param.slice_beta_offset_div2 = 0; + slice_param.direct_spatial_mv_pred_flag = 1; + slice_param.pic_order_cnt_lsb = (current_frame_display - current_IDR_display) % MaxPicOrderCntLsb; + + + if (h264_packedheader && + config_attrib[enc_packed_header_idx].value & VA_ENC_PACKED_HEADER_SLICE) + render_packedslice(); + + va_status = vaCreateBuffer(va_dpy, context_id, VAEncSliceParameterBufferType, + sizeof(slice_param), 1, &slice_param, &slice_param_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer");; + + va_status = vaRenderPicture(va_dpy, context_id, &slice_param_buf, 1); + CHECK_VASTATUS(va_status, "vaRenderPicture"); + + return 0; +} + + + +int H264Encoder::save_codeddata(unsigned long long display_order, unsigned long long encode_order, int frame_type) +{ + VACodedBufferSegment *buf_list = NULL; + VAStatus va_status; + unsigned int coded_size = 0; + + string data; + + va_status = vaMapBuffer(va_dpy, gl_surfaces[display_order % SURFACE_NUM].coded_buf, (void **)(&buf_list)); + CHECK_VASTATUS(va_status, "vaMapBuffer"); + while (buf_list != NULL) { + data.append(reinterpret_cast(buf_list->buf), buf_list->size); + if (coded_fp != nullptr) + coded_size += fwrite(buf_list->buf, 1, buf_list->size, coded_fp); + buf_list = (VACodedBufferSegment *) buf_list->next; + + frame_size += coded_size; + } + vaUnmapBuffer(va_dpy, gl_surfaces[display_order % SURFACE_NUM].coded_buf); + + AVPacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.buf = nullptr; + pkt.pts = av_rescale_q(display_order, AVRational{1, frame_rate}, avstream->time_base); + pkt.dts = av_rescale_q(encode_order, AVRational{1, frame_rate}, avstream->time_base); + pkt.data = reinterpret_cast(&data[0]); + pkt.size = data.size(); + pkt.stream_index = 0; + if (frame_type == FRAME_IDR || frame_type == FRAME_I) { + pkt.flags = AV_PKT_FLAG_KEY; + } else { + pkt.flags = 0; + } + pkt.duration = 1; + av_interleaved_write_frame(avctx, &pkt); + +#if 0 + printf("\r "); /* return back to startpoint */ + switch (encode_order % 4) { + case 0: + printf("|"); + break; + case 1: + printf("/"); + break; + case 2: + printf("-"); + break; + case 3: + printf("\\"); + break; + } + printf("%08lld", encode_order); + printf("(%06d bytes coded)", coded_size); +#endif + + return 0; +} + + +// this is weird. but it seems to put a new frame onto the queue +void H264Encoder::storage_task_enqueue(unsigned long long display_order, unsigned long long encode_order, int frame_type) +{ + std::unique_lock lock(storage_task_queue_mutex); + + storage_task tmp; + tmp.display_order = display_order; + tmp.encode_order = encode_order; + tmp.frame_type = frame_type; + storage_task_queue.push(tmp); + srcsurface_status[display_order % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING; + + storage_task_queue_changed.notify_all(); +} + +void H264Encoder::storage_task_thread() +{ + for ( ;; ) { + storage_task current; + { + // wait until there's an encoded frame + std::unique_lock lock(storage_task_queue_mutex); + storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || !storage_task_queue.empty(); }); + if (storage_thread_should_quit) return; + current = storage_task_queue.front(); + storage_task_queue.pop(); + } + + VAStatus va_status; + + // waits for data, then saves it to disk. + va_status = vaSyncSurface(va_dpy, gl_surfaces[current.display_order % SURFACE_NUM].src_surface); + CHECK_VASTATUS(va_status, "vaSyncSurface"); + save_codeddata(current.display_order, current.encode_order, current.frame_type); + + { + std::unique_lock lock(storage_task_queue_mutex); + srcsurface_status[current.display_order % SURFACE_NUM] = SRC_SURFACE_FREE; + storage_task_queue_changed.notify_all(); + } + } +} + +static int release_encode() +{ + int i; + + for (i = 0; i < SURFACE_NUM; i++) { + vaDestroyBuffer(va_dpy, gl_surfaces[i].coded_buf); + vaDestroySurfaces(va_dpy, &gl_surfaces[i].src_surface, 1); + vaDestroySurfaces(va_dpy, &gl_surfaces[i].ref_surface, 1); + } + + vaDestroyContext(va_dpy, context_id); + vaDestroyConfig(va_dpy, config_id); + + return 0; +} + +static int deinit_va() +{ + vaTerminate(va_dpy); + + va_close_display(va_dpy); + + return 0; +} + + +static int print_input() +{ + printf("\n\nINPUT:Try to encode H264...\n"); + if (rc_mode != -1) + printf("INPUT: RateControl : %s\n", rc_to_string(rc_mode)); + printf("INPUT: Resolution : %dx%dframes\n", frame_width, frame_height); + printf("INPUT: FrameRate : %d\n", frame_rate); + printf("INPUT: Bitrate : %d\n", frame_bitrate); + printf("INPUT: Slieces : %d\n", frame_slices); + printf("INPUT: IntraPeriod : %d\n", intra_period); + printf("INPUT: IDRPeriod : %d\n", intra_idr_period); + printf("INPUT: IpPeriod : %d\n", ip_period); + printf("INPUT: Initial QP : %d\n", initial_qp); + printf("INPUT: Min QP : %d\n", minimal_qp); + printf("INPUT: Coded Clip : %s\n", coded_fn); + + printf("\n\n"); /* return back to startpoint */ + + return 0; +} + + +//H264Encoder::H264Encoder(SDL_Window *window, SDL_GLContext context, int width, int height, const char *output_filename) +H264Encoder::H264Encoder(QSurface *surface, int width, int height, const char *output_filename) + : current_storage_frame(0), surface(surface) + //: width(width), height(height), current_encoding_frame(0) +{ + av_register_all(); + avctx = avformat_alloc_context(); + avctx->oformat = av_guess_format(NULL, output_filename, NULL); + strcpy(avctx->filename, output_filename); + if (avio_open2(&avctx->pb, output_filename, AVIO_FLAG_WRITE, &avctx->interrupt_callback, NULL) < 0) { + fprintf(stderr, "%s: avio_open2() failed\n", output_filename); + exit(1); + } + AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_H264); + avstream = avformat_new_stream(avctx, codec); + if (avstream == nullptr) { + fprintf(stderr, "%s: avformat_new_stream() failed\n", output_filename); + exit(1); + } + avstream->time_base = AVRational{1, frame_rate}; // TODO + avstream->codec->width = width; + avstream->codec->height = height; + //avstream->codec->time_base = AVRational{1, 60}; // TODO + avstream->codec->time_base = AVRational{1, frame_rate}; + avstream->codec->ticks_per_frame = 1; // or 2? + + if (avformat_write_header(avctx, NULL) < 0) { + fprintf(stderr, "%s: avformat_write_header() failed\n", output_filename); + exit(1); + } + + coded_fp = fopen("dump.h264", "wb"); + assert(coded_fp != NULL); + + frame_width = width; + frame_height = height; + frame_width_mbaligned = (frame_width + 15) & (~15); + frame_height_mbaligned = (frame_height + 15) & (~15); + frame_bitrate = 15000000; // / 60; + current_frame_encoding = 0; + + print_input(); + + init_va(); + setup_encode(); + + // No frames are ready yet. + memset(srcsurface_status, SRC_SURFACE_FREE, sizeof(srcsurface_status)); + + memset(&seq_param, 0, sizeof(seq_param)); + memset(&pic_param, 0, sizeof(pic_param)); + memset(&slice_param, 0, sizeof(slice_param)); + + storage_thread = std::thread(&H264Encoder::storage_task_thread, this); + + copy_thread = std::thread([this]{ + //SDL_GL_MakeCurrent(window, context); + QOpenGLContext *context = create_context(); + eglBindAPI(EGL_OPENGL_API); + if (!make_current(context, this->surface)) { + printf("display=%p surface=%p context=%p curr=%p err=%d\n", eglGetCurrentDisplay(), this->surface, context, eglGetCurrentContext(), + eglGetError()); + exit(1); + } + copy_thread_func(); + }); +} + +H264Encoder::~H264Encoder() +{ + { + unique_lock lock(storage_task_queue_mutex); + storage_thread_should_quit = true; + storage_task_queue_changed.notify_all(); + } + { + unique_lock lock(frame_queue_mutex); + copy_thread_should_quit = true; + frame_queue_nonempty.notify_one(); + } + storage_thread.join(); + copy_thread.join(); + + release_encode(); + deinit_va(); + + av_write_trailer(avctx); + avformat_free_context(avctx); +} + +bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex) +{ + { + // Wait until this frame slot is done encoding. + std::unique_lock lock(storage_task_queue_mutex); + storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || (srcsurface_status[current_storage_frame % SURFACE_NUM] == SRC_SURFACE_FREE); }); + if (storage_thread_should_quit) return false; + } + + //*fbo = fbos[current_storage_frame % SURFACE_NUM]; + GLSurface *surf = &gl_surfaces[current_storage_frame % SURFACE_NUM]; + *y_tex = surf->y_tex; + *cbcr_tex = surf->cbcr_tex; + + VASurfaceID surface = surf->src_surface; + VAStatus va_status = vaDeriveImage(va_dpy, surface, &surf->surface_image); + CHECK_VASTATUS(va_status, "vaDeriveImage"); + + VABufferInfo buf_info; + buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM? + va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info); + CHECK_VASTATUS(va_status, "vaAcquireBufferHandle"); + + // Create Y image. + surf->y_egl_image = EGL_NO_IMAGE_KHR; + EGLint y_attribs[] = { + EGL_WIDTH, frame_width, + EGL_HEIGHT, frame_height, + EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '), + EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), + EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]), + EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]), + EGL_NONE + }; + + surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs); + assert(surf->y_egl_image != EGL_NO_IMAGE_KHR); + + // Associate Y image to a texture. + glBindTexture(GL_TEXTURE_2D, *y_tex); + glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image); + + // Create CbCr image. + surf->cbcr_egl_image = EGL_NO_IMAGE_KHR; + EGLint cbcr_attribs[] = { + EGL_WIDTH, frame_width, + EGL_HEIGHT, frame_height, + EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'), + EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), + EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]), + EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]), + EGL_NONE + }; + + surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs); + assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR); + + // Associate CbCr image to a texture. + glBindTexture(GL_TEXTURE_2D, *cbcr_tex); + glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image); + + return true; +} + +void H264Encoder::end_frame(GLsync fence) +{ + { + unique_lock lock(frame_queue_mutex); + pending_frames[current_storage_frame++] = fence; + } + frame_queue_nonempty.notify_one(); +} + +void H264Encoder::copy_thread_func() +{ + for ( ;; ) { + GLsync fence; + encoding2display_order(current_frame_encoding, intra_period, intra_idr_period, ip_period, + ¤t_frame_display, ¤t_frame_type); + if (current_frame_type == FRAME_IDR) { + numShortTerm = 0; + current_frame_num = 0; + current_IDR_display = current_frame_display; + } + + { + unique_lock lock(frame_queue_mutex); + frame_queue_nonempty.wait(lock, [this]{ return copy_thread_should_quit || pending_frames.count(current_frame_display) != 0; }); + if (copy_thread_should_quit) return; + fence = pending_frames[current_frame_display]; + pending_frames.erase(current_frame_display); + } + + // Wait for the GPU to be done with the frame. + glClientWaitSync(fence, 0, 0); + glDeleteSync(fence); + + // Unmap the image. + GLSurface *surf = &gl_surfaces[current_frame_display % SURFACE_NUM]; + eglDestroyImageKHR(eglGetCurrentDisplay(), surf->y_egl_image); + eglDestroyImageKHR(eglGetCurrentDisplay(), surf->cbcr_egl_image); + VAStatus va_status = vaReleaseBufferHandle(va_dpy, surf->surface_image.buf); + CHECK_VASTATUS(va_status, "vaReleaseBufferHandle"); + va_status = vaDestroyImage(va_dpy, surf->surface_image.image_id); + CHECK_VASTATUS(va_status, "vaDestroyImage"); + + VASurfaceID surface = surf->src_surface; + + // Schedule the frame for encoding. + va_status = vaBeginPicture(va_dpy, context_id, surface); + CHECK_VASTATUS(va_status, "vaBeginPicture"); + + if (current_frame_type == FRAME_IDR) { + render_sequence(); + render_picture(); + if (h264_packedheader) { + render_packedsequence(); + render_packedpicture(); + } + } else { + //render_sequence(); + render_picture(); + } + render_slice(); + + va_status = vaEndPicture(va_dpy, context_id); + CHECK_VASTATUS(va_status, "vaEndPicture"); + + // so now the data is done encoding (well, async job kicked off)... + // we send that to the storage thread + storage_task_enqueue(current_frame_display, current_frame_encoding, current_frame_type); + + update_ReferenceFrames(); + ++current_frame_encoding; + } +} diff --git a/h264encode.h b/h264encode.h new file mode 100644 index 0000000..f355116 --- /dev/null +++ b/h264encode.h @@ -0,0 +1,105 @@ +// Hardware H.264 encoding via VAAPI. Heavily modified based on example +// code by Intel. Intel's original copyright and license is reproduced below: +// +// Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sub license, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice (including the +// next paragraph) shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +// IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +// ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +#ifndef _H264ENCODE_H +#define _H264ENCODE_H + +extern "C" { +#include +} +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pbo_frame_allocator.h" +#include "context.h" + +#define SURFACE_NUM 16 /* 16 surfaces for source YUV */ + +class H264Encoder { +public: + H264Encoder(QSurface *surface, int width, int height, const char *output_filename); + ~H264Encoder(); + //void add_frame(FrameAllocator::Frame frame, GLsync fence); + +#if 0 + struct Frame { + public: + GLuint fbo; + GLuint y_tex, cbcr_tex; + + private: + //int surface_subnum; + }; + void +#endif + bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); + void end_frame(GLsync fence); + +private: + struct storage_task { + unsigned long long display_order; + unsigned long long encode_order; + int frame_type; + }; + + void copy_thread_func(); + void storage_task_thread(); + void storage_task_enqueue(unsigned long long display_order, unsigned long long encode_order, int frame_type); + int save_codeddata(unsigned long long display_order, unsigned long long encode_order, int frame_type); + + std::thread copy_thread, storage_thread; + + std::mutex storage_task_queue_mutex; + std::condition_variable storage_task_queue_changed; + int srcsurface_status[SURFACE_NUM]; // protected by storage_task_queue_mutex + std::queue storage_task_queue; // protected by storage_task_queue_mutex + bool storage_thread_should_quit = false; // protected by storage_task_queue_mutex + + std::mutex frame_queue_mutex; + std::condition_variable frame_queue_nonempty; + bool copy_thread_should_quit = false; // under frame_queue_mutex + + //int frame_width, frame_height; + //int ; + int current_storage_frame; +#if 0 + std::map> pending_frames; +#endif + std::map pending_frames; + QSurface *surface; + + AVFormatContext *avctx; + AVStream *avstream; +}; + +#endif diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..34a1c49 --- /dev/null +++ b/main.cpp @@ -0,0 +1,28 @@ +#include +#include +#include +#include + +#include "mainwindow.h" + +int main(int argc, char *argv[]) +{ + setenv("QT_XCB_GL_INTEGRATION", "xcb_egl", 0); + + QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts, true); + QApplication app(argc, argv); + + QSurfaceFormat fmt; + fmt.setDepthBufferSize(0); + fmt.setStencilBufferSize(0); + fmt.setProfile(QSurfaceFormat::CoreProfile); + fmt.setMajorVersion(3); + fmt.setMinorVersion(1); + QSurfaceFormat::setDefaultFormat(fmt); + + MainWindow mainWindow; + mainWindow.resize(mainWindow.sizeHint()); + mainWindow.show(); + + return app.exec(); +} diff --git a/mainwindow.cpp b/mainwindow.cpp new file mode 100644 index 0000000..4804c08 --- /dev/null +++ b/mainwindow.cpp @@ -0,0 +1,13 @@ +#include "mainwindow.h" +#include "window.h" +#include + +#include "context.h" +#include "mixer.h" + +using std::thread; + +MainWindow::MainWindow() +{ + setCentralWidget(new Window(this)); +} diff --git a/mainwindow.h b/mainwindow.h new file mode 100644 index 0000000..fd60640 --- /dev/null +++ b/mainwindow.h @@ -0,0 +1,14 @@ +#ifndef MAINWINDOW_H +#define MAINWINDOW_H + +#include + +class MainWindow : public QMainWindow +{ + Q_OBJECT + +public: + MainWindow(); +}; + +#endif diff --git a/mixer.cpp b/mixer.cpp new file mode 100644 index 0000000..b8cdbe6 --- /dev/null +++ b/mixer.cpp @@ -0,0 +1,654 @@ +#define GL_GLEXT_PROTOTYPES 1 +#define NO_SDL_GLEXT 1 +#define NUM_CARDS 2 + +#define WIDTH 1280 +#define HEIGHT 720 + +#include +#include + +#undef Success + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "h264encode.h" +#include "context.h" +#include "bmusb.h" +#include "pbo_frame_allocator.h" + +using namespace movit; +using namespace std; +using namespace std::placeholders; + +// shared between all EGL contexts +EGLDisplay egl_display; +EGLSurface egl_surface; +EGLConfig ecfg; +EGLint ctxattr[] = { + EGL_CONTEXT_CLIENT_VERSION, 2, + EGL_CONTEXT_MAJOR_VERSION_KHR, 3, + EGL_CONTEXT_MINOR_VERSION_KHR, 1, + //EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR, + EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, + EGL_NONE +}; + +EGLConfig pbuffer_ecfg; + +std::mutex bmusb_mutex; // protects + +struct CaptureCard { + BMUSBCapture *usb; + + // Threading stuff + bool thread_initialized; + QSurface *surface; + QOpenGLContext *context; + + bool new_data_ready; // Whether new_frame contains anything. + PBOFrameAllocator::Frame new_frame; + GLsync new_data_ready_fence; // Whether new_frame is ready for rendering. + std::condition_variable new_data_ready_changed; // Set whenever new_data_ready is changed. +}; +CaptureCard cards[NUM_CARDS]; + +void bm_frame(int card_index, uint16_t timecode, + FrameAllocator::Frame video_frame, size_t video_offset, uint16_t video_format, + FrameAllocator::Frame audio_frame, size_t audio_offset, uint16_t audio_format) +{ + CaptureCard *card = &cards[card_index]; + if (!card->thread_initialized) { + printf("initializing context for bmusb thread %d\n", card_index); + eglBindAPI(EGL_OPENGL_API); + card->context = create_context(); + if (!make_current(card->context, card->surface)) { + printf("failed to create bmusb context\n"); + exit(1); + } + card->thread_initialized = true; + } + + if (video_frame.len - video_offset != 1280 * 750 * 2) { + printf("dropping frame with wrong length (%ld)\n", video_frame.len - video_offset); + FILE *fp = fopen("frame.raw", "wb"); + fwrite(video_frame.data, video_frame.len, 1, fp); + fclose(fp); + //exit(1); + card->usb->get_video_frame_allocator()->release_frame(video_frame); + card->usb->get_audio_frame_allocator()->release_frame(audio_frame); + return; + } + { + // Wait until the previous frame was consumed. + std::unique_lock lock(bmusb_mutex); + card->new_data_ready_changed.wait(lock, [card]{ return !card->new_data_ready; }); + } + GLuint pbo = (GLint)(intptr_t)video_frame.userdata; + check_error(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); + check_error(); + glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, video_frame.size); + check_error(); + //glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); + //check_error(); + GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); + check_error(); + assert(fence != nullptr); + { + std::unique_lock lock(bmusb_mutex); + card->new_data_ready = true; + card->new_frame = video_frame; + card->new_data_ready_fence = fence; + card->new_data_ready_changed.notify_all(); + } + + // Video frame will be released later. + card->usb->get_audio_frame_allocator()->release_frame(audio_frame); +} + +void place_rectangle(Effect *resample_effect, Effect *padding_effect, float x0, float y0, float x1, float y1) +{ + float srcx0 = 0.0f; + float srcx1 = 1.0f; + float srcy0 = 0.0f; + float srcy1 = 1.0f; + + // Cull. + if (x0 > 1280.0 || x1 < 0.0 || y0 > 720.0 || y1 < 0.0) { + CHECK(resample_effect->set_int("width", 1)); + CHECK(resample_effect->set_int("height", 1)); + CHECK(resample_effect->set_float("zoom_x", 1280.0)); + CHECK(resample_effect->set_float("zoom_y", 720.0)); + CHECK(padding_effect->set_int("left", 2000)); + CHECK(padding_effect->set_int("top", 2000)); + return; + } + + // Clip. (TODO: Clip on upper/left sides, too.) + if (x1 > 1280.0) { + srcx1 = (1280.0 - x0) / (x1 - x0); + x1 = 1280.0; + } + if (y1 > 720.0) { + srcy1 = (720.0 - y0) / (y1 - y0); + y1 = 720.0; + } + + float x_subpixel_offset = x0 - floor(x0); + float y_subpixel_offset = y0 - floor(y0); + + // Resampling must be to an integral number of pixels. Round up, + // and then add an extra pixel so we have some leeway for the border. + int width = int(ceil(x1 - x0)) + 1; + int height = int(ceil(y1 - y0)) + 1; + CHECK(resample_effect->set_int("width", width)); + CHECK(resample_effect->set_int("height", height)); + + // Correct the discrepancy with zoom. (This will leave a small + // excess edge of pixels and subpixels, which we'll correct for soon.) + float zoom_x = (x1 - x0) / (width * (srcx1 - srcx0)); + float zoom_y = (y1 - y0) / (height * (srcy1 - srcy0)); + CHECK(resample_effect->set_float("zoom_x", zoom_x)); + CHECK(resample_effect->set_float("zoom_y", zoom_y)); + CHECK(resample_effect->set_float("zoom_center_x", 0.0f)); + CHECK(resample_effect->set_float("zoom_center_y", 0.0f)); + + // Padding must also be to a whole-pixel offset. + CHECK(padding_effect->set_int("left", floor(x0))); + CHECK(padding_effect->set_int("top", floor(y0))); + + // Correct _that_ discrepancy by subpixel offset in the resampling. + CHECK(resample_effect->set_float("left", -x_subpixel_offset / zoom_x)); + CHECK(resample_effect->set_float("top", -y_subpixel_offset / zoom_y)); + + // Finally, adjust the border so it is exactly where we want it. + CHECK(padding_effect->set_float("border_offset_left", x_subpixel_offset)); + CHECK(padding_effect->set_float("border_offset_right", x1 - (floor(x0) + width))); + CHECK(padding_effect->set_float("border_offset_top", y_subpixel_offset)); + CHECK(padding_effect->set_float("border_offset_bottom", y1 - (floor(y0) + height))); +} + +void mixer_thread(QSurface *surface, QSurface *surface2, QSurface *surface3, QSurface *surface4) +{ + bool quit = false; + + cards[0].surface = surface3; +#if NUM_CARDS == 2 + cards[1].surface = surface4; +#endif + + eglBindAPI(EGL_OPENGL_API); + //QSurface *surface = create_surface(); + QOpenGLContext *context = create_context(); + if (!make_current(context, surface)) { + printf("oops\n"); + exit(1); + } + printf("egl=%p\n", eglGetCurrentContext()); + + CHECK(init_movit("/usr/share/movit", MOVIT_DEBUG_ON)); + printf("GPU texture subpixel precision: about %.1f bits\n", + log2(1.0f / movit_texel_subpixel_precision)); + printf("Wrongly rounded x+0.48 or x+0.52 values: %d/510\n", + movit_num_wrongly_rounded); + if (movit_num_wrongly_rounded > 0) { + if (movit_shader_rounding_supported) { + printf("Rounding off in the shader to compensate.\n"); + } else { + printf("No shader roundoff available; cannot compensate.\n"); + } + } + + //printf("egl_api=%d EGL_OPENGL_API=%d\n", epoxy_egl_get_current_gl_context_api(), EGL_OPENGL_API); + //exit(0); + + check_error(); + + EffectChain chain(WIDTH, HEIGHT); + check_error(); +#if 0 + glViewport(0, 0, WIDTH, HEIGHT); + check_error(); + + glMatrixMode(GL_PROJECTION); + check_error(); + glLoadIdentity(); + check_error(); + glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + check_error(); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); +#endif + + ImageFormat inout_format; + inout_format.color_space = COLORSPACE_sRGB; + inout_format.gamma_curve = GAMMA_sRGB; + + YCbCrFormat ycbcr_format; + ycbcr_format.chroma_subsampling_x = 2; + ycbcr_format.chroma_subsampling_y = 1; + ycbcr_format.cb_x_position = 0.0; + ycbcr_format.cr_x_position = 0.0; + ycbcr_format.cb_y_position = 0.5; + ycbcr_format.cr_y_position = 0.5; + ycbcr_format.luma_coefficients = YCBCR_REC_601; + ycbcr_format.full_range = false; + + YCbCrInput *input[NUM_CARDS]; + + input[0] = new YCbCrInput(inout_format, ycbcr_format, WIDTH, HEIGHT, YCBCR_INPUT_SPLIT_Y_AND_CBCR); + chain.add_input(input[0]); + //if (NUM_CARDS == 2) { + input[1] = new YCbCrInput(inout_format, ycbcr_format, WIDTH, HEIGHT, YCBCR_INPUT_SPLIT_Y_AND_CBCR); + chain.add_input(input[1]); + //} + //YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(inout_format, ycbcr_format, WIDTH, HEIGHT); + //YCbCr422InterleavedInput *input = new YCbCr422InterleavedInput(inout_format, ycbcr_format, 2, 1); + Effect *resample_effect = chain.add_effect(new ResampleEffect(), input[0]); + Effect *padding_effect = chain.add_effect(new IntegralPaddingEffect()); + float border_color[] = { 0.0f, 0.0f, 0.0f, 1.0f }; + CHECK(padding_effect->set_vec4("border_color", border_color)); + + //Effect *resample2_effect = chain.add_effect(new ResampleEffect(), input[1 % NUM_CARDS]); + Effect *resample2_effect = chain.add_effect(new ResampleEffect(), input[1]); + Effect *saturation_effect = chain.add_effect(new SaturationEffect()); + CHECK(saturation_effect->set_float("saturation", 0.3f)); + Effect *wb_effect = chain.add_effect(new WhiteBalanceEffect()); + CHECK(wb_effect->set_float("output_color_temperature", 3500.0)); + Effect *padding2_effect = chain.add_effect(new IntegralPaddingEffect()); + + chain.add_effect(new OverlayEffect(), padding_effect, padding2_effect); + + ycbcr_format.chroma_subsampling_x = 1; + + chain.add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR); + chain.set_dither_bits(8); + chain.set_output_origin(OUTPUT_ORIGIN_TOP_LEFT); + chain.finalize(); + +#if 0 + // generate a PBO to hold the data we read back with glReadPixels() + // (Intel/DRI goes into a slow path if we don't read to PBO) + GLuint pbo; + glGenBuffers(1, &pbo); + glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); + glBufferData(GL_PIXEL_PACK_BUFFER_ARB, WIDTH * HEIGHT * 4, NULL, GL_STREAM_READ); +#endif + + //make_hsv_wheel_texture(); + +// QSurface *create_surface(const QSurfaceFormat &format); + H264Encoder h264_encoder(surface2, WIDTH, HEIGHT, "test.mp4"); + + printf("Configuring first card...\n"); + cards[0].usb = new BMUSBCapture(0x1edb, 0xbd3b); // 0xbd4f + //cards[0].usb = new BMUSBCapture(0x1edb, 0xbd4f); + cards[0].usb->set_frame_callback(std::bind(bm_frame, 0, _1, _2, _3, _4, _5, _6, _7)); + std::unique_ptr pbo_allocator1(new PBOFrameAllocator(1280 * 750 * 2 + 44)); + cards[0].usb->set_video_frame_allocator(pbo_allocator1.get()); + cards[0].usb->configure_card(); + + std::unique_ptr pbo_allocator2(new PBOFrameAllocator(1280 * 750 * 2 + 44)); + if (NUM_CARDS == 2) { + printf("Configuring second card...\n"); + cards[1].usb = new BMUSBCapture(0x1edb, 0xbd4f); + cards[1].usb->set_frame_callback(std::bind(bm_frame, 1, _1, _2, _3, _4, _5, _6, _7)); + cards[1].usb->set_video_frame_allocator(pbo_allocator2.get()); + cards[1].usb->configure_card(); + } + + BMUSBCapture::start_bm_thread(); + + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + cards[card_index].usb->start_bm_capture(); + } + + int frame = 0; +#if _POSIX_C_SOURCE >= 199309L + struct timespec start, now; + clock_gettime(CLOCK_MONOTONIC, &start); +#else + struct timeval start, now; + gettimeofday(&start, NULL); +#endif + + PBOFrameAllocator::Frame bmusb_current_rendering_frame[NUM_CARDS]; + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + bmusb_current_rendering_frame[card_index] = + cards[card_index].usb->get_video_frame_allocator()->alloc_frame(); + GLint input_tex_pbo = (GLint)(intptr_t)bmusb_current_rendering_frame[card_index].userdata; + input[card_index]->set_pixel_data(0, nullptr, input_tex_pbo); + input[card_index]->set_pixel_data(1, nullptr, input_tex_pbo); + check_error(); + } + + //chain.enable_phase_timing(true); + + // Set up stuff for NV12 conversion. +#if 0 + PBOFrameAllocator nv12_frame_pool(WIDTH * HEIGHT * 3 / 2, /*num_frames=*/24, + GL_PIXEL_PACK_BUFFER_ARB, GL_MAP_READ_BIT | GL_MAP_COHERENT_BIT, 0); +#endif + ResourcePool *resource_pool = chain.get_resource_pool(); + //GLuint ycbcr_tex = resource_pool->create_2d_texture(GL_RGBA8, WIDTH, HEIGHT); + GLuint chroma_tex = resource_pool->create_2d_texture(GL_RG8, WIDTH, HEIGHT); + +#if 0 + // Y shader. + string y_vert_shader = read_version_dependent_file("vs-y", "vert"); + string y_frag_shader = + "#version 130 \n" + "in vec2 tc; \n" + "uniform sampler2D ycbcr_tex; \n" + "void main() { \n" + " gl_FragColor = texture2D(ycbcr_tex, tc); \n" + "} \n"; + GLuint y_program_num = resource_pool->compile_glsl_program(y_vert_shader, y_frag_shader); +#endif + +#if 1 + // Cb/Cr shader. + string cbcr_vert_shader = read_version_dependent_file("vs-cbcr", "vert"); + string cbcr_frag_shader = + "#version 130 \n" + "in vec2 tc0; \n" +// "in vec2 tc1; \n" + "uniform sampler2D cbcr_tex; \n" + "void main() { \n" + " gl_FragColor = texture2D(cbcr_tex, tc0); \n" +// " gl_FragColor.ba = texture2D(cbcr_tex, tc1).gb; \n" + "} \n"; + GLuint cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader); +#endif + + GLuint vao; + glGenVertexArrays(1, &vao); + check_error(); + + while (!quit) { + ++frame; + + //int width0 = lrintf(848 * (1.0 + 0.2 * sin(frame * 0.02))); + int width0 = 848; + int height0 = lrintf(width0 * 9.0 / 16.0); + + //float top0 = 96 + 48 * sin(frame * 0.005); + //float left0 = 96 + 48 * cos(frame * 0.006); + float top0 = 48; + float left0 = 16; + float bottom0 = top0 + height0; + float right0 = left0 + width0; + + int width1 = 384; + int height1 = 216; + + float bottom1 = 720 - 48; + float right1 = 1280 - 16; + float top1 = bottom1 - height1; + float left1 = right1 - width1; + + float t = 0.5 + 0.5 * cos(frame * 0.006); + //float t = 0.0; + float scale0 = 1.0 + t * (1280.0 / 848.0 - 1.0); + float tx0 = 0.0 + t * (-16.0 * scale0); + float ty0 = 0.0 + t * (-48.0 * scale0); + + top0 = top0 * scale0 + ty0; + bottom0 = bottom0 * scale0 + ty0; + left0 = left0 * scale0 + tx0; + right0 = right0 * scale0 + tx0; + + top1 = top1 * scale0 + ty0; + bottom1 = bottom1 * scale0 + ty0; + left1 = left1 * scale0 + tx0; + right1 = right1 * scale0 + tx0; + + place_rectangle(resample_effect, padding_effect, left0, top0, right0, bottom0); + place_rectangle(resample2_effect, padding2_effect, left1, top1, right1, bottom1); + + CaptureCard card_copy[NUM_CARDS]; + + { + std::unique_lock lock(bmusb_mutex); + + // The first card is the master timer, so wait for it to have a new frame. + // TODO: Make configurable, and with a timeout. + cards[0].new_data_ready_changed.wait(lock, []{ return cards[0].new_data_ready; }); + + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + CaptureCard *card = &cards[card_index]; + card_copy[card_index].usb = card->usb; + card_copy[card_index].new_data_ready = card->new_data_ready; + card_copy[card_index].new_frame = card->new_frame; + card_copy[card_index].new_data_ready_fence = card->new_data_ready_fence; + card->new_data_ready = false; + card->new_data_ready_changed.notify_all(); + } + } + + for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { + CaptureCard *card = &card_copy[card_index]; + if (!card->new_data_ready) + continue; + + // FIXME: We could still be rendering from it! + card->usb->get_video_frame_allocator()->release_frame(bmusb_current_rendering_frame[card_index]); + bmusb_current_rendering_frame[card_index] = card->new_frame; + + // The new texture might still be uploaded, + // tell the GPU to wait until it's there. + if (card->new_data_ready_fence) + glWaitSync(card->new_data_ready_fence, /*flags=*/0, GL_TIMEOUT_IGNORED); + check_error(); + glDeleteSync(card->new_data_ready_fence); + check_error(); + GLint input_tex_pbo = (GLint)(intptr_t)bmusb_current_rendering_frame[card_index].userdata; + input[card_index]->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET((1280 * 750 * 2 + 44) / 2 + 1280 * 25 + 22), input_tex_pbo); + input[card_index]->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(1280 * 25 + 22), input_tex_pbo); + + if (NUM_CARDS == 1) { + // Set to the other one, too. + input[1]->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET((1280 * 750 * 2 + 44) / 2 + 1280 * 25 + 22), input_tex_pbo); + input[1]->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(1280 * 25 + 22), input_tex_pbo); + } + } + + GLuint y_tex, cbcr_tex; + bool got_frame = h264_encoder.begin_frame(&y_tex, &cbcr_tex); + assert(got_frame); + + // Render chain. + { + GLuint ycbcr_fbo = resource_pool->create_fbo(y_tex, chroma_tex); + chain.render_to_fbo(ycbcr_fbo, WIDTH, HEIGHT); + resource_pool->release_fbo(ycbcr_fbo); + } + if (false) { + glViewport(0, 0, WIDTH, HEIGHT); + chain.render_to_screen(); + } + +#if 0 + PBOFrameAllocator::Frame nv12_frame = nv12_frame_pool.alloc_frame(); + assert(nv12_frame.data != nullptr); // should never happen... maybe? + GLuint pbo = (GLuint)(intptr_t)nv12_frame.userdata; + glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); + check_error(); +#endif + + // Set up for extraction. + float vertices[] = { + 0.0f, 2.0f, + 0.0f, 0.0f, + 2.0f, 0.0f + }; + + glBindVertexArray(vao); + check_error(); + +#if 0 + // Extract Y. + GLuint y_fbo = resource_pool->create_fbo(y_tex); + glBindFramebuffer(GL_FRAMEBUFFER, y_fbo); + glViewport(0, 0, WIDTH, HEIGHT); + check_error(); + { + glUseProgram(y_program_num); + check_error(); + glActiveTexture(GL_TEXTURE0); + check_error(); + glBindTexture(GL_TEXTURE_2D, ycbcr_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + + GLuint position_vbo = fill_vertex_attribute(y_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); + GLuint texcoord_vbo = fill_vertex_attribute(y_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. + + glDrawArrays(GL_TRIANGLES, 0, 3); + check_error(); + + cleanup_vertex_attribute(y_program_num, "position", position_vbo); + check_error(); + cleanup_vertex_attribute(y_program_num, "texcoord", texcoord_vbo); + check_error(); + + resource_pool->release_fbo(y_fbo); + } +#endif + + // Extract Cb/Cr. + GLuint cbcr_fbo = resource_pool->create_fbo(cbcr_tex); + glBindFramebuffer(GL_FRAMEBUFFER, cbcr_fbo); + glViewport(0, 0, WIDTH/2, HEIGHT/2); + check_error(); + GLsync fence; + { + glUseProgram(cbcr_program_num); + check_error(); + + glActiveTexture(GL_TEXTURE0); + check_error(); + glBindTexture(GL_TEXTURE_2D, chroma_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + + float chroma_offset_0[] = { -0.5f / WIDTH, 0.0f }; +// float chroma_offset_1[] = { +0.5f / WIDTH, 0.0f }; + set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0); +// set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1); + + GLuint position_vbo = fill_vertex_attribute(cbcr_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); + GLuint texcoord_vbo = fill_vertex_attribute(cbcr_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. + + glDrawArrays(GL_TRIANGLES, 0, 3); + check_error(); + + cleanup_vertex_attribute(cbcr_program_num, "position", position_vbo); + cleanup_vertex_attribute(cbcr_program_num, "texcoord", texcoord_vbo); + + glUseProgram(0); + check_error(); + +#if 0 + glReadPixels(0, 0, WIDTH/4, HEIGHT/2, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(WIDTH * HEIGHT)); + check_error(); +#endif + + fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); + check_error(); + + resource_pool->release_fbo(cbcr_fbo); + } + +#if 0 + glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); +#endif + +#if 1 + h264_encoder.end_frame(fence); +#else + nv12_frame_pool.release_frame(nv12_frame); +#endif + + //eglSwapBuffers(egl_display, egl_surface); + + +#if 1 +#if _POSIX_C_SOURCE >= 199309L + clock_gettime(CLOCK_MONOTONIC, &now); + double elapsed = now.tv_sec - start.tv_sec + + 1e-9 * (now.tv_nsec - start.tv_nsec); +#else + gettimeofday(&now, NULL); + double elapsed = now.tv_sec - start.tv_sec + + 1e-6 * (now.tv_usec - start.tv_usec); +#endif + if (frame % 100 == 0) { + printf("%d frames in %.3f seconds = %.1f fps (%.1f ms/frame)\n", + frame, elapsed, frame / elapsed, + 1e3 * elapsed / frame); + // chain.print_phase_timing(); + } + + // Reset every 100 frames, so that local variations in frame times + // (especially for the first few frames, when the shaders are + // compiled etc.) don't make it hard to measure for the entire + // remaining duration of the program. + if (frame == 10000) { + frame = 0; + start = now; + } +#endif + } + glDeleteVertexArrays(1, &vao); + //resource_pool->release_glsl_program(y_program_num); + resource_pool->release_glsl_program(cbcr_program_num); + resource_pool->release_2d_texture(chroma_tex); + BMUSBCapture::stop_bm_thread(); +} diff --git a/mixer.h b/mixer.h new file mode 100644 index 0000000..ef7701f --- /dev/null +++ b/mixer.h @@ -0,0 +1,2 @@ +class QSurface; +void mixer_thread(QSurface *surface, QSurface *surface2, QSurface *surface3, QSurface *surface4); diff --git a/pbo_frame_allocator.cpp b/pbo_frame_allocator.cpp new file mode 100644 index 0000000..59013bf --- /dev/null +++ b/pbo_frame_allocator.cpp @@ -0,0 +1,70 @@ +#include "pbo_frame_allocator.h" +#include "util.h" + +using namespace std; + +PBOFrameAllocator::PBOFrameAllocator(size_t frame_size, size_t num_queued_frames, GLenum buffer, GLenum permissions, GLenum map_bits) + : frame_size(frame_size), buffer(buffer) +{ + for (size_t i = 0; i < num_queued_frames; ++i) { + GLuint pbo; + glGenBuffers(1, &pbo); + check_error(); + glBindBuffer(buffer, pbo); + check_error(); + glBufferStorage(buffer, frame_size, NULL, permissions | GL_MAP_PERSISTENT_BIT); + check_error(); + + Frame frame; + frame.data = (uint8_t *)glMapBufferRange(buffer, 0, frame_size, permissions | map_bits | GL_MAP_PERSISTENT_BIT); + frame.data2 = frame.data + frame_size / 2; + check_error(); + frame.size = frame_size; + frame.userdata = (void *)(intptr_t)pbo; + frame.owner = this; + frame.interleaved = true; + freelist.push(frame); + } + glBindBuffer(buffer, 0); + check_error(); +} + +PBOFrameAllocator::~PBOFrameAllocator() +{ + while (!freelist.empty()) { + Frame frame = freelist.front(); + freelist.pop(); + GLuint pbo = (intptr_t)frame.userdata; + glBindBuffer(buffer, pbo); + check_error(); + glUnmapBuffer(buffer); + check_error(); + glBindBuffer(buffer, 0); + check_error(); + glDeleteBuffers(1, &pbo); + } +} +//static int sumsum = 0; + +FrameAllocator::Frame PBOFrameAllocator::alloc_frame() +{ + Frame vf; + + std::unique_lock lock(freelist_mutex); // Meh. + if (freelist.empty()) { + printf("Frame overrun (no more spare PBO frames), dropping frame!\n"); + } else { + //fprintf(stderr, "freelist has %d allocated\n", ++sumsum); + vf = freelist.front(); + freelist.pop(); // Meh. + } + vf.len = 0; + return vf; +} + +void PBOFrameAllocator::release_frame(Frame frame) +{ + std::unique_lock lock(freelist_mutex); + freelist.push(frame); + //--sumsum; +} diff --git a/pbo_frame_allocator.h b/pbo_frame_allocator.h new file mode 100644 index 0000000..1155d20 --- /dev/null +++ b/pbo_frame_allocator.h @@ -0,0 +1,34 @@ +#ifndef _PBO_FRAME_ALLOCATOR +#define _PBO_FRAME_ALLOCATOR 1 + +#include +#include +#include + +#include "bmusb.h" + +// An allocator that allocates straight into OpenGL pinned memory. +// Meant for video frames only. We use a queue rather than a stack, +// since we want to maximize pipelineability. +class PBOFrameAllocator : public FrameAllocator { +public: + // Note: You need to have an OpenGL context when calling + // the constructor. + PBOFrameAllocator(size_t frame_size, + size_t num_queued_frames = 16, // FIXME: should be 6 + GLenum buffer = GL_PIXEL_UNPACK_BUFFER_ARB, + GLenum permissions = GL_MAP_WRITE_BIT, + GLenum map_bits = GL_MAP_FLUSH_EXPLICIT_BIT); + ~PBOFrameAllocator() override; + Frame alloc_frame() override; + void release_frame(Frame frame) override; + +private: + size_t frame_size; + + std::mutex freelist_mutex; + std::queue freelist; + GLenum buffer; +}; + +#endif // !defined(_PBO_FRAME_ALLOCATOR) diff --git a/vs-cbcr.130.vert b/vs-cbcr.130.vert new file mode 100644 index 0000000..3c639ce --- /dev/null +++ b/vs-cbcr.130.vert @@ -0,0 +1,23 @@ +#version 130 + +in vec2 position; +in vec2 texcoord; +out vec2 tc0; +//out vec2 tc1; +uniform vec2 foo_chroma_offset_0; +//uniform vec2 foo_chroma_offset_1; + +void main() +{ + // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: + // + // 2.000 0.000 0.000 -1.000 + // 0.000 2.000 0.000 -1.000 + // 0.000 0.000 -2.000 -1.000 + // 0.000 0.000 0.000 1.000 + gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); + vec2 flipped_tc = texcoord; +// flipped_tc.y = 1.0 - flipped_tc.y; + tc0 = flipped_tc + foo_chroma_offset_0; +// tc1 = flipped_tc + foo_chroma_offset_1; +} diff --git a/window.cpp b/window.cpp new file mode 100644 index 0000000..b8eced7 --- /dev/null +++ b/window.cpp @@ -0,0 +1,23 @@ +#include "glwidget.h" +#include "window.h" +#include "mainwindow.h" +#include +#include + +Window::Window(MainWindow *mw) + : main_window(mw) +{ + gl_widget = new GLWidget; + + QVBoxLayout *mainLayout = new QVBoxLayout; + QHBoxLayout *container = new QHBoxLayout; + container->addWidget(gl_widget); + + QWidget *w = new QWidget; + w->setLayout(container); + mainLayout->addWidget(w); + + setLayout(mainLayout); + + setWindowTitle(tr("Nageru")); +} diff --git a/window.h b/window.h new file mode 100644 index 0000000..ff883f4 --- /dev/null +++ b/window.h @@ -0,0 +1,21 @@ +#ifndef WINDOW_H +#define WINDOW_H + +#include + +class GLWidget; +class MainWindow; + +class Window : public QWidget +{ + Q_OBJECT + +public: + Window(MainWindow *mw); + +private: + GLWidget *gl_widget; + MainWindow *main_window; +}; + +#endif -- 2.39.2