]> git.sesse.net Git - bmusb/blob - bmusb.cpp
If we have an audio block with no video, send it on anyway (since audio continuity...
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup.
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <assert.h>
8 #include <errno.h>
9 #include <libusb.h>
10 #include <netinet/in.h>
11 #include <sched.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #ifdef __SSE4_1__
17 #include <immintrin.h>
18 #endif
19 #include "bmusb.h"
20
21 #include <algorithm>
22 #include <atomic>
23 #include <condition_variable>
24 #include <cstddef>
25 #include <cstdint>
26 #include <deque>
27 #include <functional>
28 #include <memory>
29 #include <mutex>
30 #include <stack>
31 #include <thread>
32
33 using namespace std;
34 using namespace std::placeholders;
35
// Frame geometry. WIDTH and HEIGHT are only referenced by the commented-out
// FRAME_SIZE formulas further down.
36 #define WIDTH 1280
37 #define HEIGHT 750  /* 30 lines ancillary data? */
38 //#define WIDTH 1920
39 //#define HEIGHT 1125  /* ??? lines ancillary data? */
// Number of bytes skipped at the start of a video frame: dump_frame() skips
// them, and the value is passed to frame_callback as the video offset.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Same as HEADER_SIZE, but for audio blocks.
42 #define AUDIO_HEADER_SIZE 4
43
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
// Size in bytes (8 MiB) of each buffer in the default video frame allocator.
46 #define FRAME_SIZE (8 << 20)
47
// Destination stream for dump_audio_block(). Nothing in the visible part of
// this file assigns it.
48 FILE *audiofp;
49
50 thread usb_thread;
// Polled by the loop in usb_thread_func(); the loop exits once this is true.
51 atomic<bool> should_quit;
52
53 FrameAllocator::~FrameAllocator() {}
54
55 #define NUM_QUEUED_FRAMES 16
56 class MallocFrameAllocator : public FrameAllocator {
57 public:
58         MallocFrameAllocator(size_t frame_size);
59         Frame alloc_frame() override;
60         void release_frame(Frame frame) override;
61
62 private:
63         size_t frame_size;
64
65         mutex freelist_mutex;
66         stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
67 };
68
69 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
70         : frame_size(frame_size)
71 {
72         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
73                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
74         }
75 }
76
77 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
78 {
79         Frame vf;
80         vf.owner = this;
81
82         unique_lock<mutex> lock(freelist_mutex);  // Meh.
83         if (freelist.empty()) {
84                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
85                         frame_size);
86         } else {
87                 vf.data = freelist.top().release();
88                 vf.size = frame_size;
89                 freelist.pop();  // Meh.
90         }
91         return vf;
92 }
93
94 void MallocFrameAllocator::release_frame(Frame frame)
95 {
96         if (frame.overflow > 0) {
97                 printf("%d bytes overflow after last (malloc) frame\n", int(frame.overflow));
98         }
99         unique_lock<mutex> lock(freelist_mutex);
100         freelist.push(unique_ptr<uint8_t[]>(frame.data));
101 }
102
// Ordering for 16-bit counters that wrap around: returns true if <b> is
// ahead of <a> by at least 1 and by less than 0x8000 steps, counting
// modulo 0x10000. Equal values, and values exactly 0x8000 apart, compare
// as "not less than" in both directions.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Number of increments needed to get from a to b, modulo 2^16.
        const uint16_t forward_distance = uint16_t(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
114
115 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
116 {
117         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
118                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
119                         q->back().timecode, timecode);
120                 frame.owner->release_frame(frame);
121                 return;
122         }
123
124         QueuedFrame qf;
125         qf.format = format;
126         qf.timecode = timecode;
127         qf.frame = frame;
128
129         {
130                 unique_lock<mutex> lock(queue_lock);
131                 q->push_back(move(qf));
132         }
133         queues_not_empty.notify_one();  // might be spurious
134 }
135
136 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
137 {
138         FILE *fp = fopen(filename, "wb");
139         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
140                 printf("short write!\n");
141         }
142         fclose(fp);
143 }
144
145 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
146 {
147         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
148 }
149
150 void BMUSBCapture::dequeue_thread_func()
151 {
152         if (has_dequeue_callbacks) {
153                 dequeue_init_callback();
154         }
155         while (!dequeue_thread_should_quit) {
156                 unique_lock<mutex> lock(queue_lock);
157                 queues_not_empty.wait(lock, [this]{ return dequeue_thread_should_quit || (!pending_video_frames.empty() && !pending_audio_frames.empty()); });
158
159                 uint16_t video_timecode = pending_video_frames.front().timecode;
160                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
161                 if (video_timecode < audio_timecode) {
162                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
163                                 video_timecode);
164                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
165                         pending_video_frames.pop_front();
166                 } else if (audio_timecode < video_timecode) {
167                         printf("Audio block 0x%04x without corresponding video block, sending blank frame.\n",
168                                 audio_timecode);
169                         QueuedFrame audio_frame = pending_audio_frames.front();
170                         pending_audio_frames.pop_front();
171                         lock.unlock();
172                         frame_callback(audio_timecode,
173                                        FrameAllocator::Frame(), 0, 0x0000,
174                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
175                 } else {
176                         QueuedFrame video_frame = pending_video_frames.front();
177                         QueuedFrame audio_frame = pending_audio_frames.front();
178                         pending_audio_frames.pop_front();
179                         pending_video_frames.pop_front();
180                         lock.unlock();
181
182 #if 0
183                         char filename[255];
184                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
185                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
186                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
187 #endif
188
189                         frame_callback(video_timecode,
190                                        video_frame.frame, HEADER_SIZE, video_frame.format,
191                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
192                 }
193         }
194         if (has_dequeue_callbacks) {
195                 dequeue_cleanup_callback();
196         }
197 }
198
199 void BMUSBCapture::start_new_frame(const uint8_t *start)
200 {
201         uint16_t format = (start[3] << 8) | start[2];
202         uint16_t timecode = (start[1] << 8) | start[0];
203
204         if (current_video_frame.len > 0) {
205                 // If format is 0x0800 (no signal), add a fake (empty) audio
206                 // frame to get it out of the queue.
207                 // TODO: Figure out if there are other formats that come with
208                 // no audio, and treat them the same.
209                 if (format == 0x0800) {
210                         FrameAllocator::Frame fake_audio_frame = audio_frame_allocator->alloc_frame();
211                         if (fake_audio_frame.data == nullptr) {
212                                 // Oh well, it's just a no-signal frame anyway.
213                                 printf("Couldn't allocate fake audio frame, also dropping no-signal video frame.\n");
214                                 current_video_frame.owner->release_frame(current_video_frame);
215                                 current_video_frame = video_frame_allocator->alloc_frame();
216                                 return;
217                         }
218                         queue_frame(format, timecode, fake_audio_frame, &pending_audio_frames);
219                 }
220                 //dump_frame();
221                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
222         }
223         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
224         //      format, timecode,
225         //      //start[7], start[6], start[5], start[4],
226         //      read_current_frame, FRAME_SIZE);
227
228         current_video_frame = video_frame_allocator->alloc_frame();
229         //if (current_video_frame.data == nullptr) {
230         //      read_current_frame = -1;
231         //} else {
232         //      read_current_frame = 0;
233         //}
234 }
235
236 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
237 {
238         uint16_t format = (start[3] << 8) | start[2];
239         uint16_t timecode = (start[1] << 8) | start[0];
240         if (current_audio_frame.len > 0) {
241                 //dump_audio_block();
242                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
243         }
244         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
245         //      format, timecode, read_current_audio_block);
246         current_audio_frame = audio_frame_allocator->alloc_frame();
247 }
248
249 #if 0
250 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
251 {
252         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
253         for (unsigned j = 0; j < pack->actual_length; j++) {
254         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
255                 printf("%02x", xfr->buffer[j + offset]);
256                 if ((j % 16) == 15)
257                         printf("\n");
258                 else if ((j % 8) == 7)
259                         printf("  ");
260                 else
261                         printf(" ");
262         }
263 }
264 #endif
265
// Splits <n> bytes from <src> into two streams: bytes at even positions go
// to <dest1>, bytes at odd positions go to <dest2>. <n> must be even
// (checked with assert), so each destination receives n/2 bytes.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        for (size_t i = 0; i < n; i += 2) {
                dest1[i / 2] = src[i];
                dest2[i / 2] = src[i + 1];
        }
}
277
278 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
279 {
280         if (current_frame->data == nullptr ||
281             current_frame->len > current_frame->size ||
282             start == end) {
283                 return;
284         }
285
286         int bytes = end - start;
287         if (current_frame->len + bytes > current_frame->size) {
288                 current_frame->overflow = current_frame->len + bytes - current_frame->size;
289                 current_frame->len = current_frame->size;
290                 if (current_frame->overflow > 1048576) {
291                         printf("%d bytes overflow after last %s frame\n",
292                                 int(current_frame->overflow), frame_type_name);
293                         current_frame->overflow = 0;
294                 }
295                 //dump_frame();
296         } else {
297                 if (current_frame->interleaved) {
298                         uint8_t *data = current_frame->data + current_frame->len / 2;
299                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
300                         if (current_frame->len % 2 == 1) {
301                                 ++data;
302                                 swap(data, data2);
303                         }
304                         if (bytes % 2 == 1) {
305                                 *data++ = *start++;
306                                 swap(data, data2);
307                                 ++current_frame->len;
308                                 --bytes;
309                         }
310                         memcpy_interleaved(data, data2, start, bytes);
311                         current_frame->len += bytes;
312                 } else {
313                         memcpy(current_frame->data + current_frame->len, start, bytes);
314                         current_frame->len += bytes;
315                 }
316         }
317 }
318
319 #ifdef __SSE4_1__
320
321 #if 0
// Debugging aid (compiled out by the surrounding #if 0): prints <name>,
// left-aligned in a 10-character field, followed by the 32 bytes of <n> in
// hex, byte 0 first, with extra spacing after every eighth byte.
// The extractions are written out one by one with literal indices rather
// than in a loop.
322 void avx2_dump(const char *name, __m256i n)
323 {
324         printf("%-10s:", name);
325         printf(" %02x", _mm256_extract_epi8(n, 0));
326         printf(" %02x", _mm256_extract_epi8(n, 1));
327         printf(" %02x", _mm256_extract_epi8(n, 2));
328         printf(" %02x", _mm256_extract_epi8(n, 3));
329         printf(" %02x", _mm256_extract_epi8(n, 4));
330         printf(" %02x", _mm256_extract_epi8(n, 5));
331         printf(" %02x", _mm256_extract_epi8(n, 6));
332         printf(" %02x", _mm256_extract_epi8(n, 7));
333         printf(" ");
334         printf(" %02x", _mm256_extract_epi8(n, 8));
335         printf(" %02x", _mm256_extract_epi8(n, 9));
336         printf(" %02x", _mm256_extract_epi8(n, 10));
337         printf(" %02x", _mm256_extract_epi8(n, 11));
338         printf(" %02x", _mm256_extract_epi8(n, 12));
339         printf(" %02x", _mm256_extract_epi8(n, 13));
340         printf(" %02x", _mm256_extract_epi8(n, 14));
341         printf(" %02x", _mm256_extract_epi8(n, 15));
342         printf(" ");
343         printf(" %02x", _mm256_extract_epi8(n, 16));
344         printf(" %02x", _mm256_extract_epi8(n, 17));
345         printf(" %02x", _mm256_extract_epi8(n, 18));
346         printf(" %02x", _mm256_extract_epi8(n, 19));
347         printf(" %02x", _mm256_extract_epi8(n, 20));
348         printf(" %02x", _mm256_extract_epi8(n, 21));
349         printf(" %02x", _mm256_extract_epi8(n, 22));
350         printf(" %02x", _mm256_extract_epi8(n, 23));
351         printf(" ");
352         printf(" %02x", _mm256_extract_epi8(n, 24));
353         printf(" %02x", _mm256_extract_epi8(n, 25));
354         printf(" %02x", _mm256_extract_epi8(n, 26));
355         printf(" %02x", _mm256_extract_epi8(n, 27));
356         printf(" %02x", _mm256_extract_epi8(n, 28));
357         printf(" %02x", _mm256_extract_epi8(n, 29));
358         printf(" %02x", _mm256_extract_epi8(n, 30));
359         printf(" %02x", _mm256_extract_epi8(n, 31));
360         printf("\n");
361 }
362 #endif
363
364 // Does a memcpy and memchr in one to reduce processing time.
365 // Note that the benefit is somewhat limited if your L3 cache is small,
366 // as you'll (unfortunately) spend most of the time loading the data
367 // from main memory.
368 //
369 // Complicated cases are left to the slow path; it basically stops copying
370 // up until the first instance of "sync_char" (usually a bit before, actually).
371 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
372 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame: frame being filled; its len is advanced by the number of
//                  bytes this function accounts for.
//   start, limit:  the input bytes, as the half-open range [start, limit).
//   sync_char:     byte value whose presence in a chunk ends the fast path.
//
// Returns a pointer to the first input byte that has NOT been consumed; the
// caller has to deal with everything from there up to its own limit. <start>
// is returned unchanged if the frame has no buffer, if len already exceeds
// size, if fewer than 128 bytes are available, or if the usable range ends
// up empty after clamping and alignment.
373 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
374 {
375         if (current_frame->data == nullptr ||
376             current_frame->len > current_frame->size ||
377             start == limit) {
378                 return start;
379         }
380         size_t orig_bytes = limit - start;
381         if (orig_bytes < 128) {
382                 // Don't bother.
383                 return start;
384         }
385
386         // Don't read more bytes than we can write.
387         limit = min(limit, start + (current_frame->size - current_frame->len));
388
389         // Align end to 32 bytes.
390         limit = (const uint8_t *)(intptr_t(limit) & ~31);
391
392         if (start >= limit) {
393                 return start;
394         }
395
396         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
        // These leading bytes go through the regular add_to_frame(). If one of
        // them is sync_char, only the bytes before it are added and we return.
397         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
398         if (aligned_start != start) {
399                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
400                 if (sync_start == nullptr) {
401                         add_to_frame(current_frame, "", start, aligned_start);
402                 } else {
403                         add_to_frame(current_frame, "", start, sync_start);
404                         return sync_start;
405                 }
406         }
407
408         // Make the length a multiple of 64.
        // (Only needed for interleaved frames, whose loops below consume two
        // 32-byte halves of input per iteration in both code paths.)
409         if (current_frame->interleaved) {
410                 if (((limit - aligned_start) % 64) != 0) {
411                         limit -= 32;
412                 }
413                 assert(((limit - aligned_start) % 64) == 0);
414         }
415
        // AVX2 variant: works on 32-byte vectors.
416 #if __AVX2__
417         const __m256i needle = _mm256_set1_epi8(sync_char);
418
419         const __restrict __m256i *in = (const __m256i *)aligned_start;
420         if (current_frame->interleaved) {
                // out1 receives the even-positioned input bytes, out2 the odd
                // ones. If len is odd, the next stream byte belongs to data2,
                // hence the swap.
421                 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
422                 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
423                 if (current_frame->len % 2 == 1) {
424                         swap(out1, out2);
425                 }
426
427                 __m256i shuffle_cw = _mm256_set_epi8(
428                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
429                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
430                 while (in < (const __m256i *)limit) {
431                         // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
432                         __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
433                         __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp
434
435                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
436                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
437                         __m256i found = _mm256_or_si256(found1, found2);
438
439                         data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
440                         data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop
441                 
442                         data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
443                         data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop
444
445                         __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
446                         __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
447
448                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
449                         _mm256_storeu_si256(out2, hi);
450
                        // A chunk containing sync_char has been stored above, but
                        // <in> is not advanced past it, so it is not counted in
                        // len and is returned to the caller as unconsumed.
451                         if (!_mm256_testz_si256(found, found)) {
452                                 break;
453                         }
454
455                         in += 2;
456                         ++out1;
457                         ++out2;
458                 }
459                 current_frame->len += (uint8_t *)in - aligned_start;
460         } else {
461                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
462                 while (in < (const __m256i *)limit) {
463                         __m256i data = _mm256_load_si256(in);
464                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
465                         __m256i found = _mm256_cmpeq_epi8(data, needle);
466                         if (!_mm256_testz_si256(found, found)) {
467                                 break;
468                         }
469
470                         ++in;
471                         ++out;
472                 }
                // <out> only moved past chunks without sync_char, so this
                // excludes the chunk that ended the loop early (if any).
473                 current_frame->len = (uint8_t *)out - current_frame->data;
474         }
475 #else
        // Variant without AVX2: same structure as above, with 16-byte vectors.
476         const __m128i needle = _mm_set1_epi8(sync_char);
477
478         const __m128i *in = (const __m128i *)aligned_start;
479         if (current_frame->interleaved) {
480                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
481                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
482                 if (current_frame->len % 2 == 1) {
483                         swap(out1, out2);
484                 }
485
486                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
487                 while (in < (const __m128i *)limit) {
488                         __m128i data1 = _mm_load_si128(in);
489                         __m128i data2 = _mm_load_si128(in + 1);
                        // Separate the low and the high byte of every 16-bit
                        // word, then pack each set back into 16 bytes.
490                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
491                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
492                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
493                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
494                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
495                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
496                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
497                         _mm_storeu_si128(out2, hi);
498                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
499                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
500                         if (!_mm_testz_si128(found1, found1) ||
501                             !_mm_testz_si128(found2, found2)) {
502                                 break;
503                         }
504
505                         in += 2;
506                         ++out1;
507                         ++out2;
508                 }
509                 current_frame->len += (uint8_t *)in - aligned_start;
510         } else {
511                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
512                 while (in < (const __m128i *)limit) {
513                         __m128i data = _mm_load_si128(in);
514                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
515                         __m128i found = _mm_cmpeq_epi8(data, needle);
516                         if (!_mm_testz_si128(found, found)) {
517                                 break;
518                         }
519
520                         ++in;
521                         ++out;
522                 }
523                 current_frame->len = (uint8_t *)out - current_frame->data;
524         }
525 #endif
526
527         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
528
        // First input byte not accounted for in current_frame->len.
529         return (const uint8_t *)in;
530 }
531 #endif
532
533 void decode_packs(const libusb_transfer *xfr,
534                   const char *sync_pattern,
535                   int sync_length,
536                   FrameAllocator::Frame *current_frame,
537                   const char *frame_type_name,
538                   function<void(const uint8_t *start)> start_callback)
539 {
540         int offset = 0;
541         for (int i = 0; i < xfr->num_iso_packets; i++) {
542                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
543
544                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
545                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
546                         continue;
547 //exit(5);
548                 }
549
550                 const uint8_t *start = xfr->buffer + offset;
551                 const uint8_t *limit = start + pack->actual_length;
552                 while (start < limit) {  // Usually runs only one iteration.
553 #ifdef __SSE4_1__
554                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
555                         if (start == limit) break;
556                         assert(start < limit);
557 #endif
558
559                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
560                         if (start_next_frame == nullptr) {
561                                 // add the rest of the buffer
562                                 add_to_frame(current_frame, frame_type_name, start, limit);
563                                 break;
564                         } else {
565                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
566                                 start = start_next_frame + sync_length;  // skip sync
567                                 start_callback(start);
568                         }
569                 }
570 #if 0
571                 dump_pack(xfr, offset, pack);
572 #endif
573                 offset += pack->length;
574         }
575 }
576
577 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
578 {
579         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
580                 fprintf(stderr, "transfer status %d\n", xfr->status);
581                 libusb_free_transfer(xfr);
582                 exit(3);
583         }
584
585         assert(xfr->user_data != nullptr);
586         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
587
588         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
589                 if (xfr->endpoint == 0x84) {
590                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
591                 } else {
592                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
593                 }
594         }
595         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
596                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
597                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
598 #if 0
599                 if (setup->wIndex == 44) {
600                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
601                 } else {
602                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
603                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
604                 }
605 #else
606                 memcpy(usb->register_file + usb->current_register, buf, 4);
607                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
608                 if (usb->current_register == 0) {
609                         // read through all of them
610                         printf("register dump:");
611                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
612                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
613                         }
614                         printf("\n");
615                 }
616                 libusb_fill_control_setup(xfr->buffer,
617                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
618                         /*index=*/usb->current_register, /*length=*/4);
619 #endif
620         }
621
622 #if 0
623         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
624         for (i = 0; i < xfr->actual_length; i++) {
625                 printf("%02x", xfr->buffer[i]);
626                 if (i % 16)
627                         printf("\n");
628                 else if (i % 8)
629                         printf("  ");
630                 else
631                         printf(" ");
632         }
633 #endif
634
635         if (libusb_submit_transfer(xfr) < 0) {
636                 fprintf(stderr, "error re-submitting URB\n");
637                 exit(1);
638         }
639 }
640
641 void BMUSBCapture::usb_thread_func()
642 {
643         sched_param param;
644         memset(&param, 0, sizeof(param));
645         param.sched_priority = 1;
646         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
647                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
648         }
649         while (!should_quit) {
650                 int rc = libusb_handle_events(nullptr);
651                 if (rc != LIBUSB_SUCCESS)
652                         break;
653         }
654 }
655
656 void BMUSBCapture::configure_card()
657 {
658         if (video_frame_allocator == nullptr) {
659                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
660         }
661         if (audio_frame_allocator == nullptr) {
662                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
663         }
664         dequeue_thread_should_quit = false;
665         dequeue_thread = thread(&BMUSBCapture::dequeue_thread_func, this);
666
667         int rc;
668         struct libusb_transfer *xfr;
669
670         rc = libusb_init(nullptr);
671         if (rc < 0) {
672                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
673                 exit(1);
674         }
675
676         //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
677         //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
678         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
679         if (!devh) {
680                 fprintf(stderr, "Error finding USB device\n");
681                 exit(1);
682         }
683
684         libusb_config_descriptor *config;
685         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
686         if (rc < 0) {
687                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
688                 exit(1);
689         }
690         printf("%d interface\n", config->bNumInterfaces);
691         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
692                 printf("  interface %d\n", interface_number);
693                 const libusb_interface *interface = &config->interface[interface_number];
694                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
695                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
696                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
697                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
698                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
699                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
700                         }
701                 }
702         }
703
704         rc = libusb_set_configuration(devh, /*configuration=*/1);
705         if (rc < 0) {
706                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
707                 exit(1);
708         }
709
710         rc = libusb_claim_interface(devh, 0);
711         if (rc < 0) {
712                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
713                 exit(1);
714         }
715
716         // Alternate setting 1 is output, alternate setting 2 is input.
717         // Card is reset when switching alternates, so the driver uses
718         // this “double switch” when it wants to reset.
719         //
720         // There's also alternate settings 3 and 4, which seem to be
721         // like 1 and 2 except they advertise less bandwidth needed.
722         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
723         if (rc < 0) {
724                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
725                 exit(1);
726         }
727         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
728         if (rc < 0) {
729                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
730                 exit(1);
731         }
732 #if 0
733         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
734         if (rc < 0) {
735                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
736                 exit(1);
737         }
738 #endif
739
740 #if 0
741         rc = libusb_claim_interface(devh, 3);
742         if (rc < 0) {
743                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
744                 exit(1);
745         }
746 #endif
747
748         // theories:
749         //   44 is some kind of timer register (first 16 bits count upwards)
750         //   24 is some sort of watchdog?
751         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
752         //      (or will go to 0x73c60010?), also seen 0x73c60100
753         //   12 also changes all the time, unclear why  
754         //   16 seems to be autodetected mode somehow
755         //      --    this is e00115e0 after reset?
756         //                    ed0115e0 after mode change [to output?]
757         //                    2d0015e0 after more mode change [to input]
758         //                    ed0115e0 after more mode change
759         //                    2d0015e0 after more mode change
760         //
761         //                    390115e0 seems to indicate we have signal
762         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
763         //
764         //                    200015e0 on startup
765         //         changes to 250115e0 when we sync to the signal
766         //
767         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
768         //
769         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
770         //
771         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
772         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
773         //
774         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
775         //    perhaps some of them are related to analog output?
776         //
777         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
778         //    but the driver sets it to 0x8036802a at some point.
779         //
780         //    all of this is on request 214/215. other requests (192, 219,
781         //    222, 223, 224) are used for firmware upgrade. Probably best to
782         //    stay out of it unless you know what you're doing.
783         //
784         //
785         // register 16:
786         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
787         //
788         // theories:
789         //   0x01 - stable signal
790         //   0x04 - deep color
791         //   0x08 - unknown (audio??)
792         //   0x20 - 720p??
793         //   0x30 - 576p??
794
795         struct ctrl {
796                 int endpoint;
797                 int request;
798                 int index;
799                 uint32_t data;
800         };
801         static const ctrl ctrls[] = {
802                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
803                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
804
805                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
806                 // capture (v210).
807                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
808                 // 0x10000000 = analog audio instead of embedded audio, it seems
809                 // 0x3a000000 = component video? (analog audio)
810                 // 0x3c000000 = composite video? (analog audio)
811                 // 0x3e000000 = s-video? (analog audio)
812                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
813                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
814                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
815                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
816                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
817         };
818
819         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
820                 uint32_t flipped = htonl(ctrls[req].data);
821                 static uint8_t value[4];
822                 memcpy(value, &flipped, sizeof(flipped));
823                 int size = sizeof(value);
824                 //if (ctrls[req].request == 215) size = 0;
825                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
826                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
827                 if (rc < 0) {
828                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
829                         exit(1);
830                 }
831                 
832                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
833                 for (int i = 0; i < rc; ++i) {
834                         printf("%02x", value[i]);
835                 }
836                 printf("\n");
837         }
838
839 #if 0
840         // DEBUG
841         for ( ;; ) {
842                 static int my_index = 0;
843                 static uint8_t value[4];
844                 int size = sizeof(value);
845                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
846                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
847                 if (rc < 0) {
848                         fprintf(stderr, "Error on control\n");
849                         exit(1);
850                 }
851                 printf("rc=%d index=%d: 0x", rc, my_index);
852                 for (int i = 0; i < rc; ++i) {
853                         printf("%02x", value[i]);
854                 }
855                 printf("\n");
856         }
857 #endif
858
859 #if 0
860         // set up an asynchronous transfer of the timer register
861         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
862         static int completed = 0;
863
864         xfr = libusb_alloc_transfer(0);
865         libusb_fill_control_setup(cmdbuf,
866             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
867                 /*index=*/44, /*length=*/4);
868         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
869         xfr->user_data = this;
870         libusb_submit_transfer(xfr);
871
872         // set up an asynchronous transfer of register 24
873         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
874         static int completed2 = 0;
875
876         xfr = libusb_alloc_transfer(0);
877         libusb_fill_control_setup(cmdbuf2,
878             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
879                 /*index=*/24, /*length=*/4);
880         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
881         xfr->user_data = this;
882         libusb_submit_transfer(xfr);
883 #endif
884
885         // set up an asynchronous transfer of the register dump
886         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
887         static int completed3 = 0;
888
889         xfr = libusb_alloc_transfer(0);
890         libusb_fill_control_setup(cmdbuf3,
891             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
892                 /*index=*/current_register, /*length=*/4);
893         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
894         xfr->user_data = this;
895         //libusb_submit_transfer(xfr);
896
897         audiofp = fopen("audio.raw", "wb");
898
899         // set up isochronous transfers for audio and video
900         for (int e = 3; e <= 4; ++e) {
901                 //int num_transfers = (e == 3) ? 6 : 6;
902                 int num_transfers = 6;
903                 for (int i = 0; i < num_transfers; ++i) {
904                         int num_iso_pack, size;
905                         if (e == 3) {
906                                 // Video seems to require isochronous packets scaled with the width; 
907                                 // seemingly six lines is about right, rounded up to the required 1kB
908                                 // multiple.
909                                 size = WIDTH * 2 * 6;
910                                 // Note that for 10-bit input, you'll need to increase size accordingly.
911                                 //size = size * 4 / 3;
912                                 if (size % 1024 != 0) {
913                                         size &= ~1023;
914                                         size += 1024;
915                                 }
916                                 num_iso_pack = (2 << 18) / size;  // 512 kB.
917                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
918                         } else {
919                                 size = 0xc0;
920                                 num_iso_pack = 80;
921                         }
922                         int num_bytes = num_iso_pack * size;
923                         uint8_t *buf = new uint8_t[num_bytes];
924
925                         xfr = libusb_alloc_transfer(num_iso_pack);
926                         if (!xfr) {
927                                 fprintf(stderr, "oom\n");
928                                 exit(1);
929                         }
930
931                         int ep = LIBUSB_ENDPOINT_IN | e;
932                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
933                                 num_iso_pack, cb_xfr, nullptr, 0);
934                         libusb_set_iso_packet_lengths(xfr, size);
935                         xfr->user_data = this;
936                         iso_xfrs.push_back(xfr);
937                 }
938         }
939 }
940
941 void BMUSBCapture::start_bm_capture()
942 {
943         printf("starting capture\n");
944         int i = 0;
945         for (libusb_transfer *xfr : iso_xfrs) {
946                 printf("submitting transfer...\n");
947                 int rc = libusb_submit_transfer(xfr);
948                 ++i;
949                 if (rc < 0) {
950                         //printf("num_bytes=%d\n", num_bytes);
951                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
952                                 xfr->endpoint, i, libusb_error_name(rc));
953                         exit(1);
954                 }
955         }
956
957
958 #if 0
959         libusb_release_interface(devh, 0);
960 out:
961         if (devh)
962                 libusb_close(devh);
963         libusb_exit(nullptr);
964         return rc;
965 #endif
966 }
967
968 void BMUSBCapture::stop_dequeue_thread()
969 {
970         dequeue_thread_should_quit = true;
971         queues_not_empty.notify_all();
972         dequeue_thread.join();
973 }
974
975 void BMUSBCapture::start_bm_thread()
976 {
977         should_quit = false;
978         usb_thread = thread(&BMUSBCapture::usb_thread_func);
979 }
980
981 void BMUSBCapture::stop_bm_thread()
982 {
983         should_quit = true;
984         usb_thread.join();
985 }