]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Support multiple cards at the same time (although currently, they have to be of diffe...
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <libusb.h>
10 #include <arpa/inet.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <fcntl.h>
14 #include <stdint.h>
15 #include <assert.h>
16 #ifdef __SSE2__
17 #include <immintrin.h>
18 #endif
19 #include <algorithm>
20 #include <functional>
21 #include <memory>
22 #include <deque>
23 #include <utility>
24 #include <mutex>
25 #include <condition_variable>
26 #include <thread>
27 #include <stack>
28 #include <atomic>
29 #include "bmusb.h"
30
31 using namespace std;
32 using namespace std::placeholders;
33
// Dimensions of the raw captured picture. The active values are the 720p
// ones; the commented-out pair is the 1080-line alternative.
#define WIDTH 1280
#define HEIGHT 750  /* 30 lines ancillary data? */
//#define WIDTH 1920
//#define HEIGHT 1125  /* ??? lines ancillary data? */

// Number of bytes at the start of every video frame that precede the picture
// data; dump_frame() skips them, and dequeue_thread() passes the value on to
// the frame callback as the video offset.
#define HEADER_SIZE 44
//#define HEADER_SIZE 0

// Same as HEADER_SIZE, but for audio blocks.
#define AUDIO_HEADER_SIZE 4

// Size in bytes of each buffer in the default video frame allocator
// (see configure_card()). The commented-out variants compute an exact
// per-format size; the active one is simply a fixed 8 MB.
//#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
//#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
#define FRAME_SIZE (8 << 20)
45
// Output stream that dump_audio_block() appends raw audio to.
// NOTE(review): nothing in the visible part of this file opens it -- confirm
// it is set up before dump_audio_block() is enabled.
FILE *audiofp;

// NOTE(review): presumably the thread that runs BMUSBCapture::usb_thread_func();
// it is started outside the part of the file shown here -- confirm.
thread usb_thread;

// usb_thread_func() keeps handling libusb events for as long as this is false.
atomic<bool> should_quit;

// Out-of-line definition of the (empty) FrameAllocator destructor.
FrameAllocator::~FrameAllocator() {}
52
// Number of buffers each MallocFrameAllocator allocates up front.
#define NUM_QUEUED_FRAMES 8

// A FrameAllocator backed by a fixed pool of heap buffers. All buffers are
// allocated in the constructor; alloc_frame() and release_frame() only move
// them in and out of a mutex-protected free list.
class MallocFrameAllocator : public FrameAllocator {
public:
	// Creates the pool; every buffer is <frame_size> bytes.
	MallocFrameAllocator(size_t frame_size);

	// Takes a buffer out of the pool. If the pool is empty, a message is
	// printed and the returned Frame gets no buffer assigned.
	Frame alloc_frame() override;

	// Puts the buffer of <frame> back into the pool.
	void release_frame(Frame frame) override;

private:
	// Size in bytes of every buffer handed out by this allocator.
	size_t frame_size;

	// Guards <freelist>.
	mutex freelist_mutex;
	stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
};
66
67 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
68         : frame_size(frame_size)
69 {
70         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
71                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
72         }
73 }
74
75 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
76 {
77         Frame vf;
78         vf.owner = this;
79
80         unique_lock<mutex> lock(freelist_mutex);  // Meh.
81         if (freelist.empty()) {
82                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
83                         frame_size);
84         } else {
85                 vf.data = freelist.top().release();
86                 vf.size = frame_size;
87                 freelist.pop();  // Meh.
88         }
89         return vf;
90 }
91
92 void MallocFrameAllocator::release_frame(Frame frame)
93 {
94         unique_lock<mutex> lock(freelist_mutex);
95         freelist.push(unique_ptr<uint8_t[]>(frame.data));
96 }
97
// Compares two 16-bit sequence numbers that are allowed to wrap around.
// Returns true if <b> lies strictly ahead of <a> by less than half the
// number space (0x8000), i.e. if <a> should be considered older than <b>.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Distance from a forward to b, modulo 2^16.
	const uint16_t forward_distance = static_cast<uint16_t>(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
109
110 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
111 {
112         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
113                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
114                         q->back().timecode, timecode);
115                 frame.owner->release_frame(frame);
116                 return;
117         }
118
119         QueuedFrame qf;
120         qf.format = format;
121         qf.timecode = timecode;
122         qf.frame = frame;
123
124         {
125                 unique_lock<mutex> lock(queue_lock);
126                 q->push_back(move(qf));
127         }
128         queues_not_empty.notify_one();  // might be spurious
129 }
130
131 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
132 {
133         FILE *fp = fopen(filename, "wb");
134         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
135                 printf("short write!\n");
136         }
137         fclose(fp);
138 }
139
140 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
141 {
142         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
143 }
144
145 void BMUSBCapture::dequeue_thread()
146 {
147         for ( ;; ) {
148                 unique_lock<mutex> lock(queue_lock);
149                 queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
150
151                 uint16_t video_timecode = pending_video_frames.front().timecode;
152                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
153                 if (video_timecode < audio_timecode) {
154                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
155                                 video_timecode);
156                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
157                         pending_video_frames.pop_front();
158                 } else if (audio_timecode < video_timecode) {
159                         printf("Audio block 0x%04x without corresponding video block, dropping.\n",
160                                 audio_timecode);
161                         audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
162                         pending_audio_frames.pop_front();
163                 } else {
164                         QueuedFrame video_frame = pending_video_frames.front();
165                         QueuedFrame audio_frame = pending_audio_frames.front();
166                         pending_audio_frames.pop_front();
167                         pending_video_frames.pop_front();
168                         lock.unlock();
169
170 #if 0
171                         char filename[255];
172                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
173                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
174                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
175 #endif
176
177                         frame_callback(video_timecode,
178                                        video_frame.frame, HEADER_SIZE, video_frame.format,
179                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
180                 }
181         }
182 }
183
184 void BMUSBCapture::start_new_frame(const uint8_t *start)
185 {
186         uint16_t format = (start[3] << 8) | start[2];
187         uint16_t timecode = (start[1] << 8) | start[0];
188
189         if (current_video_frame.len > 0) {
190                 //dump_frame();
191                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
192         }
193         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
194         //      format, timecode,
195         //      //start[7], start[6], start[5], start[4],
196         //      read_current_frame, FRAME_SIZE);
197
198         current_video_frame = video_frame_allocator->alloc_frame();
199         //if (current_video_frame.data == nullptr) {
200         //      read_current_frame = -1;
201         //} else {
202         //      read_current_frame = 0;
203         //}
204 }
205
206 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
207 {
208         uint16_t format = (start[3] << 8) | start[2];
209         uint16_t timecode = (start[1] << 8) | start[0];
210         if (current_audio_frame.len > 0) {
211                 //dump_audio_block();
212                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
213         }
214         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
215         //      format, timecode, read_current_audio_block);
216         current_audio_frame = audio_frame_allocator->alloc_frame();
217 }
218
#if 0
// Debugging aid (compiled out): hex-dumps the received bytes of one
// isochronous packet, 16 bytes per line with an extra gap after the 8th.
// <offset> is the position of the packet's data within xfr->buffer.
static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
{
	//	printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
	const unsigned num_bytes = pack->actual_length;
	//const unsigned num_bytes = min(pack->actual_length, 16u);
	for (unsigned pos = 0; pos < num_bytes; ++pos) {
		printf("%02x", xfr->buffer[pos + offset]);

		const unsigned column = pos % 16;
		if (column == 15) {
			printf("\n");
		} else if (column == 7) {
			printf("  ");
		} else {
			printf(" ");
		}
	}
}
#endif
235
// Splits <n> bytes from <src> into two streams: bytes at even positions go
// to <dest1>, bytes at odd positions go to <dest2>. Each destination thus
// receives n/2 bytes. <n> must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);

	const size_t num_pairs = n / 2;
	for (size_t pair = 0; pair < num_pairs; ++pair) {
		dest1[pair] = src[2 * pair];
		dest2[pair] = src[2 * pair + 1];
	}
}
247
248 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
249 {
250         if (current_frame->data == nullptr ||
251             current_frame->len > current_frame->size ||
252             start == end) {
253                 return;
254         }
255
256         int bytes = end - start;
257         if (current_frame->len + bytes > current_frame->size) {
258                 printf("%d bytes overflow after last %s frame\n",
259                         int(current_frame->len + bytes - current_frame->size), frame_type_name);
260                 //dump_frame();
261         } else {
262                 if (current_frame->interleaved) {
263                         uint8_t *data = current_frame->data + current_frame->len / 2;
264                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
265                         if (current_frame->len % 2 == 1) {
266                                 ++data;
267                                 swap(data, data2);
268                         }
269                         if (bytes % 2 == 1) {
270                                 *data++ = *start++;
271                                 swap(data, data2);
272                                 ++current_frame->len;
273                                 --bytes;
274                         }
275                         memcpy_interleaved(data, data2, start, bytes);
276                         current_frame->len += bytes;
277                 } else {
278                         memcpy(current_frame->data + current_frame->len, start, bytes);
279                         current_frame->len += bytes;
280                 }
281         }
282 }
283
284 #ifdef __SSE2__
285
286 // Does a memcpy and memchr in one to reduce processing time.
287 // Note that the benefit is somewhat limited if your L3 cache is small,
288 // as you'll (unfortunately) spend most of the time loading the data
289 // from main memory.
290 //
291 // Complicated cases are left to the slow path; it basically stops copying
292 // up until the first instance of "sync_char" (usually a bit before, actually).
293 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
294 // data, and what we really need this for is the 00 00 ff ff marker in video data.
295 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
296 {
297         if (current_frame->data == nullptr ||
298             current_frame->len > current_frame->size ||
299             start == limit) {
300                 return start;
301         }
302         size_t orig_bytes = limit - start;
303         if (orig_bytes < 128) {
304                 // Don't bother.
305                 return start;
306         }
307
308         // Don't read more bytes than we can write.
309         limit = min(limit, start + (current_frame->size - current_frame->len));
310
311         // Align end to 32 bytes.
312         limit = (const uint8_t *)(intptr_t(limit) & ~31);
313
314         if (start >= limit) {
315                 return start;
316         }
317
318         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
319         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
320         if (aligned_start != start) {
321                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
322                 if (sync_start == nullptr) {
323                         add_to_frame(current_frame, "", start, aligned_start);
324                 } else {
325                         add_to_frame(current_frame, "", start, sync_start);
326                         return sync_start;
327                 }
328         }
329
330         // Make the length a multiple of 64.
331         if (current_frame->interleaved) {
332                 if (((limit - aligned_start) % 64) != 0) {
333                         limit -= 32;
334                 }
335                 assert(((limit - aligned_start) % 64) == 0);
336         }
337
338 #if __AVX2__
339         const __m256i needle = _mm256_set1_epi8(sync_char);
340
341         const __restrict __m256i *in = (const __m256i *)aligned_start;
342         if (current_frame->interleaved) {
343                 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
344                 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
345                 if (current_frame->len % 2 == 1) {
346                         swap(out1, out2);
347                 }
348
349                 __m256i shuffle_cw = _mm256_set_epi8(
350                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
351                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
352                 while (in < (const __m256i *)limit) {
353                         // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
354                         __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
355                         __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp
356
357                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
358                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
359                         __m256i found = _mm256_or_si256(found1, found2);
360
361                         data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
362                         data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop
363                 
364                         data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
365                         data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop
366
367                         __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
368                         __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
369
370                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
371                         _mm256_storeu_si256(out2, hi);
372
373                         if (!_mm256_testz_si256(found, found)) {
374                                 break;
375                         }
376
377                         in += 2;
378                         ++out1;
379                         ++out2;
380                 }
381                 current_frame->len += (uint8_t *)in - aligned_start;
382         } else {
383                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
384                 while (in < (const __m256i *)limit) {
385                         __m256i data = _mm256_load_si256(in);
386                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
387                         __m256i found = _mm256_cmpeq_epi8(data, needle);
388                         if (!_mm256_testz_si256(found, found)) {
389                                 break;
390                         }
391
392                         ++in;
393                         ++out;
394                 }
395                 current_frame->len = (uint8_t *)out - current_frame->data;
396         }
397 #else
398         const __m128i needle = _mm_set1_epi8(sync_char);
399
400         const __m128i *in = (const __m128i *)aligned_start;
401         if (current_frame->interleaved) {
402                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
403                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
404                 if (current_frame->len % 2 == 1) {
405                         swap(out1, out2);
406                 }
407
408                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
409                 while (in < (const __m128i *)limit) {
410                         __m128i data1 = _mm_load_si128(in);
411                         __m128i data2 = _mm_load_si128(in + 1);
412                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
413                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
414                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
415                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
416                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
417                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
418                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
419                         _mm_storeu_si128(out2, hi);
420                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
421                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
422                         if (!_mm_testz_si128(found1, found1) ||
423                             !_mm_testz_si128(found2, found2)) {
424                                 break;
425                         }
426
427                         in += 2;
428                         ++out1;
429                         ++out2;
430                 }
431                 current_frame->len += (uint8_t *)in - aligned_start;
432         } else {
433                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
434                 while (in < (const __m128i *)limit) {
435                         __m128i data = _mm_load_si128(in);
436                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
437                         __m128i found = _mm_cmpeq_epi8(data, needle);
438                         if (!_mm_testz_si128(found, found)) {
439                                 break;
440                         }
441
442                         ++in;
443                         ++out;
444                 }
445                 current_frame->len = (uint8_t *)out - current_frame->data;
446         }
447 #endif
448
449         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
450
451         return (const uint8_t *)in;
452 }
453 #endif
454
455 void decode_packs(const libusb_transfer *xfr,
456                   const char *sync_pattern,
457                   int sync_length,
458                   FrameAllocator::Frame *current_frame,
459                   const char *frame_type_name,
460                   function<void(const uint8_t *start)> start_callback)
461 {
462         int offset = 0;
463         for (int i = 0; i < xfr->num_iso_packets; i++) {
464                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
465
466                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
467                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
468                         continue;
469 //exit(5);
470                 }
471
472                 const uint8_t *start = xfr->buffer + offset;
473                 const uint8_t *limit = start + pack->actual_length;
474                 while (start < limit) {  // Usually runs only one iteration.
475 #ifdef __SSE2__
476                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
477                         if (start == limit) break;
478                         assert(start < limit);
479 #endif
480
481                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
482                         if (start_next_frame == nullptr) {
483                                 // add the rest of the buffer
484                                 add_to_frame(current_frame, frame_type_name, start, limit);
485                                 break;
486                         } else {
487                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
488                                 start = start_next_frame + sync_length;  // skip sync
489                                 start_callback(start);
490                         }
491                 }
492 #if 0
493                 dump_pack(xfr, offset, pack);
494 #endif
495                 offset += pack->length;
496         }
497 }
498
499 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
500 {
501         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
502                 fprintf(stderr, "transfer status %d\n", xfr->status);
503                 libusb_free_transfer(xfr);
504                 exit(3);
505         }
506
507         assert(xfr->user_data != nullptr);
508         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
509
510         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
511                 if (xfr->endpoint == 0x84) {
512                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
513                 } else {
514                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
515                 }
516         }
517         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
518                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
519                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
520 #if 0
521                 if (setup->wIndex == 44) {
522                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
523                 } else {
524                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
525                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
526                 }
527 #else
528                 memcpy(usb->register_file + usb->current_register, buf, 4);
529                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
530                 if (usb->current_register == 0) {
531                         // read through all of them
532                         printf("register dump:");
533                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
534                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
535                         }
536                         printf("\n");
537                 }
538                 libusb_fill_control_setup(xfr->buffer,
539                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
540                         /*index=*/usb->current_register, /*length=*/4);
541 #endif
542         }
543
544 #if 0
545         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
546         for (i = 0; i < xfr->actual_length; i++) {
547                 printf("%02x", xfr->buffer[i]);
548                 if (i % 16)
549                         printf("\n");
550                 else if (i % 8)
551                         printf("  ");
552                 else
553                         printf(" ");
554         }
555 #endif
556
557         if (libusb_submit_transfer(xfr) < 0) {
558                 fprintf(stderr, "error re-submitting URB\n");
559                 exit(1);
560         }
561 }
562
563 void BMUSBCapture::usb_thread_func()
564 {
565         printf("usb thread started\n");
566
567         sched_param param;
568         memset(&param, 0, sizeof(param));
569         param.sched_priority = 1;
570         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
571                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
572         }
573         while (!should_quit) {
574                 int rc = libusb_handle_events(nullptr);
575                 if (rc != LIBUSB_SUCCESS)
576                         break;
577         }
578 }
579
580 void BMUSBCapture::configure_card()
581 {
582         if (video_frame_allocator == nullptr) {
583                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
584         }
585         if (audio_frame_allocator == nullptr) {
586                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
587         }
588         thread(&BMUSBCapture::dequeue_thread, this).detach();
589
590         int rc;
591         struct libusb_transfer *xfr;
592
593         rc = libusb_init(nullptr);
594         if (rc < 0) {
595                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
596                 exit(1);
597         }
598
599         //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
600         //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
601         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
602         if (!devh) {
603                 fprintf(stderr, "Error finding USB device\n");
604                 exit(1);
605         }
606
607         libusb_config_descriptor *config;
608         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
609         if (rc < 0) {
610                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
611                 exit(1);
612         }
613         printf("%d interface\n", config->bNumInterfaces);
614         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
615                 printf("  interface %d\n", interface_number);
616                 const libusb_interface *interface = &config->interface[interface_number];
617                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
618                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
619                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
620                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
621                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
622                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
623                         }
624                 }
625         }
626
627         rc = libusb_set_configuration(devh, /*configuration=*/1);
628         if (rc < 0) {
629                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
630                 exit(1);
631         }
632
633         rc = libusb_claim_interface(devh, 0);
634         if (rc < 0) {
635                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
636                 exit(1);
637         }
638
639         // Alternate setting 1 is output, alternate setting 2 is input.
640         // Card is reset when switching alternates, so the driver uses
641         // this “double switch” when it wants to reset.
642         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
643         if (rc < 0) {
644                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
645                 exit(1);
646         }
647         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
648         if (rc < 0) {
649                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
650                 exit(1);
651         }
652 #if 0
653         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
654         if (rc < 0) {
655                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
656                 exit(1);
657         }
658 #endif
659
660 #if 0
661         rc = libusb_claim_interface(devh, 3);
662         if (rc < 0) {
663                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
664                 exit(1);
665         }
666 #endif
667
668         // theories:
669         //   44 is some kind of timer register (first 16 bits count upwards)
670         //   24 is some sort of watchdog?
671         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
672         //      (or will go to 0x73c60010?), also seen 0x73c60100
673         //   12 also changes all the time, unclear why  
674         //   16 seems to be autodetected mode somehow
675         //      --    this is e00115e0 after reset?
676         //                    ed0115e0 after mode change [to output?]
677         //                    2d0015e0 after more mode change [to input]
678         //                    ed0115e0 after more mode change
679         //                    2d0015e0 after more mode change
680         //
681         //                    390115e0 seems to indicate we have signal
682         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
683         //
684         //                    200015e0 on startup
685         //         changes to 250115e0 when we sync to the signal
686         //
687         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
688         //
689         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
690         //
691         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
692         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
693         //
694         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
695         //    perhaps some of them are related to analog output?
696         //
697         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
698         //    but the driver sets it to 0x8036802a at some point.
699         //
700         //    all of this is on request 214/215. other requests (192, 219,
701         //    222, 223, 224) are used for firmware upgrade. Probably best to
702         //    stay out of it unless you know what you're doing.
703         //
704         //
705         // register 16:
706         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
707         //
708         // theories:
709         //   0x01 - stable signal
710         //   0x04 - deep color
711         //   0x08 - unknown (audio??)
712         //   0x20 - 720p??
713         //   0x30 - 576p??
714
715         struct ctrl {
716                 int endpoint;
717                 int request;
718                 int index;
719                 uint32_t data;
720         };
721         static const ctrl ctrls[] = {
722                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
723                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
724
725                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
726                 // capture (v210).
727                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
728                 // 0x10000000 = analog audio instead of embedded audio, it seems
729                 // 0x3a000000 = component video? (analog audio)
730                 // 0x3c000000 = composite video? (analog audio)
731                 // 0x3e000000 = s-video? (analog audio)
732                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
733                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
734                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
735                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
736                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
737         };
738
739         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
740                 uint32_t flipped = htonl(ctrls[req].data);
741                 static uint8_t value[4];
742                 memcpy(value, &flipped, sizeof(flipped));
743                 int size = sizeof(value);
744                 //if (ctrls[req].request == 215) size = 0;
745                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
746                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
747                 if (rc < 0) {
748                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
749                         exit(1);
750                 }
751                 
752                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
753                 for (int i = 0; i < rc; ++i) {
754                         printf("%02x", value[i]);
755                 }
756                 printf("\n");
757         }
758
759 #if 0
760         // DEBUG
761         for ( ;; ) {
762                 static int my_index = 0;
763                 static uint8_t value[4];
764                 int size = sizeof(value);
765                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
766                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
767                 if (rc < 0) {
768                         fprintf(stderr, "Error on control\n");
769                         exit(1);
770                 }
771                 printf("rc=%d index=%d: 0x", rc, my_index);
772                 for (int i = 0; i < rc; ++i) {
773                         printf("%02x", value[i]);
774                 }
775                 printf("\n");
776         }
777 #endif
778
779 #if 0
780         // set up an asynchronous transfer of the timer register
781         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
782         static int completed = 0;
783
784         xfr = libusb_alloc_transfer(0);
785         libusb_fill_control_setup(cmdbuf,
786             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
787                 /*index=*/44, /*length=*/4);
788         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
789         xfr->user_data = this;
790         libusb_submit_transfer(xfr);
791
792         // set up an asynchronous transfer of register 24
793         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
794         static int completed2 = 0;
795
796         xfr = libusb_alloc_transfer(0);
797         libusb_fill_control_setup(cmdbuf2,
798             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
799                 /*index=*/24, /*length=*/4);
800         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
801         xfr->user_data = this;
802         libusb_submit_transfer(xfr);
803 #endif
804
805         // set up an asynchronous transfer of the register dump
806         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
807         static int completed3 = 0;
808
809         xfr = libusb_alloc_transfer(0);
810         libusb_fill_control_setup(cmdbuf3,
811             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
812                 /*index=*/current_register, /*length=*/4);
813         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
814         xfr->user_data = this;
815         //libusb_submit_transfer(xfr);
816
817         audiofp = fopen("audio.raw", "wb");
818
819         // set up isochronous transfers for audio and video
820         for (int e = 3; e <= 4; ++e) {
821                 //int num_transfers = (e == 3) ? 6 : 6;
822                 int num_transfers = 6;
823                 for (int i = 0; i < num_transfers; ++i) {
824                         int num_iso_pack, size;
825                         if (e == 3) {
826                                 // Video seems to require isochronous packets scaled with the width; 
827                                 // seemingly six lines is about right, rounded up to the required 1kB
828                                 // multiple.
829                                 size = WIDTH * 2 * 6;
830                                 // Note that for 10-bit input, you'll need to increase size accordingly.
831                                 //size = size * 4 / 3;
832                                 if (size % 1024 != 0) {
833                                         size &= ~1023;
834                                         size += 1024;
835                                 }
836                                 num_iso_pack = (2 << 18) / size;  // 512 kB.
837                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
838                         } else {
839                                 size = 0xc0;
840                                 num_iso_pack = 80;
841                         }
842                         int num_bytes = num_iso_pack * size;
843                         uint8_t *buf = new uint8_t[num_bytes];
844
845                         xfr = libusb_alloc_transfer(num_iso_pack);
846                         if (!xfr) {
847                                 fprintf(stderr, "oom\n");
848                                 exit(1);
849                         }
850
851                         int ep = LIBUSB_ENDPOINT_IN | e;
852                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
853                                 num_iso_pack, cb_xfr, nullptr, 0);
854                         libusb_set_iso_packet_lengths(xfr, size);
855                         xfr->user_data = this;
856                         iso_xfrs.push_back(xfr);
857                 }
858         }
859 }
860
861 void BMUSBCapture::start_bm_capture()
862 {
863         printf("starting capture\n");
864         int i = 0;
865         for (libusb_transfer *xfr : iso_xfrs) {
866                 printf("submitting transfer...\n");
867                 int rc = libusb_submit_transfer(xfr);
868                 ++i;
869                 if (rc < 0) {
870                         //printf("num_bytes=%d\n", num_bytes);
871                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
872                                 xfr->endpoint, i, libusb_error_name(rc));
873                         exit(1);
874                 }
875         }
876
877
878 #if 0
879         libusb_release_interface(devh, 0);
880 out:
881         if (devh)
882                 libusb_close(devh);
883         libusb_exit(nullptr);
884         return rc;
885 #endif
886 }
887
888 void BMUSBCapture::start_bm_thread()
889 {
890         should_quit = false;
891         usb_thread = thread(&BMUSBCapture::usb_thread_func);
892 }
893
894 void BMUSBCapture::stop_bm_thread()
895 {
896         should_quit = true;
897         usb_thread.join();
898 }