]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Remove a lot of control transfers (they were from the day when I hardly understood...
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <libusb.h>
10 #include <arpa/inet.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <fcntl.h>
14 #include <stdint.h>
15 #include <assert.h>
16 #ifdef __SSE2__
17 #include <immintrin.h>
18 #endif
19 #include <algorithm>
20 #include <functional>
21 #include <memory>
22 #include <deque>
23 #include <utility>
24 #include <mutex>
25 #include <condition_variable>
26 #include <thread>
27 #include <stack>
28 #include <atomic>
29 #include "bmusb.h"
30
31 using namespace std;
32 using namespace std::placeholders;
33
// Nominal raw frame geometry. In the visible code these are only referenced
// by the commented-out FRAME_SIZE formulas below.
34 #define WIDTH 1280
35 #define HEIGHT 750  /* 30 lines ancillary data? */
36 //#define WIDTH 1920
37 //#define HEIGHT 1125  /* ??? lines ancillary data? */
// Number of bytes at the start of each video frame that are not picture data:
// skipped by dump_frame() and passed to frame_callback as the video offset.
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
// Same as HEADER_SIZE, but for audio blocks (see dump_audio_block()).
40 #define AUDIO_HEADER_SIZE 4
41
42 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
43 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
// Size in bytes (8 MiB) of each buffer in the default video frame allocator.
44 #define FRAME_SIZE (8 << 20)
45
// Stream that dump_audio_block() writes to. Nothing in the visible part of
// this file opens it.
46 FILE *audiofp;
47
48 FrameAllocator::~FrameAllocator() {}
49
50 #define NUM_QUEUED_FRAMES 8
51 class MallocFrameAllocator : public FrameAllocator {
52 public:
53         MallocFrameAllocator(size_t frame_size);
54         Frame alloc_frame() override;
55         void release_frame(Frame frame) override;
56
57 private:
58         size_t frame_size;
59
60         mutex freelist_mutex;
61         stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
62 };
63
64 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
65         : frame_size(frame_size)
66 {
67         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
68                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
69         }
70 }
71
72 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
73 {
74         Frame vf;
75         vf.owner = this;
76
77         unique_lock<mutex> lock(freelist_mutex);  // Meh.
78         if (freelist.empty()) {
79                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
80                         frame_size);
81         } else {
82                 vf.data = freelist.top().release();
83                 vf.size = frame_size;
84                 freelist.pop();  // Meh.
85         }
86         return vf;
87 }
88
89 void MallocFrameAllocator::release_frame(Frame frame)
90 {
91         unique_lock<mutex> lock(freelist_mutex);
92         freelist.push(unique_ptr<uint8_t[]>(frame.data));
93 }
94
// Returns true if <a> comes strictly before <b> on a 16-bit counter that
// wraps around, i.e. if stepping forward from a reaches b in at least one
// and fewer than 0x8000 steps.
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Distance from a forward to b, modulo 2^16.
        const uint16_t forward_distance = static_cast<uint16_t>(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
106
107 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
108 {
109         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
110                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
111                         q->back().timecode, timecode);
112                 frame.owner->release_frame(frame);
113                 return;
114         }
115
116         QueuedFrame qf;
117         qf.format = format;
118         qf.timecode = timecode;
119         qf.frame = frame;
120
121         {
122                 unique_lock<mutex> lock(queue_lock);
123                 q->push_back(move(qf));
124         }
125         queues_not_empty.notify_one();  // might be spurious
126 }
127
128 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
129 {
130         FILE *fp = fopen(filename, "wb");
131         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
132                 printf("short write!\n");
133         }
134         fclose(fp);
135 }
136
137 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
138 {
139         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
140 }
141
142 void BMUSBCapture::dequeue_thread()
143 {
144         for ( ;; ) {
145                 unique_lock<mutex> lock(queue_lock);
146                 queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
147
148                 uint16_t video_timecode = pending_video_frames.front().timecode;
149                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
150                 if (video_timecode < audio_timecode) {
151                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
152                                 video_timecode);
153                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
154                         pending_video_frames.pop_front();
155                 } else if (audio_timecode < video_timecode) {
156                         printf("Audio block 0x%04x without corresponding video block, dropping.\n",
157                                 audio_timecode);
158                         audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
159                         pending_audio_frames.pop_front();
160                 } else {
161                         QueuedFrame video_frame = pending_video_frames.front();
162                         QueuedFrame audio_frame = pending_audio_frames.front();
163                         pending_audio_frames.pop_front();
164                         pending_video_frames.pop_front();
165                         lock.unlock();
166
167 #if 0
168                         char filename[255];
169                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
170                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
171                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
172 #endif
173
174                         frame_callback(video_timecode,
175                                        video_frame.frame, HEADER_SIZE, video_frame.format,
176                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
177                 }
178         }
179 }
180
181 void BMUSBCapture::start_new_frame(const uint8_t *start)
182 {
183         uint16_t format = (start[3] << 8) | start[2];
184         uint16_t timecode = (start[1] << 8) | start[0];
185
186         if (current_video_frame.len > 0) {
187                 //dump_frame();
188                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
189         }
190         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
191         //      format, timecode,
192         //      //start[7], start[6], start[5], start[4],
193         //      read_current_frame, FRAME_SIZE);
194
195         current_video_frame = video_frame_allocator->alloc_frame();
196         //if (current_video_frame.data == nullptr) {
197         //      read_current_frame = -1;
198         //} else {
199         //      read_current_frame = 0;
200         //}
201 }
202
203 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
204 {
205         uint16_t format = (start[3] << 8) | start[2];
206         uint16_t timecode = (start[1] << 8) | start[0];
207         if (current_audio_frame.len > 0) {
208                 //dump_audio_block();
209                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
210         }
211         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
212         //      format, timecode, read_current_audio_block);
213         current_audio_frame = audio_frame_allocator->alloc_frame();
214 }
215
216 #if 0
217 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
218 {
219         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
220         for (unsigned j = 0; j < pack->actual_length; j++) {
221         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
222                 printf("%02x", xfr->buffer[j + offset]);
223                 if ((j % 16) == 15)
224                         printf("\n");
225                 else if ((j % 8) == 7)
226                         printf("  ");
227                 else
228                         printf(" ");
229         }
230 }
231 #endif
232
// De-interleaves <n> bytes from <src>: bytes at even offsets go to <dest1>,
// bytes at odd offsets go to <dest2>, each packed contiguously.
// <n> must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        for (size_t consumed = 0; consumed < n; consumed += 2) {
                const size_t pair = consumed / 2;
                dest1[pair] = src[consumed];
                dest2[pair] = src[consumed + 1];
        }
}
244
245 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
246 {
247         if (current_frame->data == nullptr ||
248             current_frame->len > current_frame->size ||
249             start == end) {
250                 return;
251         }
252
253         int bytes = end - start;
254         if (current_frame->len + bytes > current_frame->size) {
255                 printf("%d bytes overflow after last %s frame\n",
256                         int(current_frame->len + bytes - current_frame->size), frame_type_name);
257                 //dump_frame();
258         } else {
259                 if (current_frame->interleaved) {
260                         uint8_t *data = current_frame->data + current_frame->len / 2;
261                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
262                         if (current_frame->len % 2 == 1) {
263                                 ++data;
264                                 swap(data, data2);
265                         }
266                         if (bytes % 2 == 1) {
267                                 *data++ = *start++;
268                                 swap(data, data2);
269                                 ++current_frame->len;
270                                 --bytes;
271                         }
272                         memcpy_interleaved(data, data2, start, bytes);
273                         current_frame->len += bytes;
274                 } else {
275                         memcpy(current_frame->data + current_frame->len, start, bytes);
276                         current_frame->len += bytes;
277                 }
278         }
279 }
280
281 #ifdef __SSE2__
282
283 // Does a memcpy and memchr in one to reduce processing time.
284 // Note that the benefit is somewhat limited if your L3 cache is small,
285 // as you'll (unfortunately) spend most of the time loading the data
286 // from main memory.
287 //
288 // Complicated cases are left to the slow path; it basically stops copying
289 // up until the first instance of "sync_char" (usually a bit before, actually).
290 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
291 // data, and what we really need this for is the 00 00 ff ff marker in video data.
292 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
293 {
294         if (current_frame->data == nullptr ||
295             current_frame->len > current_frame->size ||
296             start == limit) {
297                 return start;
298         }
299         size_t orig_bytes = limit - start;
300         if (orig_bytes < 128) {
301                 // Don't bother.
302                 return start;
303         }
304
305         // Don't read more bytes than we can write.
306         limit = min(limit, start + (current_frame->size - current_frame->len));
307
308         // Align end to 32 bytes.
309         limit = (const uint8_t *)(intptr_t(limit) & ~31);
310
311         if (start >= limit) {
312                 return start;
313         }
314
315         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
316         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
317         if (aligned_start != start) {
318                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
319                 if (sync_start == nullptr) {
320                         add_to_frame(current_frame, "", start, aligned_start);
321                 } else {
322                         add_to_frame(current_frame, "", start, sync_start);
323                         return sync_start;
324                 }
325         }
326
327         // Make the length a multiple of 64.
328         if (current_frame->interleaved) {
329                 if (((limit - aligned_start) % 64) != 0) {
330                         limit -= 32;
331                 }
332                 assert(((limit - aligned_start) % 64) == 0);
333         }
334
335 #if __AVX2__
336         const __m256i needle = _mm256_set1_epi8(sync_char);
337
338         const __restrict __m256i *in = (const __m256i *)aligned_start;
339         if (current_frame->interleaved) {
340                 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
341                 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
342                 if (current_frame->len % 2 == 1) {
343                         swap(out1, out2);
344                 }
345
346                 __m256i shuffle_cw = _mm256_set_epi8(
347                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
348                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
349                 while (in < (const __m256i *)limit) {
350                         // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
351                         __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
352                         __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp
353
354                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
355                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
356                         __m256i found = _mm256_or_si256(found1, found2);
357
358                         data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
359                         data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop
360                 
361                         data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
362                         data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop
363
364                         __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
365                         __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
366
367                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
368                         _mm256_storeu_si256(out2, hi);
369
370                         if (!_mm256_testz_si256(found, found)) {
371                                 break;
372                         }
373
374                         in += 2;
375                         ++out1;
376                         ++out2;
377                 }
378                 current_frame->len += (uint8_t *)in - aligned_start;
379         } else {
380                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
381                 while (in < (const __m256i *)limit) {
382                         __m256i data = _mm256_load_si256(in);
383                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
384                         __m256i found = _mm256_cmpeq_epi8(data, needle);
385                         if (!_mm256_testz_si256(found, found)) {
386                                 break;
387                         }
388
389                         ++in;
390                         ++out;
391                 }
392                 current_frame->len = (uint8_t *)out - current_frame->data;
393         }
394 #else
395         const __m128i needle = _mm_set1_epi8(sync_char);
396
397         const __m128i *in = (const __m128i *)aligned_start;
398         if (current_frame->interleaved) {
399                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
400                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
401                 if (current_frame->len % 2 == 1) {
402                         swap(out1, out2);
403                 }
404
405                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
406                 while (in < (const __m128i *)limit) {
407                         __m128i data1 = _mm_load_si128(in);
408                         __m128i data2 = _mm_load_si128(in + 1);
409                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
410                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
411                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
412                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
413                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
414                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
415                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
416                         _mm_storeu_si128(out2, hi);
417                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
418                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
419                         if (!_mm_testz_si128(found1, found1) ||
420                             !_mm_testz_si128(found2, found2)) {
421                                 break;
422                         }
423
424                         in += 2;
425                         ++out1;
426                         ++out2;
427                 }
428                 current_frame->len += (uint8_t *)in - aligned_start;
429         } else {
430                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
431                 while (in < (const __m128i *)limit) {
432                         __m128i data = _mm_load_si128(in);
433                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
434                         __m128i found = _mm_cmpeq_epi8(data, needle);
435                         if (!_mm_testz_si128(found, found)) {
436                                 break;
437                         }
438
439                         ++in;
440                         ++out;
441                 }
442                 current_frame->len = (uint8_t *)out - current_frame->data;
443         }
444 #endif
445
446         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
447
448         return (const uint8_t *)in;
449 }
450 #endif
451
452 void decode_packs(const libusb_transfer *xfr,
453                   const char *sync_pattern,
454                   int sync_length,
455                   FrameAllocator::Frame *current_frame,
456                   const char *frame_type_name,
457                   function<void(const uint8_t *start)> start_callback)
458 {
459         int offset = 0;
460         for (int i = 0; i < xfr->num_iso_packets; i++) {
461                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
462
463                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
464                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
465                         continue;
466 //exit(5);
467                 }
468
469                 const uint8_t *start = xfr->buffer + offset;
470                 const uint8_t *limit = start + pack->actual_length;
471                 while (start < limit) {  // Usually runs only one iteration.
472 #ifdef __SSE2__
473                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
474                         if (start == limit) break;
475                         assert(start < limit);
476 #endif
477
478                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
479                         if (start_next_frame == nullptr) {
480                                 // add the rest of the buffer
481                                 add_to_frame(current_frame, frame_type_name, start, limit);
482                                 break;
483                         } else {
484                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
485                                 start = start_next_frame + sync_length;  // skip sync
486                                 start_callback(start);
487                         }
488                 }
489 #if 0
490                 dump_pack(xfr, offset, pack);
491 #endif
492                 offset += pack->length;
493         }
494 }
495
496 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
497 {
498         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
499                 fprintf(stderr, "transfer status %d\n", xfr->status);
500                 libusb_free_transfer(xfr);
501                 exit(3);
502         }
503
504         assert(xfr->user_data != nullptr);
505         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
506
507         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
508                 if (xfr->endpoint == 0x84) {
509                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
510                 } else {
511                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
512                 }
513         }
514         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
515                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
516                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
517 #if 0
518                 if (setup->wIndex == 44) {
519                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
520                 } else {
521                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
522                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
523                 }
524 #else
525                 memcpy(usb->register_file + usb->current_register, buf, 4);
526                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
527                 if (usb->current_register == 0) {
528                         // read through all of them
529                         printf("register dump:");
530                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
531                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
532                         }
533                         printf("\n");
534                 }
535                 libusb_fill_control_setup(xfr->buffer,
536                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
537                         /*index=*/usb->current_register, /*length=*/4);
538 #endif
539         }
540
541 #if 0
542         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
543         for (i = 0; i < xfr->actual_length; i++) {
544                 printf("%02x", xfr->buffer[i]);
545                 if (i % 16)
546                         printf("\n");
547                 else if (i % 8)
548                         printf("  ");
549                 else
550                         printf(" ");
551         }
552 #endif
553
554         if (libusb_submit_transfer(xfr) < 0) {
555                 fprintf(stderr, "error re-submitting URB\n");
556                 exit(1);
557         }
558 }
559
560 void BMUSBCapture::usb_thread_func()
561 {
562         printf("usb thread started\n");
563
564         sched_param param;
565         memset(&param, 0, sizeof(param));
566         param.sched_priority = 1;
567         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
568                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
569         }
570         while (!should_quit) {
571                 int rc = libusb_handle_events(nullptr);
572                 if (rc != LIBUSB_SUCCESS)
573                         break;
574         }
575 }
576
577 void BMUSBCapture::start_bm_capture()
578 {
579         if (video_frame_allocator == nullptr) {
580                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
581         }
582         if (audio_frame_allocator == nullptr) {
583                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
584         }
585         thread(&BMUSBCapture::dequeue_thread, this).detach();
586
587         int rc;
588         struct libusb_transfer *xfr;
589         vector<libusb_transfer *> iso_xfrs;
590
591         rc = libusb_init(nullptr);
592         if (rc < 0) {
593                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
594                 exit(1);
595         }
596
597         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
598         if (!devh) {
599                 fprintf(stderr, "Error finding USB device\n");
600                 exit(1);
601         }
602
603         libusb_config_descriptor *config;
604         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
605         if (rc < 0) {
606                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
607                 exit(1);
608         }
609         printf("%d interface\n", config->bNumInterfaces);
610         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
611                 printf("  interface %d\n", interface_number);
612                 const libusb_interface *interface = &config->interface[interface_number];
613                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
614                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
615                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
616                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
617                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
618                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
619                         }
620                 }
621         }
622
623         rc = libusb_set_configuration(devh, /*configuration=*/1);
624         if (rc < 0) {
625                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
626                 exit(1);
627         }
628
629         rc = libusb_claim_interface(devh, 0);
630         if (rc < 0) {
631                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
632                 exit(1);
633         }
634
635         // Alternate setting 1 is output, alternate setting 2 is input.
636         // Card is reset when switching alternates, so the driver uses
637         // this “double switch” when it wants to reset.
638         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
639         if (rc < 0) {
640                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
641                 exit(1);
642         }
643         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
644         if (rc < 0) {
645                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
646                 exit(1);
647         }
648 #if 0
649         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
650         if (rc < 0) {
651                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
652                 exit(1);
653         }
654 #endif
655
656 #if 0
657         rc = libusb_claim_interface(devh, 3);
658         if (rc < 0) {
659                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
660                 exit(1);
661         }
662 #endif
663
664         // theories:
665         //   44 is some kind of timer register (first 16 bits count upwards)
666         //   24 is some sort of watchdog?
667         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
668         //      (or will go to 0x73c60010?), also seen 0x73c60100
669         //   12 also changes all the time, unclear why  
670         //   16 seems to be autodetected mode somehow
671         //      --    this is e00115e0 after reset?
672         //                    ed0115e0 after mode change [to output?]
673         //                    2d0015e0 after more mode change [to input]
674         //                    ed0115e0 after more mode change
675         //                    2d0015e0 after more mode change
676         //
677         //                    390115e0 seems to indicate we have signal
678         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
679         //
680         //                    200015e0 on startup
681         //         changes to 250115e0 when we sync to the signal
682         //
683         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
684         //
685         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all all of them).
686         //
687         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
688         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
689         //
690         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
691         //    perhaps some of them are related to analog output?
692         //
693         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
694         //    but the driver sets it to 0x8036802a at some point.
695         //
696         //    all of this is on request 214/215. other requests (192, 219,
697         //    222, 223, 224) are used for firmware upgrade. Probably best to
698         //    stay out of it unless you know what you're doing.
699         //
700         //
701         // register 16:
702         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
703         //
704         // theories:
705         //   0x01 - stable signal
706         //   0x04 - deep color
707         //   0x08 - unknown (audio??)
708         //   0x20 - 720p??
709         //   0x30 - 576p??
710
711         struct ctrl {
712                 int endpoint;
713                 int request;
714                 int index;
715                 uint32_t data;
716         };
717         static const ctrl ctrls[] = {
718                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
719                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
720
721                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
722                 // capture (v210).
723                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
724                 // 0x10000000 = analog audio instead of embedded audio, it seems
725                 // 0x3a000000 = component video? (analog audio)
726                 // 0x3c000000 = composite video? (analog audio)
727                 // 0x3e000000 = s-video? (analog audio)
728                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
729                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
730                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
731                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
732                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
733         };
734
735         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
736                 uint32_t flipped = htonl(ctrls[req].data);
737                 static uint8_t value[4];
738                 memcpy(value, &flipped, sizeof(flipped));
739                 int size = sizeof(value);
740                 //if (ctrls[req].request == 215) size = 0;
741                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
742                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
743                 if (rc < 0) {
744                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
745                         exit(1);
746                 }
747                 
748                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
749                 for (int i = 0; i < rc; ++i) {
750                         printf("%02x", value[i]);
751                 }
752                 printf("\n");
753         }
754
755 #if 0
756         // DEBUG
757         for ( ;; ) {
758                 static int my_index = 0;
759                 static uint8_t value[4];
760                 int size = sizeof(value);
761                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
762                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
763                 if (rc < 0) {
764                         fprintf(stderr, "Error on control\n");
765                         exit(1);
766                 }
767                 printf("rc=%d index=%d: 0x", rc, my_index);
768                 for (int i = 0; i < rc; ++i) {
769                         printf("%02x", value[i]);
770                 }
771                 printf("\n");
772         }
773 #endif
774
775 #if 0
776         // set up an asynchronous transfer of the timer register
777         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
778         static int completed = 0;
779
780         xfr = libusb_alloc_transfer(0);
781         libusb_fill_control_setup(cmdbuf,
782             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
783                 /*index=*/44, /*length=*/4);
784         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
785         xfr->user_data = this;
786         libusb_submit_transfer(xfr);
787
788         // set up an asynchronous transfer of register 24
789         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
790         static int completed2 = 0;
791
792         xfr = libusb_alloc_transfer(0);
793         libusb_fill_control_setup(cmdbuf2,
794             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
795                 /*index=*/24, /*length=*/4);
796         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
797         xfr->user_data = this;
798         libusb_submit_transfer(xfr);
799 #endif
800
801         // set up an asynchronous transfer of the register dump
802         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
803         static int completed3 = 0;
804
805         xfr = libusb_alloc_transfer(0);
806         libusb_fill_control_setup(cmdbuf3,
807             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
808                 /*index=*/current_register, /*length=*/4);
809         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
810         xfr->user_data = this;
811         //libusb_submit_transfer(xfr);
812
813         audiofp = fopen("audio.raw", "wb");
814
815         // set up isochronous transfers for audio and video
816         for (int e = 3; e <= 4; ++e) {
817                 //int num_transfers = (e == 3) ? 6 : 6;
818                 int num_transfers = 6;
819                 for (int i = 0; i < num_transfers; ++i) {
820                         int num_iso_pack, size;
821                         if (e == 3) {
822                                 // Video seems to require isochronous packets scaled with the width; 
823                                 // seemingly six lines is about right, rounded up to the required 1kB
824                                 // multiple.
825                                 size = WIDTH * 2 * 6;
826                                 // Note that for 10-bit input, you'll need to increase size accordingly.
827                                 //size = size * 4 / 3;
828                                 if (size % 1024 != 0) {
829                                         size &= ~1023;
830                                         size += 1024;
831                                 }
832                                 num_iso_pack = (2 << 20) / size;  // 2 MB.
833                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
834                         } else {
835                                 size = 0xc0;
836                                 num_iso_pack = 80;
837                         }
838                         int num_bytes = num_iso_pack * size;
839                         uint8_t *buf = new uint8_t[num_bytes];
840
841                         xfr = libusb_alloc_transfer(num_iso_pack);
842                         if (!xfr) {
843                                 fprintf(stderr, "oom\n");
844                                 exit(1);
845                         }
846
847                         int ep = LIBUSB_ENDPOINT_IN | e;
848                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
849                                 num_iso_pack, cb_xfr, nullptr, 0);
850                         libusb_set_iso_packet_lengths(xfr, size);
851                         xfr->user_data = this;
852                         iso_xfrs.push_back(xfr);
853                 }
854         }
855
856         {
857                 int i = 0;
858                 for (libusb_transfer *xfr : iso_xfrs) {
859                         rc = libusb_submit_transfer(xfr);
860                         ++i;
861                         if (rc < 0) {
862                                 //printf("num_bytes=%d\n", num_bytes);
863                                 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
864                                         xfr->endpoint, i, libusb_error_name(rc));
865                                 exit(1);
866                         }
867                 }
868         }
869
870         should_quit = false;
871         usb_thread = thread(&BMUSBCapture::usb_thread_func, this);
872
873
874 #if 0
875         libusb_release_interface(devh, 0);
876 out:
877         if (devh)
878                 libusb_close(devh);
879         libusb_exit(nullptr);
880         return rc;
881 #endif
882 }
883
884 void BMUSBCapture::stop_bm_capture()
885 {
886         should_quit = true;
887         usb_thread.join();
888 }