]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Some corrections about firmware upgrade.
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <libusb.h>
10 #include <arpa/inet.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <fcntl.h>
14 #include <stdint.h>
15 #include <assert.h>
16 #ifdef __SSE2__
17 #include <immintrin.h>
18 #endif
19 #include <algorithm>
20 #include <functional>
21 #include <memory>
22 #include <deque>
23 #include <utility>
24 #include <mutex>
25 #include <condition_variable>
26 #include <thread>
27 #include <stack>
28 #include <atomic>
29 #include "bmusb.h"
30
31 using namespace std;
32 using namespace std::placeholders;
33
34 #define WIDTH 1280
35 #define HEIGHT 750  /* 30 lines ancillary data? */
36 //#define WIDTH 1920
37 //#define HEIGHT 1125  /* ??? lines ancillary data? */
38 #define HEADER_SIZE 44
39 //#define HEADER_SIZE 0
40 #define AUDIO_HEADER_SIZE 4
41
42 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
43 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
44 #define FRAME_SIZE (8 << 20)
45
46 FILE *audiofp;
47
48 FrameAllocator::~FrameAllocator() {}
49
50 #define NUM_QUEUED_FRAMES 8
51 class MallocFrameAllocator : public FrameAllocator {
52 public:
53         MallocFrameAllocator(size_t frame_size);
54         Frame alloc_frame() override;
55         void release_frame(Frame frame) override;
56
57 private:
58         size_t frame_size;
59
60         mutex freelist_mutex;
61         stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
62 };
63
64 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
65         : frame_size(frame_size)
66 {
67         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
68                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
69         }
70 }
71
72 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
73 {
74         Frame vf;
75         vf.owner = this;
76
77         unique_lock<mutex> lock(freelist_mutex);  // Meh.
78         if (freelist.empty()) {
79                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
80                         frame_size);
81         } else {
82                 vf.data = freelist.top().release();
83                 vf.size = frame_size;
84                 freelist.pop();  // Meh.
85         }
86         return vf;
87 }
88
89 void MallocFrameAllocator::release_frame(Frame frame)
90 {
91         unique_lock<mutex> lock(freelist_mutex);
92         freelist.push(unique_ptr<uint8_t[]>(frame.data));
93 }
94
// Orders two 16-bit counters that wrap around: returns true when <b> lies
// strictly ahead of <a> by fewer than 0x8000 steps (modulo 0x10000).
// Equal values, and values exactly 0x8000 apart, compare as "not less".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Distance from a forward to b, taken modulo 2^16.
        const uint16_t forward_distance = uint16_t(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
106
107 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
108 {
109         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
110                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
111                         q->back().timecode, timecode);
112                 frame.owner->release_frame(frame);
113                 return;
114         }
115
116         QueuedFrame qf;
117         qf.format = format;
118         qf.timecode = timecode;
119         qf.frame = frame;
120
121         {
122                 unique_lock<mutex> lock(queue_lock);
123                 q->push_back(move(qf));
124         }
125         queues_not_empty.notify_one();  // might be spurious
126 }
127
128 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
129 {
130         FILE *fp = fopen(filename, "wb");
131         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
132                 printf("short write!\n");
133         }
134         fclose(fp);
135 }
136
137 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
138 {
139         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
140 }
141
142 void BMUSBCapture::dequeue_thread()
143 {
144         for ( ;; ) {
145                 unique_lock<mutex> lock(queue_lock);
146                 queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
147
148                 uint16_t video_timecode = pending_video_frames.front().timecode;
149                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
150                 if (video_timecode < audio_timecode) {
151                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
152                                 video_timecode);
153                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
154                         pending_video_frames.pop_front();
155                 } else if (audio_timecode < video_timecode) {
156                         printf("Audio block 0x%04x without corresponding video block, dropping.\n",
157                                 audio_timecode);
158                         audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
159                         pending_audio_frames.pop_front();
160                 } else {
161                         QueuedFrame video_frame = pending_video_frames.front();
162                         QueuedFrame audio_frame = pending_audio_frames.front();
163                         pending_audio_frames.pop_front();
164                         pending_video_frames.pop_front();
165                         lock.unlock();
166
167 #if 0
168                         char filename[255];
169                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
170                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
171                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
172 #endif
173
174                         frame_callback(video_timecode,
175                                        video_frame.frame, HEADER_SIZE, video_frame.format,
176                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
177                 }
178         }
179 }
180
181 void BMUSBCapture::start_new_frame(const uint8_t *start)
182 {
183         uint16_t format = (start[3] << 8) | start[2];
184         uint16_t timecode = (start[1] << 8) | start[0];
185
186         if (current_video_frame.len > 0) {
187                 //dump_frame();
188                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
189         }
190         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
191         //      format, timecode,
192         //      //start[7], start[6], start[5], start[4],
193         //      read_current_frame, FRAME_SIZE);
194
195         current_video_frame = video_frame_allocator->alloc_frame();
196         //if (current_video_frame.data == nullptr) {
197         //      read_current_frame = -1;
198         //} else {
199         //      read_current_frame = 0;
200         //}
201 }
202
203 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
204 {
205         uint16_t format = (start[3] << 8) | start[2];
206         uint16_t timecode = (start[1] << 8) | start[0];
207         if (current_audio_frame.len > 0) {
208                 //dump_audio_block();
209                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
210         }
211         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
212         //      format, timecode, read_current_audio_block);
213         current_audio_frame = audio_frame_allocator->alloc_frame();
214 }
215
216 #if 0
217 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
218 {
219         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
220         for (unsigned j = 0; j < pack->actual_length; j++) {
221         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
222                 printf("%02x", xfr->buffer[j + offset]);
223                 if ((j % 16) == 15)
224                         printf("\n");
225                 else if ((j % 8) == 7)
226                         printf("  ");
227                 else
228                         printf(" ");
229         }
230 }
231 #endif
232
// De-interleaves <n> bytes from <src>: bytes at even offsets go to <dest1>,
// bytes at odd offsets go to <dest2>, so each destination receives n/2 bytes.
// <n> must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        const size_t pairs = n / 2;
        for (size_t pair = 0; pair < pairs; ++pair) {
                dest1[pair] = src[2 * pair];
                dest2[pair] = src[2 * pair + 1];
        }
}
244
245 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
246 {
247         if (current_frame->data == nullptr ||
248             current_frame->len > current_frame->size ||
249             start == end) {
250                 return;
251         }
252
253         int bytes = end - start;
254         if (current_frame->len + bytes > current_frame->size) {
255                 printf("%d bytes overflow after last %s frame\n",
256                         int(current_frame->len + bytes - current_frame->size), frame_type_name);
257                 //dump_frame();
258         } else {
259                 if (current_frame->interleaved) {
260                         uint8_t *data = current_frame->data + current_frame->len / 2;
261                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
262                         if (current_frame->len % 2 == 1) {
263                                 ++data;
264                                 swap(data, data2);
265                         }
266                         if (bytes % 2 == 1) {
267                                 *data++ = *start++;
268                                 swap(data, data2);
269                                 ++current_frame->len;
270                                 --bytes;
271                         }
272                         memcpy_interleaved(data, data2, start, bytes);
273                         current_frame->len += bytes;
274                 } else {
275                         memcpy(current_frame->data + current_frame->len, start, bytes);
276                         current_frame->len += bytes;
277                 }
278         }
279 }
280
281 #ifdef __SSE2__
282
283 // Does a memcpy and memchr in one to reduce processing time.
284 // Note that the benefit is somewhat limited if your L3 cache is small,
285 // as you'll (unfortunately) spend most of the time loading the data
286 // from main memory.
287 //
288 // Complicated cases are left to the slow path; it basically stops copying
289 // up until the first instance of "sync_char" (usually a bit before, actually).
290 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
291 // data, and what we really need this for is the 00 00 ff ff marker in video data.
292 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
293 {
294         if (current_frame->data == nullptr ||
295             current_frame->len > current_frame->size ||
296             start == limit) {
297                 return start;
298         }
299         size_t orig_bytes = limit - start;
300         if (orig_bytes < 128) {
301                 // Don't bother.
302                 return start;
303         }
304
305         // Don't read more bytes than we can write.
306         limit = min(limit, start + (current_frame->size - current_frame->len));
307
308         // Align end to 32 bytes.
309         limit = (const uint8_t *)(intptr_t(limit) & ~31);
310
311         if (start >= limit) {
312                 return start;
313         }
314
315         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
316         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
317         if (aligned_start != start) {
318                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
319                 if (sync_start == nullptr) {
320                         add_to_frame(current_frame, "", start, aligned_start);
321                 } else {
322                         add_to_frame(current_frame, "", start, sync_start);
323                         return sync_start;
324                 }
325         }
326
327         // Make the length a multiple of 64.
328         if (current_frame->interleaved) {
329                 if (((limit - aligned_start) % 64) != 0) {
330                         limit -= 32;
331                 }
332                 assert(((limit - aligned_start) % 64) == 0);
333         }
334
335 #if __AVX2__
336         const __m256i needle = _mm256_set1_epi8(sync_char);
337
338         const __m256i *in = (const __m256i *)aligned_start;
339         if (current_frame->interleaved) {
340                 __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
341                 __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
342                 if (current_frame->len % 2 == 1) {
343                         swap(out1, out2);
344                 }
345
346                 __m256i mask_lower_byte = _mm256_set1_epi16(0x00ff);
347                 while (in < (const __m256i *)limit) {
348                         __m256i data1 = _mm256_load_si256(in);
349                         __m256i data2 = _mm256_load_si256(in + 1);
350                         __m256i data1_lo = _mm256_and_si256(data1, mask_lower_byte);
351                         __m256i data2_lo = _mm256_and_si256(data2, mask_lower_byte);
352                         __m256i data1_hi = _mm256_srli_epi16(data1, 8);
353                         __m256i data2_hi = _mm256_srli_epi16(data2, 8);
354                         __m256i lo = _mm256_packus_epi16(data1_lo, data2_lo);
355                         lo = _mm256_permute4x64_epi64(lo, 0b11011000);
356                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
357                         __m256i hi = _mm256_packus_epi16(data1_hi, data2_hi);
358                         hi = _mm256_permute4x64_epi64(hi, 0b11011000);
359                         _mm256_storeu_si256(out2, hi);
360                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
361                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
362                         if (!_mm256_testz_si256(found1, found1) ||
363                             !_mm256_testz_si256(found2, found2)) {
364                                 break;
365                         }
366
367                         in += 2;
368                         ++out1;
369                         ++out2;
370                 }
371                 current_frame->len += (uint8_t *)in - aligned_start;
372         } else {
373                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
374                 while (in < (const __m256i *)limit) {
375                         __m256i data = _mm256_load_si256(in);
376                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
377                         __m256i found = _mm256_cmpeq_epi8(data, needle);
378                         if (!_mm256_testz_si256(found, found)) {
379                                 break;
380                         }
381
382                         ++in;
383                         ++out;
384                 }
385                 current_frame->len = (uint8_t *)out - current_frame->data;
386         }
387 #else
388         const __m128i needle = _mm_set1_epi8(sync_char);
389
390         const __m128i *in = (const __m128i *)aligned_start;
391         if (current_frame->interleaved) {
392                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
393                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
394                 if (current_frame->len % 2 == 1) {
395                         swap(out1, out2);
396                 }
397
398                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
399                 while (in < (const __m128i *)limit) {
400                         __m128i data1 = _mm_load_si128(in);
401                         __m128i data2 = _mm_load_si128(in + 1);
402                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
403                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
404                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
405                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
406                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
407                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
408                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
409                         _mm_storeu_si128(out2, hi);
410                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
411                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
412                         if (!_mm_testz_si128(found1, found1) ||
413                             !_mm_testz_si128(found2, found2)) {
414                                 break;
415                         }
416
417                         in += 2;
418                         ++out1;
419                         ++out2;
420                 }
421                 current_frame->len += (uint8_t *)in - aligned_start;
422         } else {
423                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
424                 while (in < (const __m128i *)limit) {
425                         __m128i data = _mm_load_si128(in);
426                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
427                         __m128i found = _mm_cmpeq_epi8(data, needle);
428                         if (!_mm_testz_si128(found, found)) {
429                                 break;
430                         }
431
432                         ++in;
433                         ++out;
434                 }
435                 current_frame->len = (uint8_t *)out - current_frame->data;
436         }
437 #endif
438
439         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
440
441         return (const uint8_t *)in;
442 }
443 #endif
444
445 void decode_packs(const libusb_transfer *xfr,
446                   const char *sync_pattern,
447                   int sync_length,
448                   FrameAllocator::Frame *current_frame,
449                   const char *frame_type_name,
450                   function<void(const uint8_t *start)> start_callback)
451 {
452         int offset = 0;
453         for (int i = 0; i < xfr->num_iso_packets; i++) {
454                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
455
456                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
457                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
458                         continue;
459 //exit(5);
460                 }
461
462                 const uint8_t *start = xfr->buffer + offset;
463                 const uint8_t *limit = start + pack->actual_length;
464                 while (start < limit) {  // Usually runs only one iteration.
465 #ifdef __SSE2__
466                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
467                         if (start == limit) break;
468                         assert(start < limit);
469 #endif
470
471                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
472                         if (start_next_frame == nullptr) {
473                                 // add the rest of the buffer
474                                 add_to_frame(current_frame, frame_type_name, start, limit);
475                                 break;
476                         } else {
477                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
478                                 start = start_next_frame + sync_length;  // skip sync
479                                 start_callback(start);
480                         }
481                 }
482 #if 0
483                 dump_pack(xfr, offset, pack);
484 #endif
485                 offset += pack->length;
486         }
487 }
488
489 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
490 {
491         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
492                 fprintf(stderr, "transfer status %d\n", xfr->status);
493                 libusb_free_transfer(xfr);
494                 exit(3);
495         }
496
497         assert(xfr->user_data != nullptr);
498         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
499
500         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
501                 if (xfr->endpoint == 0x84) {
502                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
503                 } else {
504                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
505                 }
506         }
507         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
508                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
509                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
510 #if 0
511                 if (setup->wIndex == 44) {
512                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
513                 } else {
514                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
515                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
516                 }
517 #else
518                 memcpy(usb->register_file + usb->current_register, buf, 4);
519                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
520                 if (usb->current_register == 0) {
521                         // read through all of them
522                         printf("register dump:");
523                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
524                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
525                         }
526                         printf("\n");
527                 }
528                 libusb_fill_control_setup(xfr->buffer,
529                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
530                         /*index=*/usb->current_register, /*length=*/4);
531 #endif
532         }
533
534 #if 0
535         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
536         for (i = 0; i < xfr->actual_length; i++) {
537                 printf("%02x", xfr->buffer[i]);
538                 if (i % 16)
539                         printf("\n");
540                 else if (i % 8)
541                         printf("  ");
542                 else
543                         printf(" ");
544         }
545 #endif
546
547         if (libusb_submit_transfer(xfr) < 0) {
548                 fprintf(stderr, "error re-submitting URB\n");
549                 exit(1);
550         }
551 }
552
553 void BMUSBCapture::usb_thread_func()
554 {
555         printf("usb thread started\n");
556
557         sched_param param;
558         memset(&param, 0, sizeof(param));
559         param.sched_priority = 1;
560         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
561                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
562         }
563         while (!should_quit) {
564                 int rc = libusb_handle_events(nullptr);
565                 if (rc != LIBUSB_SUCCESS)
566                         break;
567         }
568 }
569
570 void BMUSBCapture::start_bm_capture()
571 {
572         if (video_frame_allocator == nullptr) {
573                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
574         }
575         if (audio_frame_allocator == nullptr) {
576                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
577         }
578         thread(&BMUSBCapture::dequeue_thread, this).detach();
579
580         int rc;
581         struct libusb_transfer *xfr;
582         vector<libusb_transfer *> iso_xfrs;
583
584         rc = libusb_init(nullptr);
585         if (rc < 0) {
586                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
587                 exit(1);
588         }
589
590         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
591         if (!devh) {
592                 fprintf(stderr, "Error finding USB device\n");
593                 exit(1);
594         }
595
596         libusb_config_descriptor *config;
597         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
598         if (rc < 0) {
599                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
600                 exit(1);
601         }
602         printf("%d interface\n", config->bNumInterfaces);
603         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
604                 printf("  interface %d\n", interface_number);
605                 const libusb_interface *interface = &config->interface[interface_number];
606                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
607                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
608                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
609                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
610                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
611                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
612                         }
613                 }
614         }
615
616         rc = libusb_set_configuration(devh, /*configuration=*/1);
617         if (rc < 0) {
618                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
619                 exit(1);
620         }
621
622         rc = libusb_claim_interface(devh, 0);
623         if (rc < 0) {
624                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
625                 exit(1);
626         }
627
628         // Alternate setting 1 is output, alternate setting 2 is input.
629         // Card is reset when switching alternates, so the driver uses
630         // this “double switch” when it wants to reset.
631         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
632         if (rc < 0) {
633                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
634                 exit(1);
635         }
636         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
637         if (rc < 0) {
638                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
639                 exit(1);
640         }
641 #if 0
642         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
643         if (rc < 0) {
644                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
645                 exit(1);
646         }
647 #endif
648
649 #if 0
650         rc = libusb_claim_interface(devh, 3);
651         if (rc < 0) {
652                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
653                 exit(1);
654         }
655 #endif
656
657         // theories:
658         //   44 is some kind of timer register (first 16 bits count upwards)
659         //   24 is some sort of watchdog?
660         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
661         //      (or will go to 0x73c60010?), also seen 0x73c60100
662         //   12 also changes all the time, unclear why  
663         //   16 seems to be autodetected mode somehow
664         //      --    this is e00115e0 after reset?
665         //                    ed0115e0 after mode change [to output?]
666         //                    2d0015e0 after more mode change [to input]
667         //                    ed0115e0 after more mode change
668         //                    2d0015e0 after more mode change
669         //
670         //                    390115e0 seems to indicate we have signal
671         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
672         //
673         //                    200015e0 on startup
674         //         changes to 250115e0 when we sync to the signal
675         //
676         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
677         //
        //    Bottom 16 bits of this register seem to be the firmware version number (possibly not all of them).
679         //
680         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
681         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
682         //
683         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
684         //    perhaps some of them are related to analog output?
685         //
686         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
687         //    but the driver sets it to 0x8036802a at some point.
688         //
689         //    all of this is on request 214/215. other requests (192, 219,
690         //    222, 223, 224) are used for firmware upgrade. Probably best to
691         //    stay out of it unless you know what you're doing.
692         //
693         //
694         // register 16:
695         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
696         //
697         // theories:
698         //   0x01 - stable signal
699         //   0x04 - deep color
700         //   0x08 - unknown (audio??)
701         //   0x20 - 720p??
702         //   0x30 - 576p??
703
704         struct ctrl {
705                 int endpoint;
706                 int request;
707                 int index;
708                 uint32_t data;
709         };
710         static const ctrl ctrls[] = {
711                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
712                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
713                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
714                 { LIBUSB_ENDPOINT_IN,  214,  4, 0 },
715                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
716                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
717                 { LIBUSB_ENDPOINT_IN,  214, 20, 0 },
718                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
719                 { LIBUSB_ENDPOINT_IN,  214, 28, 0 },
720                 { LIBUSB_ENDPOINT_IN,  215, 32, 0 },
721                 { LIBUSB_ENDPOINT_IN,  214, 36, 0 },
722                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
723                 { LIBUSB_ENDPOINT_IN,  216, 44, 0 },
724                 { LIBUSB_ENDPOINT_IN,  214, 48, 0 },
725                 { LIBUSB_ENDPOINT_IN,  214, 52, 0 },
726                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
727                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
728                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
729                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
730                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
731                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
732                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
733                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
734                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
735                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
736                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
737                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
738                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
739                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
740                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
741                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
742                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
743                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
744                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
745                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
746                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
747                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
748                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
749                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
750                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
751                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
752                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
753                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
754                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
755                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
756                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
757                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
758                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },  // packet 354
759                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
760                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
761                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
762                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
763                 // more...
764                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
765                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },  // wow, some kind of mode
766
767                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
768                 // capture (v210).
769                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
770                 // 0x10000000 = analog audio instead of embedded audio, it seems
771                 // 0x3a000000 = component video? (analog audio)
772                 // 0x3c000000 = composite video? (analog audio)
773                 // 0x3e000000 = s-video? (analog audio)
774                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
775                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
776
777                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0xffffffff },
778                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0xffffffff },
779                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x40404040 },
780                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x40404040 },
781                 //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x8036802a },
782                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
783                 //{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x13370001 },  // latch for frame start?
784                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
785                 //{ LIBUSB_ENDPOINT_OUT, 215,  4, 0x00000000 },  // appears to have no effect
786                 //{ LIBUSB_ENDPOINT_OUT, 215,  8, 0x00000000 },  // appears to have no effect
787                 //{ LIBUSB_ENDPOINT_OUT, 215, 20, 0x00000000 },  // appears to have no effect
788                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x00000000 },  // appears to have no effect
789                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x00000000 },  // appears to have no effect
790                 //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x00000000 },  // appears to have no effect
791 #if 0
792                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
793                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
794                 { LIBUSB_ENDPOINT_OUT, 215, 28 },
795                 { LIBUSB_ENDPOINT_OUT, 215, 32 },
796                 { LIBUSB_ENDPOINT_OUT, 215, 36 },
797                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
798                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
799                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
800                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
801                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
802                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
803                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
804                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
805                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
806                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
807                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
808                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
809                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
810                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
811                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
812                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
813                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
814 #endif
815         };
816
817         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
818                 uint32_t flipped = htonl(ctrls[req].data);
819                 static uint8_t value[4];
820                 memcpy(value, &flipped, sizeof(flipped));
821                 int size = sizeof(value);
822                 //if (ctrls[req].request == 215) size = 0;
823                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
824                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
825                 if (rc < 0) {
826                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
827                         exit(1);
828                 }
829                 
830                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
831                 for (int i = 0; i < rc; ++i) {
832                         printf("%02x", value[i]);
833                 }
834                 printf("\n");
835         }
836
837 #if 0
838         // DEBUG
839         for ( ;; ) {
840                 static int my_index = 0;
841                 static uint8_t value[4];
842                 int size = sizeof(value);
843                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
844                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
845                 if (rc < 0) {
846                         fprintf(stderr, "Error on control\n");
847                         exit(1);
848                 }
849                 printf("rc=%d index=%d: 0x", rc, my_index);
850                 for (int i = 0; i < rc; ++i) {
851                         printf("%02x", value[i]);
852                 }
853                 printf("\n");
854         }
855 #endif
856
857 #if 0
858         // set up an asynchronous transfer of the timer register
859         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
860         static int completed = 0;
861
862         xfr = libusb_alloc_transfer(0);
863         libusb_fill_control_setup(cmdbuf,
864             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
865                 /*index=*/44, /*length=*/4);
866         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
867         xfr->user_data = this;
868         libusb_submit_transfer(xfr);
869
870         // set up an asynchronous transfer of register 24
871         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
872         static int completed2 = 0;
873
874         xfr = libusb_alloc_transfer(0);
875         libusb_fill_control_setup(cmdbuf2,
876             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
877                 /*index=*/24, /*length=*/4);
878         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
879         xfr->user_data = this;
880         libusb_submit_transfer(xfr);
881 #endif
882
883         // set up an asynchronous transfer of the register dump
884         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
885         static int completed3 = 0;
886
887         xfr = libusb_alloc_transfer(0);
888         libusb_fill_control_setup(cmdbuf3,
889             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
890                 /*index=*/current_register, /*length=*/4);
891         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
892         xfr->user_data = this;
893         //libusb_submit_transfer(xfr);
894
895         audiofp = fopen("audio.raw", "wb");
896
897         // set up isochronous transfers for audio and video
898         for (int e = 3; e <= 4; ++e) {
899                 //int num_transfers = (e == 3) ? 6 : 6;
900                 int num_transfers = 6;
901                 for (int i = 0; i < num_transfers; ++i) {
902                         int num_iso_pack, size;
903                         if (e == 3) {
904                                 // Video seems to require isochronous packets scaled with the width; 
905                                 // seemingly six lines is about right, rounded up to the required 1kB
906                                 // multiple.
907                                 size = WIDTH * 2 * 6;
908                                 // Note that for 10-bit input, you'll need to increase size accordingly.
909                                 //size = size * 4 / 3;
910                                 if (size % 1024 != 0) {
911                                         size &= ~1023;
912                                         size += 1024;
913                                 }
914                                 num_iso_pack = (2 << 20) / size;  // 2 MB.
915                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
916                         } else {
917                                 size = 0xc0;
918                                 num_iso_pack = 80;
919                         }
920                         int num_bytes = num_iso_pack * size;
921                         uint8_t *buf = new uint8_t[num_bytes];
922
923                         xfr = libusb_alloc_transfer(num_iso_pack);
924                         if (!xfr) {
925                                 fprintf(stderr, "oom\n");
926                                 exit(1);
927                         }
928
929                         int ep = LIBUSB_ENDPOINT_IN | e;
930                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
931                                 num_iso_pack, cb_xfr, nullptr, 0);
932                         libusb_set_iso_packet_lengths(xfr, size);
933                         xfr->user_data = this;
934                         iso_xfrs.push_back(xfr);
935                 }
936         }
937
938         {
939                 int i = 0;
940                 for (libusb_transfer *xfr : iso_xfrs) {
941                         rc = libusb_submit_transfer(xfr);
942                         ++i;
943                         if (rc < 0) {
944                                 //printf("num_bytes=%d\n", num_bytes);
945                                 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
946                                         xfr->endpoint, i, libusb_error_name(rc));
947                                 exit(1);
948                         }
949                 }
950         }
951
952         should_quit = false;
953         usb_thread = thread(&BMUSBCapture::usb_thread_func, this);
954
955
956 #if 0
957         libusb_release_interface(devh, 0);
958 out:
959         if (devh)
960                 libusb_close(devh);
961         libusb_exit(nullptr);
962         return rc;
963 #endif
964 }
965
966 void BMUSBCapture::stop_bm_capture()
967 {
968         should_quit = true;
969         usb_thread.join();
970 }