]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Add an SSE2/AVX2 fast path to fuse the memmem() into the memcpy.
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <libusb.h>
10 #include <arpa/inet.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <fcntl.h>
14 #include <stdint.h>
15 #include <assert.h>
16 #ifdef __SSE2__
17 #include <immintrin.h>
18 #endif
19 #include <algorithm>
20 #include <functional>
21 #include <memory>
22 #include <deque>
23 #include <utility>
24 #include <mutex>
25 #include <condition_variable>
26 #include <thread>
27 #include <stack>
28 #include <atomic>
29 #include "bmusb.h"
30
31 using namespace std;
32
// Byte offset into register_file where the next control-transfer result is
// stored; cb_xfr() advances it by 4 per read and wraps it at NUM_REGISTERS.
static int current_register = 0;

// Size in bytes of the register shadow below.
#define NUM_REGISTERS 60
// Most recent values read back through control transfers, 4 bytes per register.
uint8_t register_file[NUM_REGISTERS];
37
// Picture geometry. In the visible code these are only referenced by the
// commented-out FRAME_SIZE formulas below.
#define WIDTH 1280
#define HEIGHT 750  /* 30 lines ancillary data? */
//#define WIDTH 1920
//#define HEIGHT 1125  /* ??? lines ancillary data? */
// Number of bytes at the start of a video frame that dump_frame() skips; also
// handed to the frame callback as the video offset.
#define HEADER_SIZE 44
//#define HEADER_SIZE 0
// Same as HEADER_SIZE, but for audio blocks.
#define AUDIO_HEADER_SIZE 4

//#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
//#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
// Buffer size used for the default video frame allocator (8 MiB).
#define FRAME_SIZE (8 << 20)
49
// Destination stream for dump_audio_block(). Nothing in the visible part of
// this file opens it.
FILE *audiofp;

// Frames currently being filled from the video and audio streams; cb_xfr()
// hands them to decode_packs().
FrameAllocator::Frame current_video_frame;
FrameAllocator::Frame current_audio_frame;
54
// A completed frame waiting in one of the pending queues, together with the
// two 16-bit values that queue_frame() was given for it.
struct QueuedFrame {
        uint16_t timecode;  // Used to pair up video and audio in dequeue_thread().
        uint16_t format;  // Passed through unchanged to the frame callback.
        FrameAllocator::Frame frame;  // The buffer holding the captured data.
};
60
// queue_lock is held while the two pending queues are modified;
// queues_not_empty is signalled by queue_frame() after every push and waited
// on by dequeue_thread().
mutex queue_lock;
condition_variable queues_not_empty;
deque<QueuedFrame> pending_video_frames;
deque<QueuedFrame> pending_audio_frames;

// should_quit is polled by usb_thread_func() between libusb event rounds.
thread usb_thread;
atomic<bool> should_quit;
68
69 FrameAllocator::~FrameAllocator() {}
70
// Number of buffers each MallocFrameAllocator preallocates.
#define NUM_QUEUED_FRAMES 8
// FrameAllocator backed by a fixed pool of heap buffers: the constructor
// preallocates the pool, alloc_frame() takes a buffer out of it and
// release_frame() puts one back.
class MallocFrameAllocator : public FrameAllocator {
public:
        // Preallocates NUM_QUEUED_FRAMES buffers of <frame_size> bytes each.
        MallocFrameAllocator(size_t frame_size);
        // Hands out a pooled buffer. When the pool is empty, an overrun is
        // logged and the returned Frame only has its owner set.
        Frame alloc_frame() override;
        // Returns the frame's buffer to the pool.
        void release_frame(Frame frame) override;

private:
        size_t frame_size;  // Size in bytes of every pooled buffer.

        mutex freelist_mutex;  // Held while <freelist> is accessed.
        stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
};
84
85 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
86         : frame_size(frame_size)
87 {
88         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
89                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
90         }
91 }
92
93 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
94 {
95         Frame vf;
96         vf.owner = this;
97
98         unique_lock<mutex> lock(freelist_mutex);  // Meh.
99         if (freelist.empty()) {
100                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
101                         frame_size);
102         } else {
103                 vf.data = freelist.top().release();
104                 vf.size = frame_size;
105                 freelist.pop();  // Meh.
106         }
107         return vf;
108 }
109
110 void MallocFrameAllocator::release_frame(Frame frame)
111 {
112         unique_lock<mutex> lock(freelist_mutex);
113         freelist.push(unique_ptr<uint8_t[]>(frame.data));
114 }
115
// Allocators used for new video and audio frames. start_bm_capture() installs
// MallocFrameAllocator instances if these are still null when it runs.
FrameAllocator *video_frame_allocator = nullptr;
FrameAllocator *audio_frame_allocator = nullptr;
// Called by dequeue_thread() for every matched video/audio pair.
frame_callback_t frame_callback = nullptr;
119
// Returns true if <a> comes strictly before <b> on a 16-bit counter that wraps
// around, i.e. if stepping forward from <a> reaches <b> in 1..0x7fff steps.
// Equal values, and values exactly 0x8000 apart, compare as "not less".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Forward distance from a to b, modulo 0x10000.
        const uint16_t forward_distance = uint16_t(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
131
132 void queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
133 {
134         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
135                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
136                         q->back().timecode, timecode);
137                 frame.owner->release_frame(frame);
138                 return;
139         }
140
141         QueuedFrame qf;
142         qf.format = format;
143         qf.timecode = timecode;
144         qf.frame = frame;
145
146         {
147                 unique_lock<mutex> lock(queue_lock);
148                 q->push_back(move(qf));
149         }
150         queues_not_empty.notify_one();  // might be spurious
151 }
152
153 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
154 {
155         FILE *fp = fopen(filename, "wb");
156         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
157                 printf("short write!\n");
158         }
159         fclose(fp);
160 }
161
162 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
163 {
164         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
165 }
166
167 void dequeue_thread()
168 {
169         for ( ;; ) {
170                 unique_lock<mutex> lock(queue_lock);
171                 queues_not_empty.wait(lock, []{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
172
173                 uint16_t video_timecode = pending_video_frames.front().timecode;
174                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
175                 if (video_timecode < audio_timecode) {
176                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
177                                 video_timecode);
178                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
179                         pending_video_frames.pop_front();
180                 } else if (audio_timecode < video_timecode) {
181                         printf("Audio block 0x%04x without corresponding video block, dropping.\n",
182                                 audio_timecode);
183                         audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
184                         pending_audio_frames.pop_front();
185                 } else {
186                         QueuedFrame video_frame = pending_video_frames.front();
187                         QueuedFrame audio_frame = pending_audio_frames.front();
188                         pending_audio_frames.pop_front();
189                         pending_video_frames.pop_front();
190                         lock.unlock();
191
192 #if 0
193                         char filename[255];
194                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
195                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
196                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
197 #endif
198
199                         frame_callback(video_timecode,
200                                        video_frame.frame, HEADER_SIZE, video_frame.format,
201                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
202                 }
203         }
204 }
205
206 void start_new_frame(const uint8_t *start)
207 {
208         uint16_t format = (start[3] << 8) | start[2];
209         uint16_t timecode = (start[1] << 8) | start[0];
210
211         if (current_video_frame.len > 0) {
212                 //dump_frame();
213                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
214         }
215         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
216         //      format, timecode,
217         //      //start[7], start[6], start[5], start[4],
218         //      read_current_frame, FRAME_SIZE);
219
220         current_video_frame = video_frame_allocator->alloc_frame();
221         //if (current_video_frame.data == nullptr) {
222         //      read_current_frame = -1;
223         //} else {
224         //      read_current_frame = 0;
225         //}
226 }
227
228 void start_new_audio_block(const uint8_t *start)
229 {
230         uint16_t format = (start[3] << 8) | start[2];
231         uint16_t timecode = (start[1] << 8) | start[0];
232         if (current_audio_frame.len > 0) {
233                 //dump_audio_block();
234                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
235         }
236         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
237         //      format, timecode, read_current_audio_block);
238         current_audio_frame = audio_frame_allocator->alloc_frame();
239 }
240
241 #if 0
242 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
243 {
244         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
245         for (unsigned j = 0; j < pack->actual_length; j++) {
246         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
247                 printf("%02x", xfr->buffer[j + offset]);
248                 if ((j % 16) == 15)
249                         printf("\n");
250                 else if ((j % 8) == 7)
251                         printf("  ");
252                 else
253                         printf(" ");
254         }
255 }
256 #endif
257
258 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
259 {
260         if (current_frame->data == nullptr ||
261             current_frame->len > current_frame->size ||
262             start == end) {
263                 return;
264         }
265
266         int bytes = end - start;
267         if (current_frame->len + bytes > current_frame->size) {
268                 printf("%d bytes overflow after last %s frame\n",
269                         int(current_frame->len + bytes - current_frame->size), frame_type_name);
270                 //dump_frame();
271         } else {
272                 memcpy(current_frame->data + current_frame->len, start, bytes);
273                 current_frame->len += bytes;
274         }
275 }
276
277 #ifdef __SSE2__
278
279 // Does a memcpy and memchr in one to reduce processing time.
280 // Note that the benefit is somewhat limited if your L3 cache is small,
281 // as you'll (unfortunately) spend most of the time loading the data
282 // from main memory.
283 //
284 // Complicated cases are left to the slow path; it basically stops copying
285 // up until the first instance of "sync_char" (usually a bit before, actually).
286 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
287 // data, and what we really need this for is the 00 00 ff ff marker in video data.
288 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
289 {
290         if (current_frame->data == nullptr ||
291             current_frame->len > current_frame->size ||
292             start == limit) {
293                 return start;
294         }
295         size_t orig_bytes = limit - start;
296         if (orig_bytes < 128) {
297                 // Don't bother.
298                 return start;
299         }
300
301         // Don't read more bytes than we can write.
302         limit = min(limit, start + (current_frame->size - current_frame->len));
303
304         // Align end to 32 bytes.
305         limit = (const uint8_t *)(intptr_t(limit) & ~31);
306
307         if (start >= limit) {
308                 return start;
309         }
310
311         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
312         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
313         if (aligned_start != start) {
314                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
315                 if (sync_start == nullptr) {
316                         memcpy(current_frame->data, start, aligned_start - start);
317                         current_frame->len += aligned_start - start;
318                 } else {
319                         memcpy(current_frame->data, start, sync_start - start);
320                         current_frame->len += sync_start - start;
321                         return sync_start;
322                 }
323         }
324
325 #if __AVX2__
326         const __m256i needle = _mm256_set1_epi8(sync_char);
327
328         const __m256i *in = (const __m256i *)aligned_start;
329         __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
330         while (in < (const __m256i *)limit) {
331                 __m256i data = _mm256_load_si256(in);
332                 _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
333                 __m256i found = _mm256_cmpeq_epi8(data, needle);
334                 if (!_mm256_testz_si256(found, found)) {
335                         break;
336                 }
337
338                 ++in;
339                 ++out;
340         }
341 #else
342         const __m128i needle = _mm_set1_epi8(sync_char);
343
344         const __m128i *in = (const __m128i *)aligned_start;
345         __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
346         while (in < (const __m128i *)limit) {
347                 __m128i data = _mm_load_si128(in);
348                 _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
349                 __m128i found = _mm_cmpeq_epi8(data, needle);
350                 if (!_mm_testz_si128(found, found)) {
351                         break;
352                 }
353
354                 ++in;
355                 ++out;
356         }
357 #endif
358
359         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
360
361         current_frame->len = (uint8_t *)out - current_frame->data;
362         return (const uint8_t *)in;
363 }
364 #endif
365
366 void decode_packs(const libusb_transfer *xfr,
367                   const char *sync_pattern,
368                   int sync_length,
369                   FrameAllocator::Frame *current_frame,
370                   const char *frame_type_name,
371                   function<void(const uint8_t *start)> start_callback)
372 {
373         int offset = 0;
374         for (int i = 0; i < xfr->num_iso_packets; i++) {
375                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
376
377                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
378                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
379                         continue;
380 //exit(5);
381                 }
382
383                 const uint8_t *start = xfr->buffer + offset;
384                 const uint8_t *limit = start + pack->actual_length;
385                 while (start < limit) {  // Usually runs only one iteration.
386 #ifdef __SSE2__
387                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
388                         if (start == limit) break;
389                         assert(start < limit);
390 #endif
391
392                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
393                         if (start_next_frame == nullptr) {
394                                 // add the rest of the buffer
395                                 add_to_frame(current_frame, frame_type_name, start, limit);
396                                 break;
397                         } else {
398                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
399                                 start = start_next_frame + sync_length;  // skip sync
400                                 start_callback(start);
401                         }
402                 }
403 #if 0
404                 dump_pack(xfr, offset, pack);
405 #endif
406                 offset += pack->length;
407         }
408 }
409
410 static void cb_xfr(struct libusb_transfer *xfr)
411 {
412         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
413                 fprintf(stderr, "transfer status %d\n", xfr->status);
414                 libusb_free_transfer(xfr);
415                 exit(3);
416         }
417
418         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
419                 if (xfr->endpoint == 0x84) {
420                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &current_audio_frame, "audio", start_new_audio_block);
421                 } else {
422                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &current_video_frame, "video", start_new_frame);
423                 }
424         }
425         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
426                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
427                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
428 #if 0
429                 if (setup->wIndex == 44) {
430                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
431                 } else {
432                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
433                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
434                 }
435 #else
436                 memcpy(register_file + current_register, buf, 4);
437                 current_register = (current_register + 4) % NUM_REGISTERS;
438                 if (current_register == 0) {
439                         // read through all of them
440                         printf("register dump:");
441                         for (int i = 0; i < NUM_REGISTERS; i += 4) {
442                                 printf(" 0x%02x%02x%02x%02x", register_file[i], register_file[i + 1], register_file[i + 2], register_file[i + 3]);
443                         }
444                         printf("\n");
445                 }
446                 libusb_fill_control_setup(xfr->buffer,
447                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
448                         /*index=*/current_register, /*length=*/4);
449 #endif
450         }
451
452 #if 0
453         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
454         for (i = 0; i < xfr->actual_length; i++) {
455                 printf("%02x", xfr->buffer[i]);
456                 if (i % 16)
457                         printf("\n");
458                 else if (i % 8)
459                         printf("  ");
460                 else
461                         printf(" ");
462         }
463 #endif
464
465         if (libusb_submit_transfer(xfr) < 0) {
466                 fprintf(stderr, "error re-submitting URB\n");
467                 exit(1);
468         }
469 }
470
471 void usb_thread_func()
472 {
473         printf("usb thread started\n");
474
475         sched_param param;
476         memset(&param, 0, sizeof(param));
477         param.sched_priority = 1;
478         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
479                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
480         }
481         while (!should_quit) {
482                 int rc = libusb_handle_events(nullptr);
483                 if (rc != LIBUSB_SUCCESS)
484                         break;
485         }
486 }
487
488 FrameAllocator *get_video_frame_allocator()
489 {
490         return video_frame_allocator;
491 }
492
493 void set_video_frame_allocator(FrameAllocator *allocator)
494 {
495         video_frame_allocator = allocator;
496 }
497
498 FrameAllocator *get_audio_frame_allocator()
499 {
500         return audio_frame_allocator;
501 }
502
503 void set_audio_frame_allocator(FrameAllocator *allocator)
504 {
505         audio_frame_allocator = allocator;
506 }
507
508 void set_frame_callback(frame_callback_t callback)
509 {
510         frame_callback = callback;
511 }
512
513 void start_bm_capture()
514 {
515         if (video_frame_allocator == nullptr) {
516                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
517         }
518         if (audio_frame_allocator == nullptr) {
519                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
520         }
521         thread(dequeue_thread).detach();
522
523         int rc;
524         struct libusb_transfer *xfr;
525         vector<libusb_transfer *> iso_xfrs;
526
527         rc = libusb_init(nullptr);
528         if (rc < 0) {
529                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
530                 exit(1);
531         }
532
533         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
534         if (!devh) {
535                 fprintf(stderr, "Error finding USB device\n");
536                 exit(1);
537         }
538
539         libusb_config_descriptor *config;
540         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
541         if (rc < 0) {
542                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
543                 exit(1);
544         }
545         printf("%d interface\n", config->bNumInterfaces);
546         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
547                 printf("  interface %d\n", interface_number);
548                 const libusb_interface *interface = &config->interface[interface_number];
549                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
550                         printf("    alternate setting %d\n", altsetting);
551                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
552                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
553                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
554                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
555                         }
556                 }
557         }
558
559         rc = libusb_set_configuration(devh, /*configuration=*/1);
560         if (rc < 0) {
561                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
562                 exit(1);
563         }
564
565         rc = libusb_claim_interface(devh, 0);
566         if (rc < 0) {
567                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
568                 exit(1);
569         }
570
571         // Alternate setting 1 is output, alternate setting 2 is input.
572         // Card is reset when switching alternates, so the driver uses
573         // this “double switch” when it wants to reset.
574         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
575         if (rc < 0) {
576                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
577                 exit(1);
578         }
579         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
580         if (rc < 0) {
581                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
582                 exit(1);
583         }
584 #if 0
585         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
586         if (rc < 0) {
587                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
588                 exit(1);
589         }
590 #endif
591
592 #if 0
593         rc = libusb_claim_interface(devh, 3);
594         if (rc < 0) {
595                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
596                 exit(1);
597         }
598 #endif
599
600         // theories:
601         //   44 is some kind of timer register (first 16 bits count upwards)
602         //   24 is some sort of watchdog?
603         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
604         //      (or will go to 0x73c60010?), also seen 0x73c60100
605         //   12 also changes all the time, unclear why  
606         //   16 seems to be autodetected mode somehow
607         //      --    this is e00115e0 after reset?
608         //                    ed0115e0 after mode change [to output?]
609         //                    2d0015e0 after more mode change [to input]
610         //                    ed0115e0 after more mode change
611         //                    2d0015e0 after more mode change
612         //
613         //                    390115e0 seems to indicate we have signal
614         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
615         //
616         //                    200015e0 on startup
617         //         changes to 250115e0 when we sync to the signal
618         //
619         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
620         //
621         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
622         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
623         //
624         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
625         //    perhaps some of them are related to analog output?
626         //
627         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
628         //    but the driver sets it to 0x8036802a at some point.
629         //
630         // register 16:
631         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
632         //
633         // theories:
634         //   0x01 - stable signal
635         //   0x04 - deep color
636         //   0x08 - unknown (audio??)
637         //   0x20 - 720p??
638         //   0x30 - 576p??
639
640         struct ctrl {
641                 int endpoint;
642                 int request;
643                 int index;
644                 uint32_t data;
645         };
646         static const ctrl ctrls[] = {
647                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
648                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
649                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
650                 { LIBUSB_ENDPOINT_IN,  214,  4, 0 },
651                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
652                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
653                 { LIBUSB_ENDPOINT_IN,  214, 20, 0 },
654                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
655                 { LIBUSB_ENDPOINT_IN,  214, 28, 0 },
656                 { LIBUSB_ENDPOINT_IN,  215, 32, 0 },
657                 { LIBUSB_ENDPOINT_IN,  214, 36, 0 },
658                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
659                 { LIBUSB_ENDPOINT_IN,  216, 44, 0 },
660                 { LIBUSB_ENDPOINT_IN,  214, 48, 0 },
661                 { LIBUSB_ENDPOINT_IN,  214, 52, 0 },
662                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
663                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
664                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
665                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
666                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
667                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
668                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
669                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
670                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
671                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
672                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
673                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
674                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
675                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
676                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
677                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
678                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
679                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
680                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
681                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
682                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
683                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
684                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
685                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
686                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
687                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
688                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
689                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
690                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
691                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
692                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
693                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
694                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },  // packet 354
695                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
696                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
697                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
698                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
699                 // more...
700                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
701                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },  // wow, some kind of mode
702
703                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
704                 // capture (v210).
705                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
706                 // 0x10000000 = analog audio instead of embedded audio, it seems
707                 // 0x3a000000 = component video? (analog audio)
708                 // 0x3c000000 = composite video? (analog audio)
709                 // 0x3e000000 = s-video? (analog audio)
710                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
711                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
712
713                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0xffffffff },
714                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0xffffffff },
715                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x40404040 },
716                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x40404040 },
717                 //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x8036802a },
718                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
719                 //{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x13370001 },  // latch for frame start?
720                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
721                 //{ LIBUSB_ENDPOINT_OUT, 215,  4, 0x00000000 },  // appears to have no e fect
722                 //{ LIBUSB_ENDPOINT_OUT, 215,  8, 0x00000000 },  // appears to have no effect
723                 //{ LIBUSB_ENDPOINT_OUT, 215, 20, 0x00000000 },  // appears to have no effect
724                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x00000000 },  // appears to have no effect
725                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x00000000 },  // appears to have no effect
726                 //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x00000000 },  // appears to have no effect
727 #if 0
728                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
729                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
730                 { LIBUSB_ENDPOINT_OUT, 215, 28 },
731                 { LIBUSB_ENDPOINT_OUT, 215, 32 },
732                 { LIBUSB_ENDPOINT_OUT, 215, 36 },
733                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
734                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
735                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
736                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
737                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
738                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
739                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
740                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
741                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
742                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
743                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
744                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
745                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
746                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
747                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
748                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
749                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
750 #endif
751         };
752
753         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
754                 uint32_t flipped = htonl(ctrls[req].data);
755                 static uint8_t value[4];
756                 memcpy(value, &flipped, sizeof(flipped));
757                 int size = sizeof(value);
758                 //if (ctrls[req].request == 215) size = 0;
759                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
760                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
761                 if (rc < 0) {
762                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
763                         exit(1);
764                 }
765                 
766                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
767                 for (int i = 0; i < rc; ++i) {
768                         printf("%02x", value[i]);
769                 }
770                 printf("\n");
771         }
772
773 #if 0
774         // DEBUG
775         for ( ;; ) {
776                 static int my_index = 0;
777                 static uint8_t value[4];
778                 int size = sizeof(value);
779                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
780                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
781                 if (rc < 0) {
782                         fprintf(stderr, "Error on control\n");
783                         exit(1);
784                 }
785                 printf("rc=%d index=%d: 0x", rc, my_index);
786                 for (int i = 0; i < rc; ++i) {
787                         printf("%02x", value[i]);
788                 }
789                 printf("\n");
790         }
791 #endif
792
793 #if 0
794         // set up an asynchronous transfer of the timer register
795         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
796         static int completed = 0;
797
798         xfr = libusb_alloc_transfer(0);
799         libusb_fill_control_setup(cmdbuf,
800             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
801                 /*index=*/44, /*length=*/4);
802         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
803         libusb_submit_transfer(xfr);
804
805         // set up an asynchronous transfer of register 24
806         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
807         static int completed2 = 0;
808
809         xfr = libusb_alloc_transfer(0);
810         libusb_fill_control_setup(cmdbuf2,
811             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
812                 /*index=*/24, /*length=*/4);
813         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
814         libusb_submit_transfer(xfr);
815 #endif
816
817         // set up an asynchronous transfer of the register dump
818         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
819         static int completed3 = 0;
820
821         xfr = libusb_alloc_transfer(0);
822         libusb_fill_control_setup(cmdbuf3,
823             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
824                 /*index=*/current_register, /*length=*/4);
825         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
826         //libusb_submit_transfer(xfr);
827
828         audiofp = fopen("audio.raw", "wb");
829
830         // set up isochronous transfers for audio and video
831         for (int e = 3; e <= 4; ++e) {
832                 //int num_transfers = (e == 3) ? 6 : 6;
833                 int num_transfers = 6;
834                 for (int i = 0; i < num_transfers; ++i) {
835                         int num_iso_pack, size;
836                         if (e == 3) {
837                                 // Video seems to require isochronous packets scaled with the width; 
838                                 // seemingly six lines is about right, rounded up to the required 1kB
839                                 // multiple.
840                                 size = WIDTH * 2 * 6;
841                                 // Note that for 10-bit input, you'll need to increase size accordingly.
842                                 //size = size * 4 / 3;
843                                 if (size % 1024 != 0) {
844                                         size &= ~1023;
845                                         size += 1024;
846                                 }
847                                 num_iso_pack = (2 << 20) / size;  // 2 MB.
848                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
849                         } else {
850                                 size = 0xc0;
851                                 num_iso_pack = 80;
852                         }
853                         int num_bytes = num_iso_pack * size;
854                         uint8_t *buf = new uint8_t[num_bytes];
855
856                         xfr = libusb_alloc_transfer(num_iso_pack);
857                         if (!xfr) {
858                                 fprintf(stderr, "oom\n");
859                                 exit(1);
860                         }
861
862                         int ep = LIBUSB_ENDPOINT_IN | e;
863                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
864                                 num_iso_pack, cb_xfr, nullptr, 0);
865                         libusb_set_iso_packet_lengths(xfr, size);
866                         iso_xfrs.push_back(xfr);
867                 }
868         }
869
870         {
871                 int i = 0;
872                 for (libusb_transfer *xfr : iso_xfrs) {
873                         rc = libusb_submit_transfer(xfr);
874                         ++i;
875                         if (rc < 0) {
876                                 //printf("num_bytes=%d\n", num_bytes);
877                                 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
878                                         xfr->endpoint, i, libusb_error_name(rc));
879                                 exit(1);
880                         }
881                 }
882         }
883
884         usb_thread = thread(usb_thread_func);
885
886
887 #if 0
888         libusb_release_interface(devh, 0);
889 out:
890         if (devh)
891                 libusb_close(devh);
892         libusb_exit(nullptr);
893         return rc;
894 #endif
895 }
896
897 void stop_bm_capture()
898 {
899         should_quit = true;
900         usb_thread.join();
901 }