]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Add an interleaved mode to split UYVY into YV and YY on-the-fly.
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <libusb.h>
10 #include <arpa/inet.h>
11 #include <unistd.h>
12 #include <string.h>
13 #include <fcntl.h>
14 #include <stdint.h>
15 #include <assert.h>
16 #ifdef __SSE2__
17 #include <immintrin.h>
18 #endif
19 #include <algorithm>
20 #include <functional>
21 #include <memory>
22 #include <deque>
23 #include <utility>
24 #include <mutex>
25 #include <condition_variable>
26 #include <thread>
27 #include <stack>
28 #include <atomic>
29 #include "bmusb.h"
30
31 using namespace std;
32
// Byte offset into register_file of the register currently being polled via
// the control transfer; cb_xfr() advances it by 4 and wraps at NUM_REGISTERS.
static int current_register = 0;

// Size in bytes of the register shadow copy below (cb_xfr() fills it 4 bytes at a time).
#define NUM_REGISTERS 60
uint8_t register_file[NUM_REGISTERS];

#define WIDTH 1280
#define HEIGHT 750  /* 30 lines ancillary data? */
//#define WIDTH 1920
//#define HEIGHT 1125  /* ??? lines ancillary data? */
// Bytes skipped at the start of a video frame by dump_frame(); also passed to
// the frame callback as the video offset.
#define HEADER_SIZE 44
//#define HEADER_SIZE 0
// Bytes skipped at the start of an audio block by dump_audio_block(); also
// passed to the frame callback as the audio offset.
#define AUDIO_HEADER_SIZE 4

//#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
//#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
// Buffer size used for the default video frame allocator (8 MiB).
#define FRAME_SIZE (8 << 20)

// Destination for dump_audio_block().
// NOTE(review): not opened anywhere in the visible part of the file.
FILE *audiofp;

// Frames currently being filled by decode_packs() for the video and audio
// streams respectively.
FrameAllocator::Frame current_video_frame;
FrameAllocator::Frame current_audio_frame;
54
55 struct QueuedFrame {
56         uint16_t timecode;
57         uint16_t format;
58         FrameAllocator::Frame frame;
59 };
60
// Protects both pending queues below.
mutex queue_lock;
// Signalled by queue_frame() after every push; dequeue_thread() waits on it
// until both queues are non-empty.
condition_variable queues_not_empty;
deque<QueuedFrame> pending_video_frames;
deque<QueuedFrame> pending_audio_frames;

thread usb_thread;
// Polled by usb_thread_func(); setting it makes the event loop exit after the
// current libusb_handle_events() call returns.
atomic<bool> should_quit;
68
69 FrameAllocator::~FrameAllocator() {}
70
71 #define NUM_QUEUED_FRAMES 8
72 class MallocFrameAllocator : public FrameAllocator {
73 public:
74         MallocFrameAllocator(size_t frame_size);
75         Frame alloc_frame() override;
76         void release_frame(Frame frame) override;
77
78 private:
79         size_t frame_size;
80
81         mutex freelist_mutex;
82         stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
83 };
84
85 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
86         : frame_size(frame_size)
87 {
88         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
89                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
90         }
91 }
92
93 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
94 {
95         Frame vf;
96         vf.owner = this;
97
98         unique_lock<mutex> lock(freelist_mutex);  // Meh.
99         if (freelist.empty()) {
100                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
101                         frame_size);
102         } else {
103                 vf.data = freelist.top().release();
104                 vf.size = frame_size;
105                 freelist.pop();  // Meh.
106         }
107         return vf;
108 }
109
110 void MallocFrameAllocator::release_frame(Frame frame)
111 {
112         unique_lock<mutex> lock(freelist_mutex);
113         freelist.push(unique_ptr<uint8_t[]>(frame.data));
114 }
115
// Allocators used for new video/audio frames. If still null when
// start_bm_capture() runs, a MallocFrameAllocator is installed.
FrameAllocator *video_frame_allocator = nullptr;
FrameAllocator *audio_frame_allocator = nullptr;
// Called by dequeue_thread() for every matched video/audio pair.
frame_callback_t frame_callback = nullptr;
119
// Returns true if <a> comes before <b> on a 16-bit counter that wraps around,
// i.e. if stepping forward from a reaches b in fewer than 0x8000 steps.
// Equal values, and values exactly 0x8000 apart, compare as "not less".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
        // Forward distance from a to b, modulo 2^16.
        const uint16_t forward_distance = uint16_t(b - a);
        return forward_distance != 0 && forward_distance < 0x8000;
}
131
132 void queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
133 {
134         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
135                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
136                         q->back().timecode, timecode);
137                 frame.owner->release_frame(frame);
138                 return;
139         }
140
141         QueuedFrame qf;
142         qf.format = format;
143         qf.timecode = timecode;
144         qf.frame = frame;
145
146         {
147                 unique_lock<mutex> lock(queue_lock);
148                 q->push_back(move(qf));
149         }
150         queues_not_empty.notify_one();  // might be spurious
151 }
152
153 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
154 {
155         FILE *fp = fopen(filename, "wb");
156         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
157                 printf("short write!\n");
158         }
159         fclose(fp);
160 }
161
162 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
163 {
164         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
165 }
166
167 void dequeue_thread()
168 {
169         for ( ;; ) {
170                 unique_lock<mutex> lock(queue_lock);
171                 queues_not_empty.wait(lock, []{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
172
173                 uint16_t video_timecode = pending_video_frames.front().timecode;
174                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
175                 if (video_timecode < audio_timecode) {
176                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
177                                 video_timecode);
178                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
179                         pending_video_frames.pop_front();
180                 } else if (audio_timecode < video_timecode) {
181                         printf("Audio block 0x%04x without corresponding video block, dropping.\n",
182                                 audio_timecode);
183                         audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
184                         pending_audio_frames.pop_front();
185                 } else {
186                         QueuedFrame video_frame = pending_video_frames.front();
187                         QueuedFrame audio_frame = pending_audio_frames.front();
188                         pending_audio_frames.pop_front();
189                         pending_video_frames.pop_front();
190                         lock.unlock();
191
192 #if 0
193                         char filename[255];
194                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
195                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
196                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
197 #endif
198
199                         frame_callback(video_timecode,
200                                        video_frame.frame, HEADER_SIZE, video_frame.format,
201                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
202                 }
203         }
204 }
205
206 void start_new_frame(const uint8_t *start)
207 {
208         uint16_t format = (start[3] << 8) | start[2];
209         uint16_t timecode = (start[1] << 8) | start[0];
210
211         if (current_video_frame.len > 0) {
212                 //dump_frame();
213                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
214         }
215         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
216         //      format, timecode,
217         //      //start[7], start[6], start[5], start[4],
218         //      read_current_frame, FRAME_SIZE);
219
220         current_video_frame = video_frame_allocator->alloc_frame();
221         //if (current_video_frame.data == nullptr) {
222         //      read_current_frame = -1;
223         //} else {
224         //      read_current_frame = 0;
225         //}
226 }
227
228 void start_new_audio_block(const uint8_t *start)
229 {
230         uint16_t format = (start[3] << 8) | start[2];
231         uint16_t timecode = (start[1] << 8) | start[0];
232         if (current_audio_frame.len > 0) {
233                 //dump_audio_block();
234                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
235         }
236         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
237         //      format, timecode, read_current_audio_block);
238         current_audio_frame = audio_frame_allocator->alloc_frame();
239 }
240
241 #if 0
// Debugging aid (compiled out): hex-dumps the received bytes of one
// isochronous packet, 16 bytes per line with an extra gap after 8 bytes.
// <offset> is the packet's byte offset within xfr->buffer.
static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
{
        //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
        for (unsigned j = 0; j < pack->actual_length; j++) {
        //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
                printf("%02x", xfr->buffer[j + offset]);
                if ((j % 16) == 15)
                        printf("\n");
                else if ((j % 8) == 7)
                        printf("  ");
                else
                        printf(" ");
        }
}
256 #endif
257
// Deinterleaves <n> bytes from <src>: bytes at even offsets go to <dest1>,
// bytes at odd offsets go to <dest2>. <n> must be even.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
        assert(n % 2 == 0);

        for (size_t pair = 0; 2 * pair < n; ++pair) {
                dest1[pair] = src[2 * pair];
                dest2[pair] = src[2 * pair + 1];
        }
}
269
270 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
271 {
272         if (current_frame->data == nullptr ||
273             current_frame->len > current_frame->size ||
274             start == end) {
275                 return;
276         }
277
278         int bytes = end - start;
279         if (current_frame->len + bytes > current_frame->size) {
280                 printf("%d bytes overflow after last %s frame\n",
281                         int(current_frame->len + bytes - current_frame->size), frame_type_name);
282                 //dump_frame();
283         } else {
284                 if (current_frame->interleaved) {
285                         uint8_t *data = current_frame->data + current_frame->len / 2;
286                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
287                         if (current_frame->len % 2 == 1) {
288                                 ++data;
289                                 swap(data, data2);
290                         }
291                         if (bytes % 2 == 1) {
292                                 *data++ = *start++;
293                                 swap(data, data2);
294                                 ++current_frame->len;
295                                 --bytes;
296                         }
297                         memcpy_interleaved(data, data2, start, bytes);
298                         current_frame->len += bytes;
299                 } else {
300                         memcpy(current_frame->data + current_frame->len, start, bytes);
301                         current_frame->len += bytes;
302                 }
303         }
304 }
305
306 #ifdef __SSE2__
307
308 // Does a memcpy and memchr in one to reduce processing time.
309 // Note that the benefit is somewhat limited if your L3 cache is small,
310 // as you'll (unfortunately) spend most of the time loading the data
311 // from main memory.
312 //
313 // Complicated cases are left to the slow path; it basically stops copying
314 // up until the first instance of "sync_char" (usually a bit before, actually).
315 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
316 // data, and what we really need this for is the 00 00 ff ff marker in video data.
317 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
318 {
319         if (current_frame->data == nullptr ||
320             current_frame->len > current_frame->size ||
321             start == limit) {
322                 return start;
323         }
324         size_t orig_bytes = limit - start;
325         if (orig_bytes < 128) {
326                 // Don't bother.
327                 return start;
328         }
329
330         // Don't read more bytes than we can write.
331         limit = min(limit, start + (current_frame->size - current_frame->len));
332
333         // Align end to 32 bytes.
334         limit = (const uint8_t *)(intptr_t(limit) & ~31);
335
336         if (start >= limit) {
337                 return start;
338         }
339
340         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
341         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
342         if (aligned_start != start) {
343                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
344                 if (sync_start == nullptr) {
345                         add_to_frame(current_frame, "", start, aligned_start);
346                 } else {
347                         add_to_frame(current_frame, "", start, sync_start);
348                         return sync_start;
349                 }
350         }
351
352         // Make the length a multiple of 64.
353         if (current_frame->interleaved) {
354                 if (((limit - aligned_start) % 64) != 0) {
355                         limit -= 32;
356                 }
357                 assert(((limit - aligned_start) % 64) == 0);
358         }
359
360 #if __AVX2__
361         const __m256i needle = _mm256_set1_epi8(sync_char);
362
363         const __m256i *in = (const __m256i *)aligned_start;
364         if (current_frame->interleaved) {
365                 __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
366                 __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
367                 if (current_frame->len % 2 == 1) {
368                         swap(out1, out2);
369                 }
370
371                 __m256i mask_lower_byte = _mm256_set1_epi16(0x00ff);
372                 while (in < (const __m256i *)limit) {
373                         __m256i data1 = _mm256_load_si256(in);
374                         __m256i data2 = _mm256_load_si256(in + 1);
375                         __m256i data1_lo = _mm256_and_si256(data1, mask_lower_byte);
376                         __m256i data2_lo = _mm256_and_si256(data2, mask_lower_byte);
377                         __m256i data1_hi = _mm256_srli_epi16(data1, 8);
378                         __m256i data2_hi = _mm256_srli_epi16(data2, 8);
379                         __m256i lo = _mm256_packus_epi16(data1_lo, data2_lo);
380                         lo = _mm256_permute4x64_epi64(lo, 0b11011000);
381                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
382                         __m256i hi = _mm256_packus_epi16(data1_hi, data2_hi);
383                         hi = _mm256_permute4x64_epi64(hi, 0b11011000);
384                         _mm256_storeu_si256(out2, hi);
385                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
386                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
387                         if (!_mm256_testz_si256(found1, found1) ||
388                             !_mm256_testz_si256(found2, found2)) {
389                                 break;
390                         }
391
392                         in += 2;
393                         ++out1;
394                         ++out2;
395                 }
396                 current_frame->len += (uint8_t *)in - aligned_start;
397         } else {
398                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
399                 while (in < (const __m256i *)limit) {
400                         __m256i data = _mm256_load_si256(in);
401                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
402                         __m256i found = _mm256_cmpeq_epi8(data, needle);
403                         if (!_mm256_testz_si256(found, found)) {
404                                 break;
405                         }
406
407                         ++in;
408                         ++out;
409                 }
410                 current_frame->len = (uint8_t *)out - current_frame->data;
411         }
412 #else
413         const __m128i needle = _mm_set1_epi8(sync_char);
414
415         const __m128i *in = (const __m128i *)aligned_start;
416         if (current_frame->interleaved) {
417                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
418                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
419                 if (current_frame->len % 2 == 1) {
420                         swap(out1, out2);
421                 }
422
423                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
424                 while (in < (const __m128i *)limit) {
425                         __m128i data1 = _mm_load_si128(in);
426                         __m128i data2 = _mm_load_si128(in + 1);
427                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
428                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
429                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
430                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
431                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
432                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
433                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
434                         _mm_storeu_si128(out2, hi);
435                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
436                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
437                         if (!_mm_testz_si128(found1, found1) ||
438                             !_mm_testz_si128(found2, found2)) {
439                                 break;
440                         }
441
442                         in += 2;
443                         ++out1;
444                         ++out2;
445                 }
446                 current_frame->len += (uint8_t *)in - aligned_start;
447         } else {
448                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
449                 while (in < (const __m128i *)limit) {
450                         __m128i data = _mm_load_si128(in);
451                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
452                         __m128i found = _mm_cmpeq_epi8(data, needle);
453                         if (!_mm_testz_si128(found, found)) {
454                                 break;
455                         }
456
457                         ++in;
458                         ++out;
459                 }
460                 current_frame->len = (uint8_t *)out - current_frame->data;
461         }
462 #endif
463
464         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
465
466         return (const uint8_t *)in;
467 }
468 #endif
469
470 void decode_packs(const libusb_transfer *xfr,
471                   const char *sync_pattern,
472                   int sync_length,
473                   FrameAllocator::Frame *current_frame,
474                   const char *frame_type_name,
475                   function<void(const uint8_t *start)> start_callback)
476 {
477         int offset = 0;
478         for (int i = 0; i < xfr->num_iso_packets; i++) {
479                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
480
481                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
482                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
483                         continue;
484 //exit(5);
485                 }
486
487                 const uint8_t *start = xfr->buffer + offset;
488                 const uint8_t *limit = start + pack->actual_length;
489                 while (start < limit) {  // Usually runs only one iteration.
490 #ifdef __SSE2__
491                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
492                         if (start == limit) break;
493                         assert(start < limit);
494 #endif
495
496                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
497                         if (start_next_frame == nullptr) {
498                                 // add the rest of the buffer
499                                 add_to_frame(current_frame, frame_type_name, start, limit);
500                                 break;
501                         } else {
502                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
503                                 start = start_next_frame + sync_length;  // skip sync
504                                 start_callback(start);
505                         }
506                 }
507 #if 0
508                 dump_pack(xfr, offset, pack);
509 #endif
510                 offset += pack->length;
511         }
512 }
513
514 static void cb_xfr(struct libusb_transfer *xfr)
515 {
516         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
517                 fprintf(stderr, "transfer status %d\n", xfr->status);
518                 libusb_free_transfer(xfr);
519                 exit(3);
520         }
521
522         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
523                 if (xfr->endpoint == 0x84) {
524                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &current_audio_frame, "audio", start_new_audio_block);
525                 } else {
526                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &current_video_frame, "video", start_new_frame);
527                 }
528         }
529         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
530                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
531                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
532 #if 0
533                 if (setup->wIndex == 44) {
534                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
535                 } else {
536                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
537                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
538                 }
539 #else
540                 memcpy(register_file + current_register, buf, 4);
541                 current_register = (current_register + 4) % NUM_REGISTERS;
542                 if (current_register == 0) {
543                         // read through all of them
544                         printf("register dump:");
545                         for (int i = 0; i < NUM_REGISTERS; i += 4) {
546                                 printf(" 0x%02x%02x%02x%02x", register_file[i], register_file[i + 1], register_file[i + 2], register_file[i + 3]);
547                         }
548                         printf("\n");
549                 }
550                 libusb_fill_control_setup(xfr->buffer,
551                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
552                         /*index=*/current_register, /*length=*/4);
553 #endif
554         }
555
556 #if 0
557         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
558         for (i = 0; i < xfr->actual_length; i++) {
559                 printf("%02x", xfr->buffer[i]);
560                 if (i % 16)
561                         printf("\n");
562                 else if (i % 8)
563                         printf("  ");
564                 else
565                         printf(" ");
566         }
567 #endif
568
569         if (libusb_submit_transfer(xfr) < 0) {
570                 fprintf(stderr, "error re-submitting URB\n");
571                 exit(1);
572         }
573 }
574
575 void usb_thread_func()
576 {
577         printf("usb thread started\n");
578
579         sched_param param;
580         memset(&param, 0, sizeof(param));
581         param.sched_priority = 1;
582         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
583                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
584         }
585         while (!should_quit) {
586                 int rc = libusb_handle_events(nullptr);
587                 if (rc != LIBUSB_SUCCESS)
588                         break;
589         }
590 }
591
592 FrameAllocator *get_video_frame_allocator()
593 {
594         return video_frame_allocator;
595 }
596
597 void set_video_frame_allocator(FrameAllocator *allocator)
598 {
599         video_frame_allocator = allocator;
600 }
601
602 FrameAllocator *get_audio_frame_allocator()
603 {
604         return audio_frame_allocator;
605 }
606
607 void set_audio_frame_allocator(FrameAllocator *allocator)
608 {
609         audio_frame_allocator = allocator;
610 }
611
612 void set_frame_callback(frame_callback_t callback)
613 {
614         frame_callback = callback;
615 }
616
617 void start_bm_capture()
618 {
619         if (video_frame_allocator == nullptr) {
620                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
621         }
622         if (audio_frame_allocator == nullptr) {
623                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
624         }
625         thread(dequeue_thread).detach();
626
627         int rc;
628         struct libusb_transfer *xfr;
629         vector<libusb_transfer *> iso_xfrs;
630
631         rc = libusb_init(nullptr);
632         if (rc < 0) {
633                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
634                 exit(1);
635         }
636
637         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
638         if (!devh) {
639                 fprintf(stderr, "Error finding USB device\n");
640                 exit(1);
641         }
642
643         libusb_config_descriptor *config;
644         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
645         if (rc < 0) {
646                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
647                 exit(1);
648         }
649         printf("%d interface\n", config->bNumInterfaces);
650         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
651                 printf("  interface %d\n", interface_number);
652                 const libusb_interface *interface = &config->interface[interface_number];
653                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
654                         printf("    alternate setting %d\n", altsetting);
655                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
656                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
657                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
658                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
659                         }
660                 }
661         }
662
663         rc = libusb_set_configuration(devh, /*configuration=*/1);
664         if (rc < 0) {
665                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
666                 exit(1);
667         }
668
669         rc = libusb_claim_interface(devh, 0);
670         if (rc < 0) {
671                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
672                 exit(1);
673         }
674
675         // Alternate setting 1 is output, alternate setting 2 is input.
676         // Card is reset when switching alternates, so the driver uses
677         // this “double switch” when it wants to reset.
678         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
679         if (rc < 0) {
680                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
681                 exit(1);
682         }
683         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
684         if (rc < 0) {
685                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
686                 exit(1);
687         }
688 #if 0
689         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
690         if (rc < 0) {
691                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
692                 exit(1);
693         }
694 #endif
695
696 #if 0
697         rc = libusb_claim_interface(devh, 3);
698         if (rc < 0) {
699                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
700                 exit(1);
701         }
702 #endif
703
704         // theories:
705         //   44 is some kind of timer register (first 16 bits count upwards)
706         //   24 is some sort of watchdog?
707         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
708         //      (or will go to 0x73c60010?), also seen 0x73c60100
709         //   12 also changes all the time, unclear why  
710         //   16 seems to be autodetected mode somehow
711         //      --    this is e00115e0 after reset?
712         //                    ed0115e0 after mode change [to output?]
713         //                    2d0015e0 after more mode change [to input]
714         //                    ed0115e0 after more mode change
715         //                    2d0015e0 after more mode change
716         //
717         //                    390115e0 seems to indicate we have signal
718         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
719         //
720         //                    200015e0 on startup
721         //         changes to 250115e0 when we sync to the signal
722         //
723         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
724         //
725         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
726         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
727         //
728         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
729         //    perhaps some of them are related to analog output?
730         //
731         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
732         //    but the driver sets it to 0x8036802a at some point.
733         //
734         // register 16:
735         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
736         //
737         // theories:
738         //   0x01 - stable signal
739         //   0x04 - deep color
740         //   0x08 - unknown (audio??)
741         //   0x20 - 720p??
742         //   0x30 - 576p??
743
744         struct ctrl {
745                 int endpoint;
746                 int request;
747                 int index;
748                 uint32_t data;
749         };
750         static const ctrl ctrls[] = {
751                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
752                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
753                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
754                 { LIBUSB_ENDPOINT_IN,  214,  4, 0 },
755                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
756                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
757                 { LIBUSB_ENDPOINT_IN,  214, 20, 0 },
758                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
759                 { LIBUSB_ENDPOINT_IN,  214, 28, 0 },
760                 { LIBUSB_ENDPOINT_IN,  215, 32, 0 },
761                 { LIBUSB_ENDPOINT_IN,  214, 36, 0 },
762                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
763                 { LIBUSB_ENDPOINT_IN,  216, 44, 0 },
764                 { LIBUSB_ENDPOINT_IN,  214, 48, 0 },
765                 { LIBUSB_ENDPOINT_IN,  214, 52, 0 },
766                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
767                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
768                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
769                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
770                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
771                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
772                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
773                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
774                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
775                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
776                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
777                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
778                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
779                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
780                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
781                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
782                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
783                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
784                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
785                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
786                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
787                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
788                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
789                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
790                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
791                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
792                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
793                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
794                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
795                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
796                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
797                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
798                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },  // packet 354
799                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },
800                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
801                 { LIBUSB_ENDPOINT_IN,  214, 12, 0 },
802                 { LIBUSB_ENDPOINT_IN,  214, 40, 0 },
803                 // more...
804                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
805                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },  // wow, some kind of mode
806
807                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
808                 // capture (v210).
809                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
810                 // 0x10000000 = analog audio instead of embedded audio, it seems
811                 // 0x3a000000 = component video? (analog audio)
812                 // 0x3c000000 = composite video? (analog audio)
813                 // 0x3e000000 = s-video? (analog audio)
814                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
815                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
816
817                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0xffffffff },
818                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0xffffffff },
819                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x40404040 },
820                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x40404040 },
821                 //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x8036802a },
822                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
823                 //{ LIBUSB_ENDPOINT_OUT, 215, 24, 0x13370001 },  // latch for frame start?
824                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
825                 //{ LIBUSB_ENDPOINT_OUT, 215,  4, 0x00000000 },  // appears to have no effect
826                 //{ LIBUSB_ENDPOINT_OUT, 215,  8, 0x00000000 },  // appears to have no effect
827                 //{ LIBUSB_ENDPOINT_OUT, 215, 20, 0x00000000 },  // appears to have no effect
828                 //{ LIBUSB_ENDPOINT_OUT, 215, 28, 0x00000000 },  // appears to have no effect
829                 //{ LIBUSB_ENDPOINT_OUT, 215, 32, 0x00000000 },  // appears to have no effect
830                 //{ LIBUSB_ENDPOINT_OUT, 215, 36, 0x00000000 },  // appears to have no effect
831 #if 0
832                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
833                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
834                 { LIBUSB_ENDPOINT_OUT, 215, 28 },
835                 { LIBUSB_ENDPOINT_OUT, 215, 32 },
836                 { LIBUSB_ENDPOINT_OUT, 215, 36 },
837                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
838                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
839                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
840                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
841                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
842                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
843                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
844                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
845                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
846                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
847                 { LIBUSB_ENDPOINT_OUT, 215,  0 },
848                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
849                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
850                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
851                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
852                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
853                 { LIBUSB_ENDPOINT_OUT, 215, 24 },
854 #endif
855         };
856
857         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
858                 uint32_t flipped = htonl(ctrls[req].data);
859                 static uint8_t value[4];
860                 memcpy(value, &flipped, sizeof(flipped));
861                 int size = sizeof(value);
862                 //if (ctrls[req].request == 215) size = 0;
863                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
864                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
865                 if (rc < 0) {
866                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
867                         exit(1);
868                 }
869                 
870                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
871                 for (int i = 0; i < rc; ++i) {
872                         printf("%02x", value[i]);
873                 }
874                 printf("\n");
875         }
876
877 #if 0
878         // DEBUG
879         for ( ;; ) {
880                 static int my_index = 0;
881                 static uint8_t value[4];
882                 int size = sizeof(value);
883                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
884                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
885                 if (rc < 0) {
886                         fprintf(stderr, "Error on control\n");
887                         exit(1);
888                 }
889                 printf("rc=%d index=%d: 0x", rc, my_index);
890                 for (int i = 0; i < rc; ++i) {
891                         printf("%02x", value[i]);
892                 }
893                 printf("\n");
894         }
895 #endif
896
897 #if 0
898         // set up an asynchronous transfer of the timer register
899         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
900         static int completed = 0;
901
902         xfr = libusb_alloc_transfer(0);
903         libusb_fill_control_setup(cmdbuf,
904             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
905                 /*index=*/44, /*length=*/4);
906         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
907         libusb_submit_transfer(xfr);
908
909         // set up an asynchronous transfer of register 24
910         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
911         static int completed2 = 0;
912
913         xfr = libusb_alloc_transfer(0);
914         libusb_fill_control_setup(cmdbuf2,
915             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
916                 /*index=*/24, /*length=*/4);
917         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
918         libusb_submit_transfer(xfr);
919 #endif
920
921         // set up an asynchronous transfer of the register dump
922         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
923         static int completed3 = 0;
924
925         xfr = libusb_alloc_transfer(0);
926         libusb_fill_control_setup(cmdbuf3,
927             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
928                 /*index=*/current_register, /*length=*/4);
929         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
930         //libusb_submit_transfer(xfr);
931
932         audiofp = fopen("audio.raw", "wb");
933
934         // set up isochronous transfers for audio and video
935         for (int e = 3; e <= 4; ++e) {
936                 //int num_transfers = (e == 3) ? 6 : 6;
937                 int num_transfers = 6;
938                 for (int i = 0; i < num_transfers; ++i) {
939                         int num_iso_pack, size;
940                         if (e == 3) {
941                                 // Video seems to require isochronous packets scaled with the width; 
942                                 // seemingly six lines is about right, rounded up to the required 1kB
943                                 // multiple.
944                                 size = WIDTH * 2 * 6;
945                                 // Note that for 10-bit input, you'll need to increase size accordingly.
946                                 //size = size * 4 / 3;
947                                 if (size % 1024 != 0) {
948                                         size &= ~1023;
949                                         size += 1024;
950                                 }
951                                 num_iso_pack = (2 << 20) / size;  // 2 MB.
952                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
953                         } else {
954                                 size = 0xc0;
955                                 num_iso_pack = 80;
956                         }
957                         int num_bytes = num_iso_pack * size;
958                         uint8_t *buf = new uint8_t[num_bytes];
959
960                         xfr = libusb_alloc_transfer(num_iso_pack);
961                         if (!xfr) {
962                                 fprintf(stderr, "oom\n");
963                                 exit(1);
964                         }
965
966                         int ep = LIBUSB_ENDPOINT_IN | e;
967                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
968                                 num_iso_pack, cb_xfr, nullptr, 0);
969                         libusb_set_iso_packet_lengths(xfr, size);
970                         iso_xfrs.push_back(xfr);
971                 }
972         }
973
974         {
975                 int i = 0;
976                 for (libusb_transfer *xfr : iso_xfrs) {
977                         rc = libusb_submit_transfer(xfr);
978                         ++i;
979                         if (rc < 0) {
980                                 //printf("num_bytes=%d\n", num_bytes);
981                                 fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
982                                         xfr->endpoint, i, libusb_error_name(rc));
983                                 exit(1);
984                         }
985                 }
986         }
987
988         usb_thread = thread(usb_thread_func);
989
990
991 #if 0
992         libusb_release_interface(devh, 0);
993 out:
994         if (devh)
995                 libusb_close(devh);
996         libusb_exit(nullptr);
997         return rc;
998 #endif
999 }
1000
1001 void stop_bm_capture()
1002 {
1003         should_quit = true;
1004         usb_thread.join();
1005 }