]> git.sesse.net Git - bmusb/blob - bmusb.cpp
Document alternates 3 and 4.
[bmusb] / bmusb.cpp
1 // Intensity Shuttle USB3 prototype capture driver, v0.3
2 // Can download 8-bit and 10-bit UYVY/v210 frames from HDMI, quite stable
3 // (can do captures for hours at a time with no drops), except during startup
4 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
5 // Audio comes out as 8-channel 24-bit raw audio.
6
7 #include <assert.h>
8 #include <errno.h>
9 #include <libusb.h>
10 #include <netinet/in.h>
11 #include <sched.h>
12 #include <stdint.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #ifdef __SSE4_1__
17 #include <immintrin.h>
18 #endif
19 #include "bmusb.h"
20
21 #include <algorithm>
22 #include <atomic>
23 #include <condition_variable>
24 #include <cstddef>
25 #include <cstdint>
26 #include <deque>
27 #include <functional>
28 #include <memory>
29 #include <mutex>
30 #include <stack>
31 #include <thread>
32
33 using namespace std;
34 using namespace std::placeholders;
35
// Nominal frame geometry. In the code visible here, WIDTH and HEIGHT are only
// referenced by the commented-out FRAME_SIZE formulas below.
36 #define WIDTH 1280
37 #define HEIGHT 750  /* 30 lines ancillary data? */
38 //#define WIDTH 1920
39 //#define HEIGHT 1125  /* ??? lines ancillary data? */
// Number of bytes at the start of each video frame that dump_frame() skips;
// also passed to the frame callback as the video data offset.
40 #define HEADER_SIZE 44
41 //#define HEADER_SIZE 0
// Number of bytes at the start of each audio block that dump_audio_block() skips;
// also passed to the frame callback as the audio data offset.
42 #define AUDIO_HEADER_SIZE 4
43
44 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 + HEADER_SIZE)  // UYVY
45 //#define FRAME_SIZE (WIDTH * HEIGHT * 2 * 4 / 3 + HEADER_SIZE)  // v210
// Size in bytes (8 MiB) of each buffer in the default video frame allocator,
// used instead of the format-specific formulas above.
46 #define FRAME_SIZE (8 << 20)
47
// Destination stream for dump_audio_block(). Nothing in the code visible here
// opens it. NOTE(review): confirm where (or whether) it is assigned.
48 FILE *audiofp;
49
// NOTE(review): presumably the thread running usb_thread_func(); it is not
// started in the part of the file visible here -- confirm.
50 thread usb_thread;
// Polled by usb_thread_func(); its event loop exits once this becomes true.
51 atomic<bool> should_quit;
52
// Out-of-line definition of the (empty) FrameAllocator destructor.
53 FrameAllocator::~FrameAllocator() {}
54
// Number of buffers each MallocFrameAllocator allocates up front.
55 #define NUM_QUEUED_FRAMES 8
// FrameAllocator that serves fixed-size heap buffers out of a pool that is
// filled once, in the constructor. alloc_frame() takes a buffer from the pool
// and release_frame() puts it back; both take freelist_mutex.
56 class MallocFrameAllocator : public FrameAllocator {
57 public:
        // Allocates NUM_QUEUED_FRAMES buffers of <frame_size> bytes each.
58         MallocFrameAllocator(size_t frame_size);
        // Hands out one pooled buffer; when the pool is empty, a message is
        // printed and the returned frame gets no buffer assigned.
59         Frame alloc_frame() override;
        // Returns the frame's buffer to the pool.
60         void release_frame(Frame frame) override;
61
62 private:
        // Size in bytes of every buffer in the pool.
63         size_t frame_size;
64
        // Protects <freelist>.
65         mutex freelist_mutex;
66         stack<unique_ptr<uint8_t[]>> freelist;  // All of size <frame_size>.
67 };
68
69 MallocFrameAllocator::MallocFrameAllocator(size_t frame_size)
70         : frame_size(frame_size)
71 {
72         for (int i = 0; i < NUM_QUEUED_FRAMES; ++i) {
73                 freelist.push(unique_ptr<uint8_t[]>(new uint8_t[frame_size]));
74         }
75 }
76
77 FrameAllocator::Frame MallocFrameAllocator::alloc_frame()
78 {
79         Frame vf;
80         vf.owner = this;
81
82         unique_lock<mutex> lock(freelist_mutex);  // Meh.
83         if (freelist.empty()) {
84                 printf("Frame overrun (no more spare frames of size %ld), dropping frame!\n",
85                         frame_size);
86         } else {
87                 vf.data = freelist.top().release();
88                 vf.size = frame_size;
89                 freelist.pop();  // Meh.
90         }
91         return vf;
92 }
93
94 void MallocFrameAllocator::release_frame(Frame frame)
95 {
96         unique_lock<mutex> lock(freelist_mutex);
97         freelist.push(unique_ptr<uint8_t[]>(frame.data));
98 }
99
// Returns true if <a> comes strictly before <b> on the 16-bit circle, i.e. if
// stepping forward from a reaches b in more than zero and fewer than 0x8000
// steps. Equal values, and values exactly 0x8000 apart, compare as "not less".
bool uint16_less_than_with_wraparound(uint16_t a, uint16_t b)
{
	// Forward distance from a to b, reduced modulo 2^16.
	const uint16_t forward_distance = uint16_t(b - a);
	return forward_distance != 0 && forward_distance < 0x8000;
}
111
112 void BMUSBCapture::queue_frame(uint16_t format, uint16_t timecode, FrameAllocator::Frame frame, deque<QueuedFrame> *q)
113 {
114         if (!q->empty() && !uint16_less_than_with_wraparound(q->back().timecode, timecode)) {
115                 printf("Blocks going backwards: prev=0x%04x, cur=0x%04x (dropped)\n",
116                         q->back().timecode, timecode);
117                 frame.owner->release_frame(frame);
118                 return;
119         }
120
121         QueuedFrame qf;
122         qf.format = format;
123         qf.timecode = timecode;
124         qf.frame = frame;
125
126         {
127                 unique_lock<mutex> lock(queue_lock);
128                 q->push_back(move(qf));
129         }
130         queues_not_empty.notify_one();  // might be spurious
131 }
132
133 void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
134 {
135         FILE *fp = fopen(filename, "wb");
136         if (fwrite(frame_start + HEADER_SIZE, frame_len - HEADER_SIZE, 1, fp) != 1) {
137                 printf("short write!\n");
138         }
139         fclose(fp);
140 }
141
142 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
143 {
144         fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
145 }
146
147 void BMUSBCapture::dequeue_thread()
148 {
149         for ( ;; ) {
150                 unique_lock<mutex> lock(queue_lock);
151                 queues_not_empty.wait(lock, [this]{ return !pending_video_frames.empty() && !pending_audio_frames.empty(); });
152
153                 uint16_t video_timecode = pending_video_frames.front().timecode;
154                 uint16_t audio_timecode = pending_audio_frames.front().timecode;
155                 if (video_timecode < audio_timecode) {
156                         printf("Video block 0x%04x without corresponding audio block, dropping.\n",
157                                 video_timecode);
158                         video_frame_allocator->release_frame(pending_video_frames.front().frame);
159                         pending_video_frames.pop_front();
160                 } else if (audio_timecode < video_timecode) {
161                         printf("Audio block 0x%04x without corresponding video block, dropping.\n",
162                                 audio_timecode);
163                         audio_frame_allocator->release_frame(pending_audio_frames.front().frame);
164                         pending_audio_frames.pop_front();
165                 } else {
166                         QueuedFrame video_frame = pending_video_frames.front();
167                         QueuedFrame audio_frame = pending_audio_frames.front();
168                         pending_audio_frames.pop_front();
169                         pending_video_frames.pop_front();
170                         lock.unlock();
171
172 #if 0
173                         char filename[255];
174                         snprintf(filename, sizeof(filename), "%04x%04x.uyvy", video_frame.format, video_timecode);
175                         dump_frame(filename, video_frame.frame.data, video_frame.data_len);
176                         dump_audio_block(audio_frame.frame.data, audio_frame.data_len); 
177 #endif
178
179                         frame_callback(video_timecode,
180                                        video_frame.frame, HEADER_SIZE, video_frame.format,
181                                        audio_frame.frame, AUDIO_HEADER_SIZE, audio_frame.format);
182                 }
183         }
184 }
185
186 void BMUSBCapture::start_new_frame(const uint8_t *start)
187 {
188         uint16_t format = (start[3] << 8) | start[2];
189         uint16_t timecode = (start[1] << 8) | start[0];
190
191         if (current_video_frame.len > 0) {
192                 //dump_frame();
193                 queue_frame(format, timecode, current_video_frame, &pending_video_frames);
194         }
195         //printf("Found frame start, format 0x%04x timecode 0x%04x, previous frame length was %d/%d\n",
196         //      format, timecode,
197         //      //start[7], start[6], start[5], start[4],
198         //      read_current_frame, FRAME_SIZE);
199
200         current_video_frame = video_frame_allocator->alloc_frame();
201         //if (current_video_frame.data == nullptr) {
202         //      read_current_frame = -1;
203         //} else {
204         //      read_current_frame = 0;
205         //}
206 }
207
208 void BMUSBCapture::start_new_audio_block(const uint8_t *start)
209 {
210         uint16_t format = (start[3] << 8) | start[2];
211         uint16_t timecode = (start[1] << 8) | start[0];
212         if (current_audio_frame.len > 0) {
213                 //dump_audio_block();
214                 queue_frame(format, timecode, current_audio_frame, &pending_audio_frames);
215         }
216         //printf("Found audio block start, format 0x%04x timecode 0x%04x, previous block length was %d\n",
217         //      format, timecode, read_current_audio_block);
218         current_audio_frame = audio_frame_allocator->alloc_frame();
219 }
220
221 #if 0
222 static void dump_pack(const libusb_transfer *xfr, int offset, const libusb_iso_packet_descriptor *pack)
223 {
224         //      printf("ISO pack%u length:%u, actual_length:%u, offset:%u\n", i, pack->length, pack->actual_length, offset);
225         for (unsigned j = 0; j < pack->actual_length; j++) {
226         //for (int j = 0; j < min(pack->actual_length, 16u); j++) {
227                 printf("%02x", xfr->buffer[j + offset]);
228                 if ((j % 16) == 15)
229                         printf("\n");
230                 else if ((j % 8) == 7)
231                         printf("  ");
232                 else
233                         printf(" ");
234         }
235 }
236 #endif
237
// De-interleaving copy: bytes at even positions of <src> go to <dest1>, bytes
// at odd positions go to <dest2>. <n> is the number of source bytes and must
// be even, so each destination receives n/2 bytes.
void memcpy_interleaved(uint8_t *dest1, uint8_t *dest2, const uint8_t *src, size_t n)
{
	assert(n % 2 == 0);

	for (size_t i = 0; i < n; i += 2) {
		const size_t pair = i / 2;
		dest1[pair] = src[i];
		dest2[pair] = src[i + 1];
	}
}
249
250 void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_name, const uint8_t *start, const uint8_t *end)
251 {
252         if (current_frame->data == nullptr ||
253             current_frame->len > current_frame->size ||
254             start == end) {
255                 return;
256         }
257
258         int bytes = end - start;
259         if (current_frame->len + bytes > current_frame->size) {
260                 printf("%d bytes overflow after last %s frame\n",
261                         int(current_frame->len + bytes - current_frame->size), frame_type_name);
262                 //dump_frame();
263         } else {
264                 if (current_frame->interleaved) {
265                         uint8_t *data = current_frame->data + current_frame->len / 2;
266                         uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
267                         if (current_frame->len % 2 == 1) {
268                                 ++data;
269                                 swap(data, data2);
270                         }
271                         if (bytes % 2 == 1) {
272                                 *data++ = *start++;
273                                 swap(data, data2);
274                                 ++current_frame->len;
275                                 --bytes;
276                         }
277                         memcpy_interleaved(data, data2, start, bytes);
278                         current_frame->len += bytes;
279                 } else {
280                         memcpy(current_frame->data + current_frame->len, start, bytes);
281                         current_frame->len += bytes;
282                 }
283         }
284 }
285
286 #ifdef __SSE4_1__
287
288 #if 0
// Debugging aid: prints <name> (left-aligned, padded to 10 characters)
// followed by the 32 bytes of <n> in hex, lowest byte first, with an extra
// space between each group of eight bytes.
void avx2_dump(const char *name, __m256i n)
{
	// Spill the register to memory so the bytes can be walked in a loop;
	// _mm256_extract_epi8() would need a compile-time constant index.
	uint8_t bytes[32];
	_mm256_storeu_si256((__m256i *)bytes, n);

	printf("%-10s:", name);
	for (int i = 0; i < 32; ++i) {
		if (i != 0 && i % 8 == 0) {
			printf(" ");
		}
		printf(" %02x", bytes[i]);
	}
	printf("\n");
}
329 #endif
330
331 // Does a memcpy and memchr in one to reduce processing time.
332 // Note that the benefit is somewhat limited if your L3 cache is small,
333 // as you'll (unfortunately) spend most of the time loading the data
334 // from main memory.
335 //
336 // Complicated cases are left to the slow path; it basically stops copying
337 // up until the first instance of "sync_char" (usually a bit before, actually).
338 // This is fine, since 0x00 bytes shouldn't really show up in normal picture
339 // data, and what we really need this for is the 00 00 ff ff marker in video data.
//
// Parameters:
//   current_frame: frame to append to; its len is advanced by the number of
//                  bytes consumed.
//   start, limit:  the input range [start, limit).
//   sync_char:     byte value that stops the copy.
//
// Returns a pointer to the first input byte that was NOT consumed; the caller
// is expected to handle [returned pointer, limit) itself. Returns <start>
// unchanged if nothing was copied (no buffer, frame already over capacity,
// empty input, or fewer than 128 input bytes).
340 const uint8_t *add_to_frame_fastpath(FrameAllocator::Frame *current_frame, const uint8_t *start, const uint8_t *limit, const char sync_char)
341 {
342         if (current_frame->data == nullptr ||
343             current_frame->len > current_frame->size ||
344             start == limit) {
345                 return start;
346         }
347         size_t orig_bytes = limit - start;
348         if (orig_bytes < 128) {
349                 // Don't bother.
350                 return start;
351         }
352
353         // Don't read more bytes than we can write.
354         limit = min(limit, start + (current_frame->size - current_frame->len));
355
356         // Align end to 32 bytes.
            // (Rounds down, so up to 31 trailing bytes are left unconsumed.)
357         limit = (const uint8_t *)(intptr_t(limit) & ~31);
358
359         if (start >= limit) {
360                 return start;
361         }
362
363         // Process [0,31] bytes, such that start gets aligned to 32 bytes.
            // If the sync byte shows up in this unaligned prefix, only the bytes
            // before it are appended and we return right there.
364         const uint8_t *aligned_start = (const uint8_t *)(intptr_t(start + 31) & ~31);
365         if (aligned_start != start) {
366                 const uint8_t *sync_start = (const uint8_t *)memchr(start, sync_char, aligned_start - start);
367                 if (sync_start == nullptr) {
368                         add_to_frame(current_frame, "", start, aligned_start);
369                 } else {
370                         add_to_frame(current_frame, "", start, sync_start);
371                         return sync_start;
372                 }
373         }
374
375         // Make the length a multiple of 64.
            // (Only for interleaved frames. Both ends are 32-byte aligned at this
            // point, so dropping one 32-byte block from the end is enough.)
376         if (current_frame->interleaved) {
377                 if (((limit - aligned_start) % 64) != 0) {
378                         limit -= 32;
379                 }
380                 assert(((limit - aligned_start) % 64) == 0);
381         }
382
            // AVX2 version: 32-byte vectors. The interleaved loop consumes 64 input
            // bytes per iteration, the plain loop 32.
383 #if __AVX2__
384         const __m256i needle = _mm256_set1_epi8(sync_char);
385
386         const __restrict __m256i *in = (const __m256i *)aligned_start;
387         if (current_frame->interleaved) {
                    // out1 receives the bytes at even input offsets, out2 those at odd
                    // offsets. If the frame position is odd, the first input byte
                    // belongs in data2, hence the swap.
388                 __restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
389                 __restrict __m256i *out2 = (__m256i *)(current_frame->data2 + current_frame->len / 2);
390                 if (current_frame->len % 2 == 1) {
391                         swap(out1, out2);
392                 }
393
                    // Within each 128-bit lane: even-indexed bytes to the low half,
                    // odd-indexed bytes to the high half.
394                 __m256i shuffle_cw = _mm256_set_epi8(
395                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0,
396                         15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0);
397                 while (in < (const __m256i *)limit) {
398                         // Note: For brevity, comments show lanes as if they were 2x64-bit (they're actually 2x128).
399                         __m256i data1 = _mm256_stream_load_si256(in);         // AaBbCcDd EeFfGgHh
400                         __m256i data2 = _mm256_stream_load_si256(in + 1);     // IiJjKkLl MmNnOoPp
401
402                         __m256i found1 = _mm256_cmpeq_epi8(data1, needle);
403                         __m256i found2 = _mm256_cmpeq_epi8(data2, needle);
404                         __m256i found = _mm256_or_si256(found1, found2);
405
406                         data1 = _mm256_shuffle_epi8(data1, shuffle_cw);       // ABCDabcd EFGHefgh
407                         data2 = _mm256_shuffle_epi8(data2, shuffle_cw);       // IJKLijkl MNOPmnop
408                 
409                         data1 = _mm256_permute4x64_epi64(data1, 0b11011000);  // ABCDEFGH abcdefgh
410                         data2 = _mm256_permute4x64_epi64(data2, 0b11011000);  // IJKLMNOP ijklmnop
411
412                         __m256i lo = _mm256_permute2x128_si256(data1, data2, 0b00100000);
413                         __m256i hi = _mm256_permute2x128_si256(data1, data2, 0b00110001);
414
415                         _mm256_storeu_si256(out1, lo);  // Store as early as possible, even if the data isn't used.
416                         _mm256_storeu_si256(out2, hi);
417
                            // Sync byte somewhere in these 64 bytes: stop without advancing,
                            // so the block just stored is not counted in len and is left
                            // for the caller to process.
418                         if (!_mm256_testz_si256(found, found)) {
419                                 break;
420                         }
421
422                         in += 2;
423                         ++out1;
424                         ++out2;
425                 }
426                 current_frame->len += (uint8_t *)in - aligned_start;
427         } else {
428                 __m256i *out = (__m256i *)(current_frame->data + current_frame->len);
429                 while (in < (const __m256i *)limit) {
430                         __m256i data = _mm256_load_si256(in);
431                         _mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
432                         __m256i found = _mm256_cmpeq_epi8(data, needle);
                            // Same as above: break before advancing, so the block
                            // containing the sync byte is not counted.
433                         if (!_mm256_testz_si256(found, found)) {
434                                 break;
435                         }
436
437                         ++in;
438                         ++out;
439                 }
440                 current_frame->len = (uint8_t *)out - current_frame->data;
441         }
442 #else
            // SSE version: same structure with 16-byte vectors. The interleaved loop
            // consumes 32 input bytes per iteration, the plain loop 16.
443         const __m128i needle = _mm_set1_epi8(sync_char);
444
445         const __m128i *in = (const __m128i *)aligned_start;
446         if (current_frame->interleaved) {
447                 __m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
448                 __m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
449                 if (current_frame->len % 2 == 1) {
450                         swap(out1, out2);
451                 }
452
                    // Split every 16-bit word into its low byte (even input offset) and
                    // its high byte (odd input offset), then pack each set back to bytes.
453                 __m128i mask_lower_byte = _mm_set1_epi16(0x00ff);
454                 while (in < (const __m128i *)limit) {
455                         __m128i data1 = _mm_load_si128(in);
456                         __m128i data2 = _mm_load_si128(in + 1);
457                         __m128i data1_lo = _mm_and_si128(data1, mask_lower_byte);
458                         __m128i data2_lo = _mm_and_si128(data2, mask_lower_byte);
459                         __m128i data1_hi = _mm_srli_epi16(data1, 8);
460                         __m128i data2_hi = _mm_srli_epi16(data2, 8);
461                         __m128i lo = _mm_packus_epi16(data1_lo, data2_lo);
462                         _mm_storeu_si128(out1, lo);  // Store as early as possible, even if the data isn't used.
463                         __m128i hi = _mm_packus_epi16(data1_hi, data2_hi);
464                         _mm_storeu_si128(out2, hi);
465                         __m128i found1 = _mm_cmpeq_epi8(data1, needle);
466                         __m128i found2 = _mm_cmpeq_epi8(data2, needle);
467                         if (!_mm_testz_si128(found1, found1) ||
468                             !_mm_testz_si128(found2, found2)) {
469                                 break;
470                         }
471
472                         in += 2;
473                         ++out1;
474                         ++out2;
475                 }
476                 current_frame->len += (uint8_t *)in - aligned_start;
477         } else {
478                 __m128i *out = (__m128i *)(current_frame->data + current_frame->len);
479                 while (in < (const __m128i *)limit) {
480                         __m128i data = _mm_load_si128(in);
481                         _mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
482                         __m128i found = _mm_cmpeq_epi8(data, needle);
483                         if (!_mm_testz_si128(found, found)) {
484                                 break;
485                         }
486
487                         ++in;
488                         ++out;
489                 }
490                 current_frame->len = (uint8_t *)out - current_frame->data;
491         }
492 #endif
493
494         //printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
495
            // First byte that was not consumed: either the (possibly reduced) limit,
            // or the start of the block in which the sync byte was seen.
496         return (const uint8_t *)in;
497 }
498 #endif
499
500 void decode_packs(const libusb_transfer *xfr,
501                   const char *sync_pattern,
502                   int sync_length,
503                   FrameAllocator::Frame *current_frame,
504                   const char *frame_type_name,
505                   function<void(const uint8_t *start)> start_callback)
506 {
507         int offset = 0;
508         for (int i = 0; i < xfr->num_iso_packets; i++) {
509                 const libusb_iso_packet_descriptor *pack = &xfr->iso_packet_desc[i];
510
511                 if (pack->status != LIBUSB_TRANSFER_COMPLETED) {
512                         fprintf(stderr, "Error: pack %u/%u status %d\n", i, xfr->num_iso_packets, pack->status);
513                         continue;
514 //exit(5);
515                 }
516
517                 const uint8_t *start = xfr->buffer + offset;
518                 const uint8_t *limit = start + pack->actual_length;
519                 while (start < limit) {  // Usually runs only one iteration.
520 #ifdef __SSE4_1__
521                         start = add_to_frame_fastpath(current_frame, start, limit, sync_pattern[0]);
522                         if (start == limit) break;
523                         assert(start < limit);
524 #endif
525
526                         const unsigned char* start_next_frame = (const unsigned char *)memmem(start, limit - start, sync_pattern, sync_length);
527                         if (start_next_frame == nullptr) {
528                                 // add the rest of the buffer
529                                 add_to_frame(current_frame, frame_type_name, start, limit);
530                                 break;
531                         } else {
532                                 add_to_frame(current_frame, frame_type_name, start, start_next_frame);
533                                 start = start_next_frame + sync_length;  // skip sync
534                                 start_callback(start);
535                         }
536                 }
537 #if 0
538                 dump_pack(xfr, offset, pack);
539 #endif
540                 offset += pack->length;
541         }
542 }
543
544 void BMUSBCapture::cb_xfr(struct libusb_transfer *xfr)
545 {
546         if (xfr->status != LIBUSB_TRANSFER_COMPLETED) {
547                 fprintf(stderr, "transfer status %d\n", xfr->status);
548                 libusb_free_transfer(xfr);
549                 exit(3);
550         }
551
552         assert(xfr->user_data != nullptr);
553         BMUSBCapture *usb = static_cast<BMUSBCapture *>(xfr->user_data);
554
555         if (xfr->type == LIBUSB_TRANSFER_TYPE_ISOCHRONOUS) {
556                 if (xfr->endpoint == 0x84) {
557                         decode_packs(xfr, "DeckLinkAudioResyncT", 20, &usb->current_audio_frame, "audio", bind(&BMUSBCapture::start_new_audio_block, usb, _1));
558                 } else {
559                         decode_packs(xfr, "\x00\x00\xff\xff", 4, &usb->current_video_frame, "video", bind(&BMUSBCapture::start_new_frame, usb, _1));
560                 }
561         }
562         if (xfr->type == LIBUSB_TRANSFER_TYPE_CONTROL) {
563                 //const libusb_control_setup *setup = libusb_control_transfer_get_setup(xfr);
564                 uint8_t *buf = libusb_control_transfer_get_data(xfr);
565 #if 0
566                 if (setup->wIndex == 44) {
567                         printf("read timer register: 0x%02x%02x%02x%02x\n", buf[0], buf[1], buf[2], buf[3]);
568                 } else {
569                         printf("read register %2d:                      0x%02x%02x%02x%02x\n",
570                                 setup->wIndex, buf[0], buf[1], buf[2], buf[3]);
571                 }
572 #else
573                 memcpy(usb->register_file + usb->current_register, buf, 4);
574                 usb->current_register = (usb->current_register + 4) % NUM_BMUSB_REGISTERS;
575                 if (usb->current_register == 0) {
576                         // read through all of them
577                         printf("register dump:");
578                         for (int i = 0; i < NUM_BMUSB_REGISTERS; i += 4) {
579                                 printf(" 0x%02x%02x%02x%02x", usb->register_file[i], usb->register_file[i + 1], usb->register_file[i + 2], usb->register_file[i + 3]);
580                         }
581                         printf("\n");
582                 }
583                 libusb_fill_control_setup(xfr->buffer,
584                     LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
585                         /*index=*/usb->current_register, /*length=*/4);
586 #endif
587         }
588
589 #if 0
590         printf("length:%u, actual_length:%u\n", xfr->length, xfr->actual_length);
591         for (i = 0; i < xfr->actual_length; i++) {
592                 printf("%02x", xfr->buffer[i]);
593                 if (i % 16)
594                         printf("\n");
595                 else if (i % 8)
596                         printf("  ");
597                 else
598                         printf(" ");
599         }
600 #endif
601
602         if (libusb_submit_transfer(xfr) < 0) {
603                 fprintf(stderr, "error re-submitting URB\n");
604                 exit(1);
605         }
606 }
607
608 void BMUSBCapture::usb_thread_func()
609 {
610         sched_param param;
611         memset(&param, 0, sizeof(param));
612         param.sched_priority = 1;
613         if (sched_setscheduler(0, SCHED_RR, &param) == -1) {
614                 printf("couldn't set realtime priority for USB thread: %s\n", strerror(errno));
615         }
616         while (!should_quit) {
617                 int rc = libusb_handle_events(nullptr);
618                 if (rc != LIBUSB_SUCCESS)
619                         break;
620         }
621 }
622
623 void BMUSBCapture::configure_card()
624 {
625         if (video_frame_allocator == nullptr) {
626                 set_video_frame_allocator(new MallocFrameAllocator(FRAME_SIZE));  // FIXME: leak.
627         }
628         if (audio_frame_allocator == nullptr) {
629                 set_audio_frame_allocator(new MallocFrameAllocator(65536));  // FIXME: leak.
630         }
631         thread(&BMUSBCapture::dequeue_thread, this).detach();
632
633         int rc;
634         struct libusb_transfer *xfr;
635
636         rc = libusb_init(nullptr);
637         if (rc < 0) {
638                 fprintf(stderr, "Error initializing libusb: %s\n", libusb_error_name(rc));
639                 exit(1);
640         }
641
642         //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd3b);
643         //struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, 0x1edb, 0xbd4f);
644         struct libusb_device_handle *devh = libusb_open_device_with_vid_pid(nullptr, vid, pid);
645         if (!devh) {
646                 fprintf(stderr, "Error finding USB device\n");
647                 exit(1);
648         }
649
650         libusb_config_descriptor *config;
651         rc = libusb_get_config_descriptor(libusb_get_device(devh), /*config_index=*/0, &config);
652         if (rc < 0) {
653                 fprintf(stderr, "Error getting configuration: %s\n", libusb_error_name(rc));
654                 exit(1);
655         }
656         printf("%d interface\n", config->bNumInterfaces);
657         for (int interface_number = 0; interface_number < config->bNumInterfaces; ++interface_number) {
658                 printf("  interface %d\n", interface_number);
659                 const libusb_interface *interface = &config->interface[interface_number];
660                 for (int altsetting = 0; altsetting < interface->num_altsetting; ++altsetting) {
661                         const libusb_interface_descriptor *interface_desc = &interface->altsetting[altsetting];
662                         printf("    alternate setting %d\n", interface_desc->bAlternateSetting);
663                         for (int endpoint_number = 0; endpoint_number < interface_desc->bNumEndpoints; ++endpoint_number) {
664                                 const libusb_endpoint_descriptor *endpoint = &interface_desc->endpoint[endpoint_number];
665                                 printf("        endpoint address 0x%02x\n", endpoint->bEndpointAddress);
666                         }
667                 }
668         }
669
670         rc = libusb_set_configuration(devh, /*configuration=*/1);
671         if (rc < 0) {
672                 fprintf(stderr, "Error setting configuration 1: %s\n", libusb_error_name(rc));
673                 exit(1);
674         }
675
676         rc = libusb_claim_interface(devh, 0);
677         if (rc < 0) {
678                 fprintf(stderr, "Error claiming interface 0: %s\n", libusb_error_name(rc));
679                 exit(1);
680         }
681
682         // Alternate setting 1 is output, alternate setting 2 is input.
683         // Card is reset when switching alternates, so the driver uses
684         // this “double switch” when it wants to reset.
685         //
686         // There's also alternate settings 3 and 4, which seem to be
687         // like 1 and 2 except they advertise less bandwidth needed.
688         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
689         if (rc < 0) {
690                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
691                 exit(1);
692         }
693         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/2);
694         if (rc < 0) {
695                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
696                 exit(1);
697         }
698 #if 0
699         rc = libusb_set_interface_alt_setting(devh, /*interface=*/0, /*alternate_setting=*/1);
700         if (rc < 0) {
701                 fprintf(stderr, "Error setting alternate 1: %s\n", libusb_error_name(rc));
702                 exit(1);
703         }
704 #endif
705
706 #if 0
707         rc = libusb_claim_interface(devh, 3);
708         if (rc < 0) {
709                 fprintf(stderr, "Error claiming interface 3: %s\n", libusb_error_name(rc));
710                 exit(1);
711         }
712 #endif
713
714         // theories:
715         //   44 is some kind of timer register (first 16 bits count upwards)
716         //   24 is some sort of watchdog?
717         //      you can seemingly set it to 0x73c60001 and that bit will eventually disappear
718         //      (or will go to 0x73c60010?), also seen 0x73c60100
719         //   12 also changes all the time, unclear why  
720         //   16 seems to be autodetected mode somehow
721         //      --    this is e00115e0 after reset?
722         //                    ed0115e0 after mode change [to output?]
723         //                    2d0015e0 after more mode change [to input]
724         //                    ed0115e0 after more mode change
725         //                    2d0015e0 after more mode change
726         //
727         //                    390115e0 seems to indicate we have signal
728         //         changes to 200115e0 when resolution changes/we lose signal, driver resets after a while
729         //
730         //                    200015e0 on startup
731         //         changes to 250115e0 when we sync to the signal
732         //
733         //    so only first 16 bits count, and 0x0100 is a mask for ok/stable signal?
734         //
735         //    Bottom 16 bits of this register seem to be firmware version number (possibly not all of them).
736         //
737         //    28 and 32 seems to be analog audio input levels (one byte for each of the eight channels).
738         //    however, if setting 32 with HDMI embedded audio, it is immediately overwritten back (to 0xe137002a).
739         //
740         //    4, 8, 20 are unclear. seem to be some sort of bitmask, but we can set them to 0 with no apparent effect.
741         //    perhaps some of them are related to analog output?
742         //
743         //    36 can be set to 0 with no apparent effect (all of this tested on both video and audio),
744         //    but the driver sets it to 0x8036802a at some point.
745         //
746         //    all of this is on request 214/215. other requests (192, 219,
747         //    222, 223, 224) are used for firmware upgrade. Probably best to
748         //    stay out of it unless you know what you're doing.
749         //
750         //
751         // register 16:
752         // first byte is 0x39 for a stable 576p60 signal, 0x2d for a stable 720p60 signal, 0x20 for no signal
753         //
754         // theories:
755         //   0x01 - stable signal
756         //   0x04 - deep color
757         //   0x08 - unknown (audio??)
758         //   0x20 - 720p??
759         //   0x30 - 576p??
760
761         struct ctrl {
762                 int endpoint;
763                 int request;
764                 int index;
765                 uint32_t data;
766         };
767         static const ctrl ctrls[] = {
768                 { LIBUSB_ENDPOINT_IN,  214, 16, 0 },
769                 { LIBUSB_ENDPOINT_IN,  214,  0, 0 },
770
771                 // seems to capture on HDMI, clearing the 0x20000000 bit seems to activate 10-bit
772                 // capture (v210).
773                 // clearing the 0x08000000 bit seems to change the capture format (other source?)
774                 // 0x10000000 = analog audio instead of embedded audio, it seems
775                 // 0x3a000000 = component video? (analog audio)
776                 // 0x3c000000 = composite video? (analog audio)
777                 // 0x3e000000 = s-video? (analog audio)
778                 { LIBUSB_ENDPOINT_OUT, 215,  0, 0x29000000 },
779                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x80000100 },
780                 //{ LIBUSB_ENDPOINT_OUT, 215,  0, 0x09000000 },
781                 { LIBUSB_ENDPOINT_OUT, 215, 24, 0x73c60001 },  // latch for frame start?
782                 { LIBUSB_ENDPOINT_IN,  214, 24, 0 },  // 
783         };
784
785         for (unsigned req = 0; req < sizeof(ctrls) / sizeof(ctrls[0]); ++req) {
786                 uint32_t flipped = htonl(ctrls[req].data);
787                 static uint8_t value[4];
788                 memcpy(value, &flipped, sizeof(flipped));
789                 int size = sizeof(value);
790                 //if (ctrls[req].request == 215) size = 0;
791                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | ctrls[req].endpoint,
792                         /*request=*/ctrls[req].request, /*value=*/0, /*index=*/ctrls[req].index, value, size, /*timeout=*/0);
793                 if (rc < 0) {
794                         fprintf(stderr, "Error on control %d: %s\n", ctrls[req].index, libusb_error_name(rc));
795                         exit(1);
796                 }
797                 
798                 printf("rc=%d: ep=%d@%d %d -> 0x", rc, ctrls[req].endpoint, ctrls[req].request, ctrls[req].index);
799                 for (int i = 0; i < rc; ++i) {
800                         printf("%02x", value[i]);
801                 }
802                 printf("\n");
803         }
804
805 #if 0
806         // DEBUG
807         for ( ;; ) {
808                 static int my_index = 0;
809                 static uint8_t value[4];
810                 int size = sizeof(value);
811                 rc = libusb_control_transfer(devh, LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN,
812                         /*request=*/214, /*value=*/0, /*index=*/my_index, value, size, /*timeout=*/0);
813                 if (rc < 0) {
814                         fprintf(stderr, "Error on control\n");
815                         exit(1);
816                 }
817                 printf("rc=%d index=%d: 0x", rc, my_index);
818                 for (int i = 0; i < rc; ++i) {
819                         printf("%02x", value[i]);
820                 }
821                 printf("\n");
822         }
823 #endif
824
825 #if 0
826         // set up an asynchronous transfer of the timer register
827         static uint8_t cmdbuf[LIBUSB_CONTROL_SETUP_SIZE + 4];
828         static int completed = 0;
829
830         xfr = libusb_alloc_transfer(0);
831         libusb_fill_control_setup(cmdbuf,
832             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
833                 /*index=*/44, /*length=*/4);
834         libusb_fill_control_transfer(xfr, devh, cmdbuf, cb_xfr, &completed, 0);
835         xfr->user_data = this;
836         libusb_submit_transfer(xfr);
837
838         // set up an asynchronous transfer of register 24
839         static uint8_t cmdbuf2[LIBUSB_CONTROL_SETUP_SIZE + 4];
840         static int completed2 = 0;
841
842         xfr = libusb_alloc_transfer(0);
843         libusb_fill_control_setup(cmdbuf2,
844             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
845                 /*index=*/24, /*length=*/4);
846         libusb_fill_control_transfer(xfr, devh, cmdbuf2, cb_xfr, &completed2, 0);
847         xfr->user_data = this;
848         libusb_submit_transfer(xfr);
849 #endif
850
851         // set up an asynchronous transfer of the register dump
852         static uint8_t cmdbuf3[LIBUSB_CONTROL_SETUP_SIZE + 4];
853         static int completed3 = 0;
854
855         xfr = libusb_alloc_transfer(0);
856         libusb_fill_control_setup(cmdbuf3,
857             LIBUSB_REQUEST_TYPE_VENDOR | LIBUSB_ENDPOINT_IN, /*request=*/214, /*value=*/0,
858                 /*index=*/current_register, /*length=*/4);
859         libusb_fill_control_transfer(xfr, devh, cmdbuf3, cb_xfr, &completed3, 0);
860         xfr->user_data = this;
861         //libusb_submit_transfer(xfr);
862
863         audiofp = fopen("audio.raw", "wb");
864
865         // set up isochronous transfers for audio and video
866         for (int e = 3; e <= 4; ++e) {
867                 //int num_transfers = (e == 3) ? 6 : 6;
868                 int num_transfers = 6;
869                 for (int i = 0; i < num_transfers; ++i) {
870                         int num_iso_pack, size;
871                         if (e == 3) {
872                                 // Video seems to require isochronous packets scaled with the width; 
873                                 // seemingly six lines is about right, rounded up to the required 1kB
874                                 // multiple.
875                                 size = WIDTH * 2 * 6;
876                                 // Note that for 10-bit input, you'll need to increase size accordingly.
877                                 //size = size * 4 / 3;
878                                 if (size % 1024 != 0) {
879                                         size &= ~1023;
880                                         size += 1024;
881                                 }
882                                 num_iso_pack = (2 << 18) / size;  // 512 kB.
883                                 printf("Picking %d packets of 0x%x bytes each\n", num_iso_pack, size);
884                         } else {
885                                 size = 0xc0;
886                                 num_iso_pack = 80;
887                         }
888                         int num_bytes = num_iso_pack * size;
889                         uint8_t *buf = new uint8_t[num_bytes];
890
891                         xfr = libusb_alloc_transfer(num_iso_pack);
892                         if (!xfr) {
893                                 fprintf(stderr, "oom\n");
894                                 exit(1);
895                         }
896
897                         int ep = LIBUSB_ENDPOINT_IN | e;
898                         libusb_fill_iso_transfer(xfr, devh, ep, buf, num_bytes,
899                                 num_iso_pack, cb_xfr, nullptr, 0);
900                         libusb_set_iso_packet_lengths(xfr, size);
901                         xfr->user_data = this;
902                         iso_xfrs.push_back(xfr);
903                 }
904         }
905 }
906
907 void BMUSBCapture::start_bm_capture()
908 {
909         printf("starting capture\n");
910         int i = 0;
911         for (libusb_transfer *xfr : iso_xfrs) {
912                 printf("submitting transfer...\n");
913                 int rc = libusb_submit_transfer(xfr);
914                 ++i;
915                 if (rc < 0) {
916                         //printf("num_bytes=%d\n", num_bytes);
917                         fprintf(stderr, "Error submitting iso to endpoint 0x%02x, number %d: %s\n",
918                                 xfr->endpoint, i, libusb_error_name(rc));
919                         exit(1);
920                 }
921         }
922
923
924 #if 0
925         libusb_release_interface(devh, 0);
926 out:
927         if (devh)
928                 libusb_close(devh);
929         libusb_exit(nullptr);
930         return rc;
931 #endif
932 }
933
934 void BMUSBCapture::start_bm_thread()
935 {
936         should_quit = false;
937         usb_thread = thread(&BMUSBCapture::usb_thread_func);
938 }
939
940 void BMUSBCapture::stop_bm_thread()
941 {
942         should_quit = true;
943         usb_thread.join();
944 }