X-Git-Url: https://git.sesse.net/?p=bmusb;a=blobdiff_plain;f=bmusb.cpp;h=0169d062dad8d1823ed441e29cc3938eee2c2e09;hp=2e0f5c3e4c83264c25f780bf22be984891269f5d;hb=HEAD;hpb=32043c95d3b9f8cb97d6d28b9996fa1bec2ce11b

diff --git a/bmusb.cpp b/bmusb.cpp
index 2e0f5c3..19a9da1 100644
--- a/bmusb.cpp
+++ b/bmusb.cpp
@@ -1,4 +1,4 @@
-// Intensity Shuttle USB3 capture driver, v0.7.0
+// Intensity Shuttle USB3 capture driver, v0.7.8
 // Can download 8-bit and 10-bit UYVY/v210-ish frames from HDMI, quite stable
 // (can do captures for hours at a time with no drops), except during startup
 // 576p60/720p60/1080i60 works, 1080p60 does not work (firmware limitation)
@@ -202,6 +202,7 @@ bool decode_video_format(uint16_t video_format, VideoFormat *decoded_video_forma
 		{ 0x0151,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50.
 		{ 0x0011,  720,  576,   0, 44,  5,    50,    1, false },  // 576p50 (5:4).
 		{ 0x0143, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
+		{ 0x0161, 1280,  720,   0, 25,  5,    50,    1, false },  // 720p50.
 		{ 0x0103, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
 		{ 0x0125, 1280,  720,   0, 25,  5,    60,    1, false },  // 720p60.
 		{ 0x0121, 1280,  720,   0, 25,  5, 60000, 1001, false },  // 720p59.94.
@@ -252,7 +253,7 @@ int guess_sample_rate(const VideoFormat &video_format, size_t len, int default_r
 	// See if we match or are very close to any of the mandatory HDMI sample rates.
 	const int candidate_sample_rates[] = { 32000, 44100, 48000 };
 	for (int rate : candidate_sample_rates) {
-		if (abs(int(num_samples_per_second) - rate) < 50) {
+		if (abs(int(num_samples_per_second) - rate) <= 100) {
 			return rate;
 		}
 	}
@@ -341,7 +342,9 @@ void dump_frame(const char *filename, uint8_t *frame_start, size_t frame_len)
 
 void dump_audio_block(uint8_t *audio_start, size_t audio_len)
 {
-	fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
+	if (audiofp != nullptr) {
+		fwrite(audio_start + AUDIO_HEADER_SIZE, 1, audio_len - AUDIO_HEADER_SIZE, audiofp);
+	}
 }
 
 void BMUSBCapture::dequeue_thread_func()
@@ -535,6 +538,9 @@ void add_to_frame(FrameAllocator::Frame *current_frame, const char *frame_type_n
 		}
 		//dump_frame();
 	} else {
+		if (current_frame->data_copy != nullptr) {
+			memcpy(current_frame->data_copy + current_frame->len, start, bytes);
+		}
 		if (current_frame->interleaved) {
 			uint8_t *data = current_frame->data + current_frame->len / 2;
 			uint8_t *data2 = current_frame->data2 + current_frame->len / 2;
@@ -684,6 +690,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 {
 	const __m256i needle = _mm256_set1_epi8(sync_char);
 
+	size_t bytes_copied;
 	const __restrict __m256i *in = (const __m256i *)aligned_start;
 	if (current_frame->interleaved) {
 		__restrict __m256i *out1 = (__m256i *)(current_frame->data + (current_frame->len + 1) / 2);
@@ -724,9 +731,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 			++out1;
 			++out2;
 		}
-		current_frame->len += (uint8_t *)in - aligned_start;
+		bytes_copied = (uint8_t *)in - aligned_start;
 	} else {
-		__m256i *out = (__m256i *)(current_frame->data + current_frame->len);
+		uint8_t *old_end = current_frame->data + current_frame->len;
+		__m256i *out = (__m256i *)old_end;
 		while (in < (const __m256i *)limit) {
 			__m256i data = _mm256_load_si256(in);
 			_mm256_storeu_si256(out, data);  // Store as early as possible, even if the data isn't used.
@@ -738,8 +746,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 			++in;
 			++out;
 		}
-		current_frame->len = (uint8_t *)out - current_frame->data;
+		bytes_copied = (uint8_t *)out - old_end;
 	}
+	if (current_frame->data_copy != nullptr) {
+		// TODO: It would be somewhat more cache-efficient to write this in the
+		// same loop as above. However, it might not be worth the extra complexity.
+		memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
+	}
+	current_frame->len += bytes_copied;
 
 	//printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
 	return (const uint8_t *)in;
@@ -751,6 +765,7 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 	const __m128i needle = _mm_set1_epi8(sync_char);
 
 	const __m128i *in = (const __m128i *)aligned_start;
+	size_t bytes_copied;
 	if (current_frame->interleaved) {
 		__m128i *out1 = (__m128i *)(current_frame->data + (current_frame->len + 1) / 2);
 		__m128i *out2 = (__m128i *)(current_frame->data2 + current_frame->len / 2);
@@ -781,9 +796,10 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 			++out1;
 			++out2;
 		}
-		current_frame->len += (uint8_t *)in - aligned_start;
+		bytes_copied = (uint8_t *)in - aligned_start;
 	} else {
-		__m128i *out = (__m128i *)(current_frame->data + current_frame->len);
+		uint8_t *old_end = current_frame->data + current_frame->len;
+		__m128i *out = (__m128i *)old_end;
 		while (in < (const __m128i *)limit) {
 			__m128i data = _mm_load_si128(in);
 			_mm_storeu_si128(out, data);  // Store as early as possible, even if the data isn't used.
@@ -795,8 +811,14 @@ const uint8_t *add_to_frame_fastpath_core(FrameAllocator::Frame *current_frame,
 			++in;
 			++out;
 		}
-		current_frame->len = (uint8_t *)out - current_frame->data;
+		bytes_copied = (uint8_t *)out - old_end;
+	}
+	if (current_frame->data_copy != nullptr) {
+		// TODO: It would be somewhat more cache-efficient to write this in the
+		// same loop as above. However, it might not be worth the extra complexity.
+		memcpy(current_frame->data_copy + current_frame->len, aligned_start, bytes_copied);
 	}
+	current_frame->len += bytes_copied;
 
 	//printf("managed to fastpath %ld/%ld bytes\n", (const uint8_t *)in - (const uint8_t *)aligned_start, orig_bytes);
 	return (const uint8_t *)in;
@@ -1479,6 +1501,7 @@ void BMUSBCapture::start_bm_thread()
 void BMUSBCapture::stop_bm_thread()
 {
 	should_quit = true;
+	libusb_interrupt_event_handler(nullptr);
 	usb_thread.join();
 }