summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: 2667316)
Also make the two static weight-index methods, get_weight_index_scrambled()
and get_weight_index(), constexpr, for consistency with the other static
get_hash_value() method right above.
Tested for speed by user Torom (thanks).
closes https://github.com/official-stockfish/Stockfish/pull/4708
No functional change
- static IndexType get_weight_index_scrambled(IndexType i)
+ static constexpr IndexType get_weight_index_scrambled(IndexType i)
{
return
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
{
return
(i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 +
- static IndexType get_weight_index(IndexType i)
+ static constexpr IndexType get_weight_index(IndexType i)
{
#if defined (USE_SSSE3)
return get_weight_index_scrambled(i);
{
#if defined (USE_SSSE3)
return get_weight_index_scrambled(i);
return !stream.fail();
}
// Forward propagation
return !stream.fail();
}
// Forward propagation
- const OutputType* propagate(
const InputType* input, OutputType* output) const {
#if defined (USE_AVX512)
const InputType* input, OutputType* output) const {
#if defined (USE_AVX512)
PaddedInputDimensions,
OutputDimensions>(output, weights, biases, input);
#endif
PaddedInputDimensions,
OutputDimensions>(output, weights, biases, input);
#endif
template <IndexType InDims, IndexType OutDims>
class AffineTransformSparseInput {
public:
template <IndexType InDims, IndexType OutDims>
class AffineTransformSparseInput {
public:
// Input/output type
using InputType = std::uint8_t;
using OutputType = std::int32_t;
// Input/output type
using InputType = std::uint8_t;
using OutputType = std::int32_t;
- static IndexType get_weight_index_scrambled(IndexType i)
+ static constexpr IndexType get_weight_index_scrambled(IndexType i)
{
return
(i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize +
{
return
(i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize +
- static IndexType get_weight_index(IndexType i)
+ static constexpr IndexType get_weight_index(IndexType i)
{
#if defined (USE_SSSE3)
return get_weight_index_scrambled(i);
{
#if defined (USE_SSSE3)
return get_weight_index_scrambled(i);
return !stream.fail();
}
// Forward propagation
return !stream.fail();
}
// Forward propagation
- const OutputType* propagate(
const InputType* input, OutputType* output) const {
#if defined (USE_SSSE3)
const InputType* input, OutputType* output) const {
#if defined (USE_SSSE3)
PaddedInputDimensions,
OutputDimensions>(output, weights, biases, input);
#endif
PaddedInputDimensions,
OutputDimensions>(output, weights, biases, input);
#endif
- const OutputType* propagate(
const InputType* input, OutputType* output) const {
#if defined(USE_AVX2)
const InputType* input, OutputType* output) const {
#if defined(USE_AVX2)
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
}
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> WeightScaleBits)));
}
- const OutputType* propagate(
const InputType* input, OutputType* output) const {
#if defined(USE_SSE2)
const InputType* input, OutputType* output) const {
#if defined(USE_SSE2)
// needs to be accounted for in the trainer
std::max(0ll, std::min(127ll, (((long long)input[i] * input[i]) >> (2 * WeightScaleBits)) / 128)));
}
// needs to be accounted for in the trainer
std::max(0ll, std::min(127ll, (((long long)input[i] * input[i]) >> (2 * WeightScaleBits)) / 128)));
}