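Here is the situation: when a uint8 vector is 4 times shorter than the native vector size, to_uint32() returns the last four stored bytes (13..16) instead of the first four (1..4), while to_uint16() and to_uint64() behave as expected: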
Input:
uint8<4>: [1 2 3 4 hidden: 5 6 7 8 9 10 11 12 13 14 15 16]
Output (bytes):
OK to_uint16(): [1 0 2 0 3 0 4 0 hidden: 5 0 6 0 7 0 8 0]
Unexpected to_uint32(): [13 0 0 0 14 0 0 0 15 0 0 0 16 0 0 0]
OK to_uint64(): [1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0]
Expected Output (bytes):
to_uint16(): [1 0 2 0 3 0 4 0 hidden: 5 0 6 0 7 0 8 0]
to_uint32(): [1 0 0 0 2 0 0 0 3 0 0 0 4 0 0 0]
to_uint64(): [1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0]
Maybe I overlooked something. Is this the correct behavior?
Here is the minimal example:
main.cpp:
#include <simdpp/simd.h>
#include <iostream>
using namespace simdpp;
// Debug helper: writes the source text of `expr`, then ": ", then the value of
// `expr` to std::cout, and finishes the line with std::endl (newline + flush).
#define DBG(expr) std::cout << #expr ": " << (expr) << std::endl
// Output operator for vectors (debugging aid).
//
// Stores `v` into a local buffer of SIMDPP_FAST_INT8_SIZE bytes and prints
// every byte of that buffer as a decimal integer, separated by spaces and
// enclosed in square brackets. The marker " hidden:" is written just before
// the byte at index `length_bytes`, i.e. where the declared vector ends, so
// that bytes beyond the declared length are visibly set apart.
//
// oss : stream to write to
// v   : vector to dump
// Returns `oss` so that insertions can be chained.
template <unsigned N, typename V>
std::ostream& operator<<(std::ostream& oss, const simdpp::any_vec<N, V>& v)
{
    constexpr size_t length_bytes = simdpp::any_vec<N, V>::length_bytes;

    // The dump buffer has a fixed size. A vector with more bytes than that
    // would make store() write past the end of the buffer (assuming store()
    // writes at least length_bytes bytes), so reject it at compile time.
    static_assert(length_bytes <= SIMDPP_FAST_INT8_SIZE,
                  "operator<<: vector does not fit into the dump buffer");

    // Zero-initialised so that any byte store() does not write is never read
    // while indeterminate in the loop below.
    SIMDPP_ALIGN(SIMDPP_FAST_INT8_SIZE) unsigned char a[SIMDPP_FAST_INT8_SIZE] = {};
    store(&a[0], v);

    oss << "[";
    // Empty before the first element, a single space afterwards. A plain
    // string literal avoids relying on <string>, which this file never includes.
    const char* sep = "";
    for (size_t i = 0; i < SIMDPP_FAST_INT8_SIZE; i++) {
        if (i == length_bytes) {
            oss << " hidden:";
        }
        oss << sep << static_cast<int>(a[i]);
        sep = " ";
    }
    return oss << "]";
}
// Minimal reproducer: builds one short uint8 vector, converts it with
// to_uint16 / to_uint32 / to_uint64 and prints the bytes of every vector.
// argc and argv are not used.
int main(int argc, char** argv)
{
// v8 is 4-times shorter than the native vector size
// It has SIMDPP_FAST_INT32_SIZE elements, yet make_uint is given sixteen values (1..16).
uint8 <SIMDPP_FAST_INT32_SIZE> v8 = make_uint<uint8<SIMDPP_FAST_INT32_SIZE>>(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
// Convert the same input with each conversion function so the results can be compared.
auto v16 = to_uint16(v8);
auto v32 = to_uint32(v8);
auto v64 = to_uint64(v8);
// DBG prints the variable name followed by the vector's bytes, so the names
// below appear verbatim in the program output.
DBG(v8);
DBG(v16);
DBG(v32);
DBG(v64);
return 0;
}
Makefile:
# Add ./libsimdpp to the include search path so <simdpp/simd.h> is found.
CXXFLAGS += -I./libsimdpp
# Enable SSE2 code generation and define SIMDPP_ARCH_X86_SSE2 for the sources.
CXXFLAGS += -msse2 -DSIMDPP_ARCH_X86_SSE2
# Build the reproducer from main.cpp. The recipe line below must start with a
# TAB character; without it make stops with "missing separator".
main.exe: main.cpp
	$(CXX) $(CXXFLAGS) -o $@ $<
Output:
v8: [1 2 3 4 hidden: 5 6 7 8 9 10 11 12 13 14 15 16]
v16: [1 0 2 0 3 0 4 0 hidden: 5 0 6 0 7 0 8 0]
v32: [13 0 0 0 14 0 0 0 15 0 0 0 16 0 0 0]
v64: [1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0]
Here is the situation:
Maybe I overlooked something. Is this the correct behavior?
Here is the minimal example:
main.cpp:
Makefile:
Output: