25 #ifndef MSHADOW_PACKET_SSE_INL_H_ 26 #define MSHADOW_PACKET_SSE_INL_H_ 28 #include <emmintrin.h> 30 #include "../packet-inl.h" 44 explicit Packet(__m128 data) : data_(data) {}
59 data_ = _mm_set1_ps(s);
64 _mm_store_ps(dst, data_);
68 __m128 ans = _mm_add_ps(data_, _mm_movehl_ps(data_, data_));
69 __m128 rst = _mm_add_ss(ans, _mm_shuffle_ps(ans, ans, 1));
70 #if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) 71 return rst.m128_f32[0];
73 float rr = _mm_cvtss_f32(rst);
89 explicit Packet(__m128d data) : data_(data) {}
103 data_ = _mm_set1_pd(s);
108 _mm_store_pd(dst, data_);
111 inline double Sum(
void)
const {
112 __m128d tmp = _mm_add_sd(data_, _mm_unpackhi_pd(data_, data_));
113 #if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) 114 return tmp.m128d_f64[0];
116 double ans = _mm_cvtsd_f64(tmp);
165 #endif // MSHADOW_PACKET_SSE_INL_H_ vector real type for float
Definition: sse-inl.h:82
static MSHADOW_CINLINE Packet< float, kSSE2 > Fill(float s)
Definition: sse-inl.h:46
MSHADOW_CINLINE Packet< float, kSSE2 > & operator=(float s)
Definition: sse-inl.h:58
MSHADOW_CINLINE float Sum() const
Definition: sse-inl.h:67
static MSHADOW_CINLINE Packet< float, kSSE2 > LoadUnAligned(const float *src)
Definition: sse-inl.h:54
MSHADOW_CINLINE Packet< DType, kPlain > operator-(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:77
double Sum(void) const
Definition: sse-inl.h:111
MSHADOW_CINLINE Packet< DType, kPlain > operator/(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:88
static MSHADOW_CINLINE Packet< double, kSSE2 > Load(const double *src)
Definition: sse-inl.h:95
Packet(__m128 data)
Definition: sse-inl.h:44
__m128d data_
Definition: sse-inl.h:86
int32_t index_t
type that will be used for index
Definition: base.h:343
Packet(__m128d data)
Definition: sse-inl.h:89
MSHADOW_CINLINE Packet< DType, kPlain > operator*(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:82
Definition: packet-inl.h:43
MSHADOW_CINLINE Packet< double, kSSE2 > & operator=(double s)
Definition: sse-inl.h:102
MSHADOW_CINLINE void Store(float *dst) const
Definition: sse-inl.h:63
__m128 data_
The internal data.
Definition: sse-inl.h:40
MSHADOW_CINLINE Packet< DType, kPlain > operator+(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:71
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:233
MSHADOW_CINLINE void Store(double *dst) const
Definition: sse-inl.h:107
static MSHADOW_CINLINE Packet< float, kSSE2 > Load(const float *src)
Definition: sse-inl.h:50
overloaded + operator between half_t and bf16_t
Definition: base.h:334
static MSHADOW_CINLINE Packet< double, kSSE2 > LoadUnAligned(const double *src)
Definition: sse-inl.h:98
Packet(void)
Definition: sse-inl.h:42
static MSHADOW_CINLINE Packet< double, kSSE2 > Fill(double s)
Definition: sse-inl.h:91
Generic packet type.
Definition: packet-inl.h:59
Packet(void)
Definition: sse-inl.h:88