25 #ifndef MSHADOW_PACKET_INL_H_ 26 #define MSHADOW_PACKET_INL_H_ 28 #if defined(__APPLE__) || defined(__FreeBSD__) 48 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kSSE2 50 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kPlain 59 template<
typename DType, PacketArch Arch = MSHADOW_DEFAULT_PACKET>
62 template<PacketArch Arch>
84 size_t pitch = ((lspace +
mask) >> bits) << bits;
87 void *res = _aligned_malloc(pitch * num_line, 1 << bits);
90 int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
91 CHECK_EQ(ret, 0) <<
"AlignedMallocPitch failed";
94 LOG(FATAL) <<
"AlignedMallocPitch failed";
97 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" 100 #pragma GCC diagnostic pop 116 template<PacketArch Arch>
119 return !(pitch & ((1 << bits) - 1));
123 template<PacketArch Arch>
125 return CheckAlign<Arch>(
reinterpret_cast<size_t>(ptr));
133 template<
typename DType, PacketArch Arch>
137 const index_t fsize =
sizeof(DType);
138 return (((size * fsize + mask) >> bits) << bits) / fsize;
146 template<
typename DType, PacketArch Arch>
149 const index_t fsize =
sizeof(DType);
150 return (((size * fsize) >> bits) << bits) / fsize;
159 template<
typename OP,
typename DType, PacketArch Arch>
161 static const bool kEnabled =
false;
164 template<
typename DType, PacketArch Arch>
166 static const bool kEnabled =
true;
172 template<
typename DType, PacketArch Arch>
174 static const bool kEnabled =
true;
180 template<
typename DType, PacketArch Arch>
182 static const bool kEnabled =
true;
188 template<
typename DType, PacketArch Arch>
190 static const bool kEnabled =
true;
197 template<
typename DType, PacketArch Arch>
199 static const bool kEnabled =
true;
207 template<
typename SV,
typename TFloat, PacketArch Arch>
215 template<
typename TFloat, PacketArch Arch>
216 struct Saver<sv::saveto, TFloat, Arch> {
225 #if MSHADOW_USE_SSE && !defined(__CUDACC__) 235 template<
typename ExpType,
typename DType, PacketArch Arch>
246 template <
typename Device,
int dim,
typename DType, PacketArch Arch>
250 :dptr_(t.dptr_), stride_(t.stride_) {}
255 return dptr_[y * stride_ + x];
263 template<
typename DType, PacketArch Arch>
278 template<
typename OP,
typename TA,
typename TB,
int etype,
typename DType, PacketArch Arch>
282 : lhs_(lhs), rhs_(rhs) {}
287 return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
295 template<
typename OP,
typename TA,
int etype,
typename DType, PacketArch Arch>
303 return OP::Map(src_.Eval(y, x));
310 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
314 template<PacketArch Arch,
typename DType>
318 template<PacketArch Arch,
typename T,
typename DType>
322 template<PacketArch Arch,
typename T,
int dim,
typename DType>
327 template<PacketArch Arch,
typename OP,
typename TA,
typename DType,
int etype>
330 return PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>(MakePacketPlan<Arch>(e.
src_));
332 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
333 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
335 return PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>,
336 DType, Arch>(MakePacketPlan<Arch>(e.
lhs_), MakePacketPlan<Arch>(e.
rhs_));
346 template<
typename E, PacketArch Arch>
348 static const bool kPass =
false;
350 template<PacketArch Arch>
352 static const bool kPass =
true;
354 template<PacketArch Arch>
356 static const bool kPass =
true;
358 template<
typename DType, PacketArch Arch>
362 template<
int dim,
typename DType, PacketArch Arch>
366 template<
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
371 template<
typename OP,
typename TA,
typename TB,
typename DType,
int etype, PacketArch Arch>
379 template<
int dim,
typename E, PacketArch Arch>
381 inline static bool Check(
const E &exp) {
385 template<
int dim,
typename DType, PacketArch Arch>
391 template<
int dim,
typename DType, PacketArch Arch>
394 return packet::CheckAlign<Arch>(t.
dptr_) &&
395 packet::CheckAlign<Arch>(t.
stride_ *
sizeof(DType));
398 template<
int dim,
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
404 template<
int dim,
typename OP,
typename TA,
typename TB,
405 typename DType,
int etype, PacketArch Arch>
416 template<
typename SV,
typename E,
int dim,
typename DType, PacketArch Arch>
420 const index_t xlen = packet::LowerAlign<DType, Arch>(dst.
size(1));
423 #pragma omp parallel for 426 for (
index_t x = 0; x < xlen; x += packetSize) {
430 SV::Save(dst[y][x], plan.
Eval(y, x));
436 #endif // MSHADOW_PACKET_INL_H_ static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:191
ScalarExp< DType > scalar(DType s)
create a scalar expression
Definition: expression.h:104
Definition: packet-inl.h:236
DType * dptr_
pointer to the data
Definition: tensor.h:435
const Container & self(void) const
Definition: expression.h:83
void AlignedFree(void *ptr)
free aligned space
Definition: packet-inl.h:107
const TB & rhs_
right operand
Definition: expression.h:340
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:286
Definition: packet-inl.h:380
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:302
static bool Check(const E &exp)
Definition: packet-inl.h:381
binary map expression lhs [op] rhs
Definition: expression.h:335
static const index_t value
Definition: packet-inl.h:64
PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch > MakePacketPlan(const UnaryMapExp< OP, TA, DType, etype > &e)
Definition: packet-inl.h:329
void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)
analogous to cudaMallocPitch: allocates an aligned space with num_line * lspace cells
Definition: packet-inl.h:78
Definition: packet-inl.h:43
base class of all rvalues
Definition: expression.h:149
DType scalar_
scalar value
Definition: expression.h:98
PacketArch
Definition: packet-inl.h:42
PacketPlan(const PacketPlan< TA, DType, Arch > &lhs, const PacketPlan< TB, DType, Arch > &rhs)
Definition: packet-inl.h:281
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &src)
Definition: packet-inl.h:200
header file of tensor data structure and functions. This lib requires explicit memory allocation and d...
device name CPU
Definition: tensor.h:40
MaskExp< IndexExp, SrcExp, DType > mask(const Exp< IndexExp, DType, e1 > &index, const Exp< SrcExp, DType, e2 > &src)
Definition: mask.h:58
definitions of abstract expressions and expressions template
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:254
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:175
int32_t index_t
type that will be used for index
Definition: base.h:336
PacketPlan(const PacketPlan< TA, DType, Arch > &src)
Definition: packet-inl.h:298
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:270
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:183
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:520
static bool Check(const BinaryMapExp< OP, TA, TB, DType, etype > &t)
Definition: packet-inl.h:407
Definition: packet-inl.h:44
MSHADOW_XINLINE index_t size(int idx) const
return size of i-th dimension, start counting from highest dimension
Definition: tensor.h:506
support for SSE2 packet optimization of some operations
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:217
generic Packet operator
Definition: packet-inl.h:160
MSHADOW_CINLINE packet::Packet< DType > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:299
PacketPlan(DType scalar)
Definition: packet-inl.h:266
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:267
PacketPlan(const Tensor< Device, dim, DType > &t)
Definition: packet-inl.h:249
bool CheckAlign(size_t pitch)
check if a pointer is aligned
Definition: packet-inl.h:117
index_t LowerAlign(index_t size)
get lower bound of aligned index of size
Definition: packet-inl.h:147
Definition: packet-inl.h:63
const TA & src_
source expression
Definition: expression.h:408
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:226
index_t UpperAlign(index_t size)
get upper bound of aligned index of size
Definition: packet-inl.h:134
unary map expression op(src)
Definition: expression.h:405
scalar expression
Definition: expression.h:96
Definition: packet-inl.h:208
const SubType & real_self(void) const
true self of subtype
Definition: expr_engine-inl.h:50
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:209
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:167
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:283
a general class that allows extension that makes tensors of some shape
Definition: expr_engine-inl.h:44
const TA & lhs_
left operand
Definition: expression.h:338
overloaded + operator between half_t and bf16_t
Definition: base.h:327
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
evaluate the expression at index [y][x], x will be aligned to Packet<DType, Arch>::Size() ...
index_t stride_
stores the stride information in the x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:442
static bool Check(const UnaryMapExp< OP, TA, DType, etype > &t)
Definition: packet-inl.h:400
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:251
general tensor
Definition: tensor.h:421
static bool Check(const Tensor< cpu, dim, DType > &t)
Definition: packet-inl.h:393
void MapPacketPlan(Tensor< cpu, dim, DType > _dst, const expr::PacketPlan< E, DType, Arch > &plan)
use PacketPlan to compute result
Definition: packet-inl.h:417
support for plain packets that use the plain datatype.
Generic packet type.
Definition: packet-inl.h:60
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
static bool Check(const ScalarExp< DType > &exp)
Definition: packet-inl.h:387
index_t openmp_index_t
openmp index for linux
Definition: base.h:344
static check packet enable
Definition: packet-inl.h:347