Collaboration diagram for Dnnl_api_attributes:

Classes
struct	dnnl_primitive_attr
	An opaque structure for primitive descriptor attributes. More...

struct	dnnl_post_ops
	An opaque structure for a chain of post operations. More...

Typedefs
typedef struct dnnl_primitive_attr *	dnnl_primitive_attr_t
	A primitive descriptor attributes handle that controls primitive behavior. More...

typedef const struct dnnl_primitive_attr *	const_dnnl_primitive_attr_t
	A constant primitive descriptor attributes handle. More...

typedef struct dnnl_post_ops *	dnnl_post_ops_t
	A post operation chain handle. More...

typedef const struct dnnl_post_ops *	const_dnnl_post_ops_t
	A constant post operation chain handle. More...

Enumerations
enum	dnnl_fpmath_mode_t { dnnl_fpmath_mode_strict, dnnl_fpmath_mode_bf16, dnnl_fpmath_mode_f16, dnnl_fpmath_mode_any }
	Floating-point math mode. More...

enum	dnnl_scratchpad_mode_t { dnnl_scratchpad_mode_library, dnnl_scratchpad_mode_user }
	Scratchpad mode. More...

Functions
dnnl_status_t DNNL_API	dnnl_primitive_attr_create (dnnl_primitive_attr_t *attr)

dnnl_status_t DNNL_API	dnnl_primitive_attr_clone (dnnl_primitive_attr_t *attr, const_dnnl_primitive_attr_t existing_attr)

dnnl_status_t DNNL_API	dnnl_primitive_attr_destroy (dnnl_primitive_attr_t attr)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_fpmath_mode (const_dnnl_primitive_attr_t attr, dnnl_fpmath_mode_t *mode)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_fpmath_mode (dnnl_primitive_attr_t attr, dnnl_fpmath_mode_t mode)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_scratchpad_mode (const_dnnl_primitive_attr_t attr, dnnl_scratchpad_mode_t *mode)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_scratchpad_mode (dnnl_primitive_attr_t attr, dnnl_scratchpad_mode_t mode)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_output_scales (const_dnnl_primitive_attr_t attr, dnnl_dim_t *count, int *mask, const float **scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_output_scales (dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask, const float *scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_scales (dnnl_primitive_attr_t attr, int arg, dnnl_dim_t *count, int *mask, const float **scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_scales (dnnl_primitive_attr_t attr, int arg, dnnl_dim_t count, int mask, const float *scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_zero_points (const_dnnl_primitive_attr_t attr, int arg, dnnl_dim_t *count, int *mask, const int32_t **zero_points)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_zero_points (dnnl_primitive_attr_t attr, int arg, dnnl_dim_t count, int mask, const int32_t *zero_points)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_post_ops (const_dnnl_primitive_attr_t attr, const_dnnl_post_ops_t *post_ops)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_post_ops (dnnl_primitive_attr_t attr, const_dnnl_post_ops_t post_ops)

dnnl_status_t DNNL_API	dnnl_post_ops_create (dnnl_post_ops_t *post_ops)

dnnl_status_t DNNL_API	dnnl_post_ops_destroy (dnnl_post_ops_t post_ops)

int DNNL_API	dnnl_post_ops_len (const_dnnl_post_ops_t post_ops)

dnnl_primitive_kind_t DNNL_API	dnnl_post_ops_get_kind (const_dnnl_post_ops_t post_ops, int index)

dnnl_status_t DNNL_API	dnnl_post_ops_append_sum (dnnl_post_ops_t post_ops, float scale)

dnnl_status_t DNNL_API	dnnl_post_ops_append_sum_v2 (dnnl_post_ops_t post_ops, float scale, dnnl_data_type_t data_type)

dnnl_status_t DNNL_API	dnnl_post_ops_append_sum_v3 (dnnl_post_ops_t post_ops, float scale, int32_t zero_point, dnnl_data_type_t data_type)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_sum (const_dnnl_post_ops_t post_ops, int index, float *scale)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_sum_v2 (const_dnnl_post_ops_t post_ops, int index, float *scale, dnnl_data_type_t *data_type)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_sum_v3 (const_dnnl_post_ops_t post_ops, int index, float *scale, int32_t *zero_point, dnnl_data_type_t *data_type)

dnnl_status_t DNNL_API	dnnl_post_ops_append_eltwise (dnnl_post_ops_t post_ops, float scale, dnnl_alg_kind_t alg_kind, float alpha, float beta)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_eltwise (const_dnnl_post_ops_t post_ops, int index, float *scale, dnnl_alg_kind_t *alg_kind, float *alpha, float *beta)

dnnl_status_t DNNL_API	dnnl_post_ops_append_dw_k3s1p1 (dnnl_post_ops_t post_ops, dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type, dnnl_data_type_t dst_data_type, dnnl_dim_t count, int mask, const float *scales)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_dw_k3s1p1 (const_dnnl_post_ops_t post_ops, int index, dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type, dnnl_data_type_t *dst_data_type, dnnl_dim_t *count, int *mask, const float **scales)

dnnl_status_t DNNL_API	dnnl_post_ops_append_dw_k3s2p1 (dnnl_post_ops_t post_ops, dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type, dnnl_data_type_t dst_data_type, dnnl_dim_t count, int mask, const float *scales)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_dw_k3s2p1 (const_dnnl_post_ops_t post_ops, int index, dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type, dnnl_data_type_t *dst_data_type, dnnl_dim_t *count, int *mask, const float **scales)

dnnl_status_t DNNL_API	dnnl_post_ops_append_binary (dnnl_post_ops_t post_ops, dnnl_alg_kind_t alg_kind, const dnnl_memory_desc_t *src1_desc)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_binary (const_dnnl_post_ops_t post_ops, int index, dnnl_alg_kind_t *alg_kind, const dnnl_memory_desc_t **src1_desc)

dnnl_status_t DNNL_API	dnnl_post_ops_append_prelu (dnnl_post_ops_t post_ops, int mask)

dnnl_status_t DNNL_API	dnnl_post_ops_get_params_prelu (const_dnnl_post_ops_t post_ops, int index, int *mask)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_rnn_data_qparams (dnnl_primitive_attr_t attr, const float scale, const float shift)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_rnn_data_qparams (const_dnnl_primitive_attr_t attr, float *scale, float *shift)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_rnn_weights_qparams (dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask, const float *scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_rnn_weights_qparams (const_dnnl_primitive_attr_t attr, dnnl_dim_t *count, int *mask, const float **scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_set_rnn_weights_projection_qparams (dnnl_primitive_attr_t attr, dnnl_dim_t count, int mask, const float *scales)

dnnl_status_t DNNL_API	dnnl_primitive_attr_get_rnn_weights_projection_qparams (const_dnnl_primitive_attr_t attr, dnnl_dim_t *count, int *mask, const float **scales)

Detailed Description

Typedef Documentation

◆ const_dnnl_post_ops_t

typedef const struct dnnl_post_ops* const_dnnl_post_ops_t

A constant post operation chain handle.

◆ const_dnnl_primitive_attr_t

typedef const struct dnnl_primitive_attr* const_dnnl_primitive_attr_t

A constant primitive descriptor attributes handle.

◆ dnnl_post_ops_t

typedef struct dnnl_post_ops* dnnl_post_ops_t

A post operation chain handle.

◆ dnnl_primitive_attr_t

typedef struct dnnl_primitive_attr* dnnl_primitive_attr_t

A primitive descriptor attributes handle that controls primitive behavior.

Enumeration Type Documentation

◆ dnnl_fpmath_mode_t

enum dnnl_fpmath_mode_t

Floating-point math mode.

Enumerator
dnnl_fpmath_mode_strict	Default behavior, no downconversions allowed.
dnnl_fpmath_mode_bf16	Implicit f32->bf16 conversions allowed.
dnnl_fpmath_mode_f16	Implicit f32->f16 conversions allowed.
dnnl_fpmath_mode_any	Implicit f32->f16 or f32->bf16 conversions allowed.

◆ dnnl_scratchpad_mode_t

enum dnnl_scratchpad_mode_t

Scratchpad mode.

Enumerator

dnnl_scratchpad_mode_library

The library manages the scratchpad allocation according to the policy specified by the DNNL_ENABLE_CONCURRENT_EXEC build option (default).

When DNNL_ENABLE_CONCURRENT_EXEC=OFF (default), the library scratchpad is common to all primitives to reduce the memory footprint. This configuration comes with limited thread-safety properties, namely primitives can be created and executed in parallel but cannot migrate between threads (in other words, each primitive should be executed in the same thread it was created in).

When DNNL_ENABLE_CONCURRENT_EXEC=ON, the library scratchpad is private to each primitive. The memory footprint is larger than when using DNNL_ENABLE_CONCURRENT_EXEC=OFF but different primitives can be created and run concurrently (the same primitive cannot be run concurrently from two different threads though).

dnnl_scratchpad_mode_user

The user manages the scratchpad allocation by querying and providing the scratchpad memory to primitives. This mode is thread-safe as long as the scratchpad buffers are not used concurrently by two primitive executions.

Function Documentation

◆ dnnl_post_ops_append_binary()

dnnl_status_t DNNL_API dnnl_post_ops_append_binary	(	dnnl_post_ops_t	post_ops,
		dnnl_alg_kind_t	alg_kind,
		const dnnl_memory_desc_t *	src1_desc
	)

Appends a binary post-op.

The kind of this post operation is dnnl_binary.

In the simplest case when the binary is the only post operation, the computations would be:

dst[:] <- binary_op (dst[:], another_input[:])

where binary_op is configured with the given parameters. binary_op supports broadcast semantics for a second operand.

Parameters

post_ops	Post-ops.
alg_kind	Binary algorithm for the post-op.
src1_desc	Memory descriptor of a second operand.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_append_dw_k3s1p1()

dnnl_status_t DNNL_API dnnl_post_ops_append_dw_k3s1p1	(	dnnl_post_ops_t	post_ops,
		dnnl_data_type_t	weights_data_type,
		dnnl_data_type_t	bias_data_type,
		dnnl_data_type_t	dst_data_type,
		dnnl_dim_t	count,
		int	mask,
		const float *	scales
	)

Appends a depthwise post-op convolution with stride 1.

This post-op can only be fused with a 2D 1x1 convolution (convolution with weights spatial dimension equal to 1 i.e., kh=kw=1).

The kind of this post-op is dnnl_convolution.

The number of outputs for the primitive remains the same as before. The output size remains the same as that of the original primitive due to stride=1.

The Post-op can be defined as:

 dst[:] <- scales * (conv_dw(conv_1x1))

See dev_guide_attributes_post_ops_depthwise and dev_guide_attributes_post_ops_depthwise_fusion for more info.

Parameters

post_ops	Post-ops.
weights_data_type	Weights data type of depthwise post-op
bias_data_type	Bias data type of depthwise post-op
dst_data_type	Output data type of depthwise post-op
count	Length of the array of scaling factors `scales`.
mask	Scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor.
scales	Pointer to a constant array of float scaling factors.

Returns: dnnl_success on success and a status describing the error otherwise

◆ dnnl_post_ops_append_dw_k3s2p1()

dnnl_status_t DNNL_API dnnl_post_ops_append_dw_k3s2p1	(	dnnl_post_ops_t	post_ops,
		dnnl_data_type_t	weights_data_type,
		dnnl_data_type_t	bias_data_type,
		dnnl_data_type_t	dst_data_type,
		dnnl_dim_t	count,
		int	mask,
		const float *	scales
	)

Appends a depthwise post-op convolution with stride 2.

This post-op can only be fused with a 2D 1x1 convolution (convolution with weights spatial dimension equal to 1 i.e., kh=kw=1).

The kind of this post-op is dnnl_convolution.

The number of outputs for the primitive remains the same as before. The output spatial size can be derived as below:

output_height = ceil(output_height_1x1_convolution / stride)
output_width = ceil(output_width_1x1_convolution / stride)

The Post-op can be defined as:

 dst[:] <- scales * (conv_dw(conv_1x1))

See dev_guide_attributes_post_ops_depthwise and dev_guide_attributes_post_ops_depthwise_fusion for more info.

Parameters

post_ops	Post-ops.
weights_data_type	Weights data type of depthwise post-op
bias_data_type	Bias data type of depthwise post-op
dst_data_type	Output data type of depthwise post-op
count	Length of the array of scaling factors `scales`.
mask	Scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor.
scales	Pointer to a constant array of float scaling factors.

Returns: dnnl_success on success and a status describing the error otherwise

◆ dnnl_post_ops_append_eltwise()

dnnl_status_t DNNL_API dnnl_post_ops_append_eltwise	(	dnnl_post_ops_t	post_ops,
		float	scale,
		dnnl_alg_kind_t	alg_kind,
		float	alpha,
		float	beta
	)

Appends an elementwise post-op.

The kind of this post operation is dnnl_eltwise.

In the simplest case when the elementwise is the only post operation, the computations would be:

dst[:] <- scale * eltwise_op (op(...)) // instead of dst[:] <- op(...)

where eltwise_op is configured with the given parameters.

Parameters

post_ops	Post-ops.
scale	Scaling factor.
alg_kind	Elementwise algorithm for the post-op.
alpha	Alpha parameter for the elementwise algorithm.
beta	Beta parameter for the elementwise algorithm.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_append_prelu()

dnnl_status_t DNNL_API dnnl_post_ops_append_prelu	(	dnnl_post_ops_t	post_ops,
		int	mask
	)

Appends a prelu forward post-op.

The kind of this post-op is dnnl_prelu.

The post-op can be defined as:

 dst[:] <- prelu(dst[:], weights[:])
 prelu:
 dst[:] <- dst[:] if dst[:] > 0
 dst[:] <- dst[:] * weights[:] if dst[:] <= 0

Note

The order of dimensions does not depend on how elements are laid out in memory. For example:

for a 2D CNN activations tensor the order is always (n, c)
for a 4D CNN activations tensor the order is always (n, c, h, w)
for a 5D CNN weights tensor the order is always (g, oc, ic, kh, kw)

The prelu weights tensor is passed at the runtime execution phase. The prelu weights tensor data type is implicitly assumed to be f32 using a plain layout (a, ab, acb, acdb, acdeb).

Parameters

mask	Defines the correspondence between the output tensor dimensions and the prelu weights tensor. The set i-th bit indicates that a dedicated weights value is used for each index along that dimension. Set the mask to 0 to use a common weights value for the whole output tensor.

◆ dnnl_post_ops_append_sum()

dnnl_status_t DNNL_API dnnl_post_ops_append_sum	(	dnnl_post_ops_t	post_ops,
		float	scale
	)

Appends an accumulation (sum) to post-ops. Prior to accumulating the result, the previous value is multiplied by a scale.

The kind of this post-op is dnnl_sum.

This feature may improve performance for cases like residual learning blocks, where the result of convolution is accumulated to the previously computed activations. The parameter scale may be used for the integer-based computations when the result and previous activations have different logical scaling factors.

In the simplest case where the accumulation is the only post-op, the computations will be:

dst[:] <- scale * dst[:] + op(...) // instead of dst[:] <- op(...)

Note: This post-op executes in-place and does not change the destination layout.

Parameters

post_ops	Post-ops.
scale	Accumulation scaling factor.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_append_sum_v2()

dnnl_status_t DNNL_API dnnl_post_ops_append_sum_v2	(	dnnl_post_ops_t	post_ops,
		float	scale,
		dnnl_data_type_t	data_type
	)

Appends an accumulation v2 (sum) to post-ops. Prior to accumulating the result, the previous value is multiplied by a scale.

The kind of this post-op is dnnl_sum.

This feature may improve performance for cases like residual learning blocks, where the result of convolution is accumulated to the previously computed activations. The parameter scale may be used for the integer-based computations when the result and previous activations have different logical scaling factors.

In the simplest case where the accumulation is the only post-op, the computations will be:

dst[:] <- scale * dst[:] + op(...) // instead of dst[:] <- op(...)

If data_type is specified, original dst tensor will be reinterpreted as a tensor with provided data type. Since it is reinterpretation, data_type and dst data type should have the same size. As a result, computations will be:

dst[:] <- scale * as_data_type(dst[:]) + op(...)
                                   // instead of dst[:] <- op(...)

Note: This post-op executes in-place and does not change the destination layout.

Parameters

post_ops	Post-ops.
scale	Accumulation scaling factor.
data_type	Accumulation data_type.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_append_sum_v3()

dnnl_status_t DNNL_API dnnl_post_ops_append_sum_v3	(	dnnl_post_ops_t	post_ops,
		float	scale,
		int32_t	zero_point,
		dnnl_data_type_t	data_type
	)

Appends an accumulation v3 (sum) to post-ops. Prior to accumulating the result, the zero point is subtracted from the previous value, and the difference is multiplied by the scale.

The kind of this post-op is dnnl_sum.

This feature may improve performance in cases such as dequantizing the asymmetrically quantized sum's src1 tensor to the f32 domain before performing the sum operation, which is done by subtracting the zero_point before the scaling.

In the simplest case where accumulation is the only post-op, the computations will be:

dst[:] <- scale * (dst[:] - zero_point) + op(...)
                                        // instead of dst[:] <- op(...)

If data_type is specified, original dst tensor will be reinterpreted as a tensor with provided data type. Since it is reinterpretation, data_type and dst data type should have the same size. As a result, computations will be:

dst[:] <- scale * (as_data_type(dst[:]) - zero_point) + op(...)
                                   // instead of dst[:] <- op(...)

Note: This post-op executes in-place and does not change the destination layout.

Parameters

post_ops	Post-ops.
scale	Accumulation scaling factor.
zero_point	Single scalar int32_t value of zero point.
data_type	Accumulation data_type.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_create()

dnnl_status_t DNNL_API dnnl_post_ops_create ( dnnl_post_ops_t * post_ops )

Creates an empty post-ops sequence.

Parameters

post_ops Output post-ops.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_destroy()

dnnl_status_t DNNL_API dnnl_post_ops_destroy ( dnnl_post_ops_t post_ops )

Destroys post-ops.

Parameters

post_ops Post-ops to destroy.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_get_kind()

dnnl_primitive_kind_t DNNL_API dnnl_post_ops_get_kind	(	const_dnnl_post_ops_t	post_ops,
		int	index
	)

Returns the kind of a post-op entry.

Parameters

post_ops	Post-ops.
index	Post-op entry index.

Returns: The kind of the post-op with the specified index, or dnnl_undefined_primitive if there is no post-op at the specified index.

◆ dnnl_post_ops_get_params_binary()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_binary	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		dnnl_alg_kind_t *	alg_kind,
		const dnnl_memory_desc_t **	src1_desc
	)

Returns the parameters of a binary post-op.

Parameters

post_ops	Post-ops.
index	Index of the binary post-op.
alg_kind	Output binary algorithm kind.
src1_desc	Output memory descriptor of a second operand.

Returns: dnnl_success on success and a status describing the error otherwise; dnnl_invalid_arguments if index does not refer to a binary post-op.

◆ dnnl_post_ops_get_params_dw_k3s1p1()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_dw_k3s1p1	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		dnnl_data_type_t *	weights_data_type,
		dnnl_data_type_t *	bias_data_type,
		dnnl_data_type_t *	dst_data_type,
		dnnl_dim_t *	count,
		int *	mask,
		const float **	scales
	)

Returns the parameters of a depthwise post-op with stride 1.

Parameters

post_ops	Post-ops.
index	Index of the depthwise post-op.
weights_data_type	Weights data type of depthwise post-op
bias_data_type	Bias data type of depthwise post-op
dst_data_type	Output data type of depthwise post-op
count	Output length of the array of scaling factors `scales`.
mask	Output scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor.
scales	Output pointer to a constant array of float scaling factors.

Returns: dnnl_success on success and a status describing the error otherwise

◆ dnnl_post_ops_get_params_dw_k3s2p1()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_dw_k3s2p1	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		dnnl_data_type_t *	weights_data_type,
		dnnl_data_type_t *	bias_data_type,
		dnnl_data_type_t *	dst_data_type,
		dnnl_dim_t *	count,
		int *	mask,
		const float **	scales
	)

Returns the parameters of a depthwise post-op with stride 2.

Parameters

post_ops	Post-ops.
index	Index of the depthwise post-op.
weights_data_type	Weights data type of depthwise post-op
bias_data_type	Bias data type of depthwise post-op
dst_data_type	Output data type of depthwise post-op
count	Output length of the array of scaling factors `scales`.
mask	Output scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor.
scales	Output pointer to a constant array of float scaling factors.

Returns: dnnl_success on success and a status describing the error otherwise

◆ dnnl_post_ops_get_params_eltwise()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_eltwise	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		float *	scale,
		dnnl_alg_kind_t *	alg_kind,
		float *	alpha,
		float *	beta
	)

Returns the parameters of an elementwise post-op.

Parameters

post_ops	Post-ops.
index	Index of the elementwise post-op.
scale	Output scaling factor.
alg_kind	Output elementwise algorithm kind.
alpha	Output alpha parameter for the elementwise algorithm.
beta	Output beta parameter for the elementwise algorithm.

Returns: dnnl_success on success and a status describing the error otherwise; dnnl_invalid_arguments if index does not refer to an elementwise post-op.

◆ dnnl_post_ops_get_params_prelu()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_prelu	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		int *	mask
	)

Returns the parameters of a prelu post-op.

Parameters

post_ops	Post-ops.
index	Index of the prelu post-op.
mask	Mask of the prelu post-op.

◆ dnnl_post_ops_get_params_sum()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_sum	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		float *	scale
	)

Returns the parameters of an accumulation (sum) post-op.

Parameters

post_ops	Post-ops.
index	Index of the sum post-op.
scale	Output accumulation scaling factor.

Returns: dnnl_success on success and a status describing the error otherwise; dnnl_invalid_arguments if index does not refer to a sum post-op.

◆ dnnl_post_ops_get_params_sum_v2()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_sum_v2	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		float *	scale,
		dnnl_data_type_t *	data_type
	)

Returns the parameters of an accumulation (sum) post-op with a data type parameter.

Parameters

post_ops	Post-ops.
index	Index of the sum post-op.
scale	Output accumulation scaling factor.
data_type	Data type for accumulation.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_get_params_sum_v3()

dnnl_status_t DNNL_API dnnl_post_ops_get_params_sum_v3	(	const_dnnl_post_ops_t	post_ops,
		int	index,
		float *	scale,
		int32_t *	zero_point,
		dnnl_data_type_t *	data_type
	)

Returns the parameters of an accumulation (sum) post-op with zero point and data type parameter.

Parameters

post_ops	Post-ops.
index	Index of the sum post-op.
scale	Output accumulation scaling factor.
zero_point	Zero point.
data_type	Data type for accumulation.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_post_ops_len()

int DNNL_API dnnl_post_ops_len ( const_dnnl_post_ops_t post_ops )

Returns the length of post-ops.

Parameters

post_ops Post-ops.

Returns: The number of post-ops entries.

◆ dnnl_primitive_attr_clone()

dnnl_status_t DNNL_API dnnl_primitive_attr_clone	(	dnnl_primitive_attr_t *	attr,
		const_dnnl_primitive_attr_t	existing_attr
	)

Clones primitive attributes.

Parameters

attr	Output primitive attributes.
existing_attr	Primitive attributes to clone.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_create()

dnnl_status_t DNNL_API dnnl_primitive_attr_create ( dnnl_primitive_attr_t * attr )

Creates empty (default) primitive attributes with all the parameters set to their default values.

Empty attributes are implied whenever the respective argument is NULL.

Parameters

attr	Output primitive attributes.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_destroy()

dnnl_status_t DNNL_API dnnl_primitive_attr_destroy ( dnnl_primitive_attr_t attr )

Destroys primitive attributes.

Parameters

attr	Primitive attributes to destroy.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_fpmath_mode()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_fpmath_mode	(	const_dnnl_primitive_attr_t	attr,
		dnnl_fpmath_mode_t *	mode
	)

Returns the floating-point math mode primitive attribute.

Parameters

attr	Primitive attributes.
mode	Output FP math mode.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_output_scales()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_output_scales	(	const_dnnl_primitive_attr_t	attr,
		dnnl_dim_t *	count,
		int *	mask,
		const float **	scales
	)

Returns primitive attributes output scaling factors correspondence mask and values.

Warning: The scales array is an internal part of the primitive attributes attr, so it is an error to modify or destroy the scales array. The lifetime of the scales array is the same as that of the primitive attributes attr to which it belongs, so it is an error to use scales after attr is destroyed.

Parameters

attr	Primitive attributes.
count	Output length of the array of scaling factors `scales`.
mask	Output scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` vector. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common output scaling factor for the whole output tensor.
scales	Output pointer to a constant array of scaling factors.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_post_ops()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_post_ops	(	const_dnnl_primitive_attr_t	attr,
		const_dnnl_post_ops_t *	post_ops
	)

Returns primitive attributes post-ops.

Warning: The output post_ops points to the internal attr field, so it is an error to modify or destroy them. The lifetime of post_ops is the same as that of the attr it belongs to, so it is an error to use post_ops after attr has been destroyed.

Parameters

attr	Primitive attributes.
post_ops	Output post-ops.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_rnn_data_qparams()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_rnn_data_qparams	(	const_dnnl_primitive_attr_t	attr,
		float *	scale,
		float *	shift
	)

Returns the quantization scale and shift parameters for RNN data tensors.

Note: Quantization scale and shift are common for src_layer, src_iter, dst_iter, and dst_layer.

Parameters

attr	Primitive attributes.
scale	The value to scale the data by.
shift	The value to shift the data by.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_rnn_weights_projection_qparams()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_rnn_weights_projection_qparams	(	const_dnnl_primitive_attr_t	attr,
		dnnl_dim_t *	count,
		int *	mask,
		const float **	scales
	)

Returns the quantization scaling factors for RNN projection weights tensors.

Parameters

attr	Primitive attributes.
count	Number of elements in the `scales` array.
mask	Scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` vector. The set i-th bit indicates that a dedicated scaling factor should be used for each index along that dimension. Set the mask to 0 to use a common scaling factor for the whole output tensor.
scales	Array of output scaling factors that contain `count` values and the following equality must hold: $count = \prod\limits_{d \in mask} weights.dims[d].$

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_rnn_weights_qparams()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_rnn_weights_qparams	(	const_dnnl_primitive_attr_t	attr,
		dnnl_dim_t *	count,
		int *	mask,
		const float **	scales
	)

Returns the quantization scaling factors for RNN weights tensors.

Parameters

attr	Primitive attributes.
count	Number of elements in the `scales` array.
mask	Scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` vector. The set i-th bit indicates that a dedicated scaling factor should be used for each index along that dimension. Set the mask to 0 to use a common scaling factor for the whole output tensor.
scales	Array of output scaling factors that contain `count` values and the following equality must hold: $count = \prod\limits_{d \in mask} weights.dims[d].$

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_scales()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_scales	(	dnnl_primitive_attr_t	attr,
		int	arg,
		dnnl_dim_t *	count,
		int *	mask,
		const float **	scales
	)

Returns primitive attributes scaling factors correspondence mask and values for a given memory argument.

Warning: The output scales array is an internal part of the primitive attributes attr, so it is an error to modify or destroy the scales array. The lifetime of the scales array is the same as that of the primitive attributes attr to which it belongs, so it is an error to use scales after attr is destroyed.

Parameters

attr	Primitive attributes.
arg	Parameter argument index as passed to the dnnl_primitive_execute() call.
count	Output length of the array of scaling factors `scales`.
mask	Output scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common scaling factor for the whole output tensor.
scales	Output pointer to a constant array of float scaling factors.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_scratchpad_mode()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_scratchpad_mode	(	const_dnnl_primitive_attr_t	attr,
		dnnl_scratchpad_mode_t *	mode
	)

Returns the primitive attributes scratchpad mode.

Parameters

attr	Primitive attributes.
mode	Output scratchpad mode.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_get_zero_points()

dnnl_status_t DNNL_API dnnl_primitive_attr_get_zero_points	(	const_dnnl_primitive_attr_t	attr,
		int	arg,
		dnnl_dim_t *	count,
		int *	mask,
		const int32_t **	zero_points
	)

Returns count, correspondence zero point mask, and a pointer to a constant int32_t array of zero_points for given attr and memory argument (index), previously set by dnnl_primitive_attr_set_zero_points.

Warning: The output zero_points array is an internal part of the primitive attributes attr, so it is an error to modify or destroy the zero_points array. The lifetime of the zero_points array is the same as that of the primitive attributes attr to which it belongs, so it is an error to use zero_points after attr is destroyed.

Parameters

attr	Primitive attributes.
arg	Parameter argument index as passed to the dnnl_primitive_execute() call.
count	Output length of the array of zero points `zero_points`.
mask	Output zero points correspondence mask that defines the correspondence between the output tensor dimensions and the `zero_points` array. The set i-th bit indicates that a dedicated output zero point is used for each index along that dimension. The mask value of 0 implies a common zero point for the whole output tensor.
zero_points	Output pointer to a constant array of int32_t zero points.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_fpmath_mode()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_fpmath_mode	(	dnnl_primitive_attr_t	attr,
		dnnl_fpmath_mode_t	mode
	)

Sets the floating-point math mode primitive attribute.

Parameters

attr	Primitive attributes.
mode	FP math mode. The possible values are: dnnl_fpmath_mode_strict (default), dnnl_fpmath_mode_bf16, dnnl_fpmath_mode_f16, dnnl_fpmath_mode_any.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_output_scales()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_output_scales	(	dnnl_primitive_attr_t	attr,
		dnnl_dim_t	count,
		int	mask,
		const float *	scales
	)

Sets output scaling factors correspondence mask and values.

Note

The order of dimensions does not depend on how elements are laid out in memory. For example:

for a 2D CNN activations tensor the order is always (n, c)
for a 4D CNN activations tensor the order is always (n, c, h, w)
for a 5D CNN weights tensor the order is always (g, oc, ic, kh, kw)

Example usage:

int mb = 32, oc = 32, oh = 14, ow = 14; // convolution output params
float scales[oc] = { ... }; // unique output scales per output channel
int oc_dim = 1; // mb_dim = 0, channel_dim = 1, height_dim = 2, ...
dnnl_convolution_desc_t conv_d; // create a convolution descriptor
dnnl_primitive_attr_t attr;
dnnl_primitive_attr_create(&attr); // create primitive attributes
dnnl_primitive_attr_set_output_scales(attr, oc, 1 << oc_dim, scales);
dnnl_primitive_desc_t conv_pd;
dnnl_primitive_desc_create(&conv_pd, &conv_d, attr, engine, NULL);

Parameters

attr	Primitive attributes.
count	Length of the array of scaling factors `scales`.
mask	Scaling factors correspondence mask that defines the correspondence between the output tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated output scaling factor is used for each index along that dimension. The mask value of 0 implies a common output scaling factor for the whole output tensor.
scales	Array of output scaling factors. If the output scaling factors are known at the time of this call, this array must contain `count` values and the following equality must hold: $count = \prod\limits_{d \in mask} output.dims[d].$ Violations can only be detected when the attributes are used to create a primitive descriptor. If the output scaling factors are not known at the time of the call, this array must contain a single DNNL_RUNTIME_F32_VAL value and the output scaling factors must be passed at execution time as an argument with index DNNL_ARG_ATTR_OUTPUT_SCALES.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_post_ops()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_post_ops	(	dnnl_primitive_attr_t	attr,
		const_dnnl_post_ops_t	post_ops
	)

Sets primitive attributes post-ops.

Note: There is no way to check whether the post-ops would be supported by the target primitive. Any error will be reported by the dnnl_primitive_desc_create() function call.

Parameters

attr	Primitive attributes.
post_ops	Post-ops to set.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_rnn_data_qparams()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_rnn_data_qparams	(	dnnl_primitive_attr_t	attr,
		const float	scale,
		const float	shift
	)

Sets quantization scale and shift parameters for RNN data tensors.

For performance reasons, the low-precision configuration of the RNN primitives expects input activations to have the unsigned 8-bit integer data type. The scale and shift parameters are used to quantize floating-point data to unsigned integer and must be passed to the RNN primitive using attributes.

The quantization formula is scale * data + shift.

Note: Quantization scale and shift are common for src_layer, src_iter, dst_iter, and dst_layer.

Example usage:

// RNN parameters
int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
// Activations quantization parameters
float scale = 63.f, shift = 64.f;
dnnl_primitive_attr_t rnn_attr;
// Create default attributes
dnnl_primitive_attr_create(&rnn_attr);
// Set scale and shift for int8 quantization of activation
dnnl_primitive_attr_set_rnn_data_qparams(rnn_attr, scale, shift);
// Create and configure rnn op_desc
dnnl_rnn_desc_t rnn_d;
dnnl_primitive_desc_t rnn_pd;
dnnl_primitive_desc_create(&rnn_pd, &rnn_d, rnn_attr, engine, NULL);

Parameters

attr	Primitive attributes.
scale	The value to scale the data by.
shift	The value to shift the data by.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_rnn_weights_projection_qparams()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_rnn_weights_projection_qparams	(	dnnl_primitive_attr_t	attr,
		dnnl_dim_t	count,
		int	mask,
		const float *	scales
	)

Sets quantization scaling factors for RNN projection weights tensors. The low-precision configuration of the RNN primitives expects input weights to use the signed 8-bit integer data type. The scaling factors are used to quantize floating-point data to signed integer and must be passed to RNN primitives using attributes.

Note: The dimension order is always native and does not depend on the actual layout used. For example, five-dimensional weights always have (l, d, i, g, o) logical dimension ordering.

Parameters

attr	Primitive attributes.
count	Number of elements in the `scales` array.
mask	Scaling factors correspondence mask that defines the correspondence between the weights tensor dimensions and the `scales` vector. The set i-th bit indicates that a dedicated scaling factor should be used for each index along that dimension. Set the mask to 0 to use a common scaling factor for the whole weights tensor.
scales	Array of weights scaling factors that must contain `count` values and the following equality must hold: $count = \prod\limits_{d \in mask} weights.dims[d].$ Violations can only be detected when the attributes are used to create a primitive descriptor.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_rnn_weights_qparams()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_rnn_weights_qparams	(	dnnl_primitive_attr_t	attr,
		dnnl_dim_t	count,
		int	mask,
		const float *	scales
	)

Sets quantization scaling factors for RNN weights tensors. The low-precision configuration of the RNN primitives expects input weights to use the signed 8-bit integer data type. The scaling factors are used to quantize floating-point data to signed integer and must be passed to RNN primitives using attributes.

Note: The dimension order is always native and does not depend on the actual layout used. For example, five-dimensional weights always have (l, d, i, g, o) logical dimension ordering. Quantization scales are common for weights_layer and weights_iteration.

Parameters

attr	Primitive attributes.
count	Number of elements in the `scales` array.
mask	Scaling factors correspondence mask that defines the correspondence between the weights tensor dimensions and the `scales` vector. The set i-th bit indicates that a dedicated scaling factor should be used for each index along that dimension. Set the mask to 0 to use a common scaling factor for the whole weights tensor.
scales	Array of weights scaling factors that must contain `count` values and the following equality must hold: $count = \prod\limits_{d \in mask} weights.dims[d].$ Violations can only be detected when the attributes are used to create a primitive descriptor.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_scales()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_scales	(	dnnl_primitive_attr_t	attr,
		int	arg,
		dnnl_dim_t	count,
		int	mask,
		const float *	scales
	)

Sets primitive attributes scaling factors for primitive operations for a given memory argument.

See also: dnnl_primitive_attr_set_output_scales

Parameters

attr	Primitive attributes.
arg	Parameter argument index as passed to the dnnl_primitive_execute() call.
count	Length of the array of scaling factors `scales`.
mask	Scaling factors correspondence mask that defines the correspondence between the tensor dimensions and the `scales` array. The set i-th bit indicates that a dedicated scaling factor is used for each index along that dimension. Set the mask to 0 to use a common scaling factor for the whole output tensor.
scales	Constant array of float scaling factors. This array must contain `count` scales and the following equality must hold: $count = \prod\limits_{d \in mask} output.dims[d].$

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_scratchpad_mode()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_scratchpad_mode	(	dnnl_primitive_attr_t	attr,
		dnnl_scratchpad_mode_t	mode
	)

Sets primitive attributes scratchpad mode.

Parameters

attr	Primitive attributes.
mode	Scratchpad mode. The possible values are: dnnl_scratchpad_mode_library (default) and dnnl_scratchpad_mode_user.

Returns: dnnl_success on success and a status describing the error otherwise.

◆ dnnl_primitive_attr_set_zero_points()

dnnl_status_t DNNL_API dnnl_primitive_attr_set_zero_points	(	dnnl_primitive_attr_t	attr,
		int	arg,
		dnnl_dim_t	count,
		int	mask,
		const int32_t *	zero_points
	)

Sets primitive attributes zero points for primitive operations for a given memory argument.

See also: dnnl_primitive_attr_set_output_scales

Parameters

attr	Primitive attributes.
arg	Parameter argument index as passed to the dnnl_primitive_execute() call.
count	Length of the array of zero points `zero_points`.
mask	Zero point correspondence mask that defines the correspondence between the tensor dimensions and the `zero_points` array. The set i-th bit indicates that a dedicated zero point is used for each index along that dimension. Set the mask to 0 to use a common zero point for the whole output tensor.
zero_points	Constant array of int32_t zero points. If the zero points are known at the time of this call, this array must contain `count` zero points and the following equality must hold: $count = \prod\limits_{d \in mask} output.dims[d].$ If the zero points are not known at the time of the call, this array must contain a single DNNL_RUNTIME_S32_VAL and the zero points must be passed at execution time as an argument with index DNNL_ARG_ATTR_ZERO_POINTS.

Returns: dnnl_success on success and a status describing the error otherwise.

Classes

Typedefs

Enumerations

Functions

Detailed Description

Typedef Documentation

◆ const_dnnl_post_ops_t

◆ const_dnnl_primitive_attr_t

◆ dnnl_post_ops_t

◆ dnnl_primitive_attr_t

Enumeration Type Documentation

◆ dnnl_fpmath_mode_t

◆ dnnl_scratchpad_mode_t

Function Documentation

◆ dnnl_post_ops_append_binary()

◆ dnnl_post_ops_append_dw_k3s1p1()

◆ dnnl_post_ops_append_dw_k3s2p1()

◆ dnnl_post_ops_append_eltwise()

◆ dnnl_post_ops_append_prelu()

◆ dnnl_post_ops_append_sum()

◆ dnnl_post_ops_append_sum_v2()

◆ dnnl_post_ops_append_sum_v3()

◆ dnnl_post_ops_create()

◆ dnnl_post_ops_destroy()

◆ dnnl_post_ops_get_kind()

◆ dnnl_post_ops_get_params_binary()

◆ dnnl_post_ops_get_params_dw_k3s1p1()

◆ dnnl_post_ops_get_params_dw_k3s2p1()

◆ dnnl_post_ops_get_params_eltwise()

◆ dnnl_post_ops_get_params_prelu()

◆ dnnl_post_ops_get_params_sum()

◆ dnnl_post_ops_get_params_sum_v2()

◆ dnnl_post_ops_get_params_sum_v3()

◆ dnnl_post_ops_len()

◆ dnnl_primitive_attr_clone()

◆ dnnl_primitive_attr_create()

◆ dnnl_primitive_attr_destroy()

◆ dnnl_primitive_attr_get_fpmath_mode()

◆ dnnl_primitive_attr_get_output_scales()

◆ dnnl_primitive_attr_get_post_ops()

◆ dnnl_primitive_attr_get_rnn_data_qparams()

◆ dnnl_primitive_attr_get_rnn_weights_projection_qparams()

◆ dnnl_primitive_attr_get_rnn_weights_qparams()

◆ dnnl_primitive_attr_get_scales()

◆ dnnl_primitive_attr_get_scratchpad_mode()

◆ dnnl_primitive_attr_get_zero_points()

◆ dnnl_primitive_attr_set_fpmath_mode()

◆ dnnl_primitive_attr_set_output_scales()

◆ dnnl_primitive_attr_set_post_ops()

◆ dnnl_primitive_attr_set_rnn_data_qparams()

◆ dnnl_primitive_attr_set_rnn_weights_projection_qparams()

◆ dnnl_primitive_attr_set_rnn_weights_qparams()

◆ dnnl_primitive_attr_set_scales()

◆ dnnl_primitive_attr_set_scratchpad_mode()

◆ dnnl_primitive_attr_set_zero_points()