mxnet
|
Classes | |
struct | dnnl_version_t |
Macros | |
#define | DNNL_RUNTIME_NONE 0u |
No runtime (disabled) More... | |
#define | DNNL_RUNTIME_SEQ 1u |
Sequential runtime (CPU only) More... | |
#define | DNNL_RUNTIME_OMP 2u |
OpenMP runtime (CPU only) More... | |
#define | DNNL_RUNTIME_TBB 4u |
TBB runtime (CPU only) More... | |
#define | DNNL_RUNTIME_THREADPOOL 8u |
Threadpool runtime (CPU only) More... | |
#define | DNNL_RUNTIME_OCL 256u |
OpenCL runtime. More... | |
#define | DNNL_JIT_PROFILE_NONE 0u |
Disable profiling completely. More... | |
#define | DNNL_JIT_PROFILE_VTUNE 1u |
Enable VTune Amplifier integration. More... | |
#define | DNNL_JIT_PROFILE_LINUX_PERFMAP 2u |
Enable Linux perf integration via perfmap files. More... | |
#define | DNNL_JIT_PROFILE_LINUX_JITDUMP 4u |
Enable Linux perf integration via jitdump files. More... | |
#define | DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u |
#define | DNNL_JIT_PROFILE_LINUX_PERF (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP) |
Enable Linux perf integration (both jitdump and perfmap) More... | |
Enumerations | |
enum | dnnl_cpu_isa_t { dnnl_cpu_isa_all = 0x0, dnnl_cpu_isa_sse41 = 0x1, dnnl_cpu_isa_avx = 0x3, dnnl_cpu_isa_avx2 = 0x7, dnnl_cpu_isa_avx512_mic = 0xf, dnnl_cpu_isa_avx512_mic_4ops = 0x1f, dnnl_cpu_isa_avx512_core = 0x27, dnnl_cpu_isa_avx512_core_vnni = 0x67, dnnl_cpu_isa_avx512_core_bf16 = 0xe7, dnnl_cpu_isa_avx512_core_amx = 0x3e7 } |
CPU instruction set flags. More... | |
Functions | |
dnnl_status_t DNNL_API | dnnl_set_verbose (int level) |
dnnl_status_t DNNL_API | dnnl_set_jit_dump (int enable) |
const dnnl_version_t DNNL_API * | dnnl_version () |
dnnl_status_t DNNL_API | dnnl_set_jit_profiling_flags (unsigned flags) |
dnnl_status_t DNNL_API | dnnl_set_jit_profiling_jitdumpdir (const char *dir) |
dnnl_status_t DNNL_API | dnnl_set_max_cpu_isa (dnnl_cpu_isa_t isa) |
dnnl_cpu_isa_t DNNL_API | dnnl_get_effective_cpu_isa (void) |
#define DNNL_JIT_PROFILE_LINUX_JITDUMP 4u |
Enable Linux perf integration via jitdump files.
#define DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u |
Instruct Linux perf integration via jitdump files to use TSC. DNNL_JIT_PROFILE_LINUX_JITDUMP must be set too for this to take effect.
#define DNNL_JIT_PROFILE_LINUX_PERF (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP) |
Enable Linux perf integration (both jitdump and perfmap)
#define DNNL_JIT_PROFILE_LINUX_PERFMAP 2u |
Enable Linux perf integration via perfmap files.
#define DNNL_JIT_PROFILE_NONE 0u |
Disable profiling completely.
#define DNNL_JIT_PROFILE_VTUNE 1u |
Enable VTune Amplifier integration.
#define DNNL_RUNTIME_NONE 0u |
No runtime (disabled)
#define DNNL_RUNTIME_OCL 256u |
OpenCL runtime.
#define DNNL_RUNTIME_OMP 2u |
OpenMP runtime (CPU only)
#define DNNL_RUNTIME_SEQ 1u |
Sequential runtime (CPU only)
#define DNNL_RUNTIME_TBB 4u |
TBB runtime (CPU only)
#define DNNL_RUNTIME_THREADPOOL 8u |
Threadpool runtime (CPU only)
enum dnnl_cpu_isa_t |
CPU instruction set flags.
dnnl_cpu_isa_t DNNL_API dnnl_get_effective_cpu_isa | ( | void | ) |
Gets the maximal ISA the library can dispatch to on the CPU. See dnnl_cpu_isa_t and #dnnl::cpu_isa for the list of the values returned by the C and C++ API functions respectively.
dnnl_status_t DNNL_API dnnl_set_jit_dump | ( | int | enable | ) |
Configures dumping of JIT-generated code.
enable | Flag value. Set to 0 to disable and set to 1 to enable. |
Returns dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the flag value is invalid, and dnnl_success/#dnnl::status::success on success.
dnnl_status_t DNNL_API dnnl_set_jit_profiling_flags | ( | unsigned | flags | )
Sets library profiling flags. The flags define which profilers are supported.
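flags | Profiling flags that can contain the following bits:
- DNNL_JIT_PROFILE_VTUNE: enable VTune Amplifier integration.
- DNNL_JIT_PROFILE_LINUX_PERFMAP: enable Linux perf integration via perfmap files.
- DNNL_JIT_PROFILE_LINUX_JITDUMP: enable Linux perf integration via jitdump files.
- DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC: instruct the jitdump integration to use TSC; takes effect only together with DNNL_JIT_PROFILE_LINUX_JITDUMP.
- DNNL_JIT_PROFILE_LINUX_PERF: shorthand for (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP).
|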
Passing DNNL_JIT_PROFILE_NONE disables profiling completely.
Returns dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the flags value is invalid, and dnnl_success/#dnnl::status::success on success.
dnnl_status_t DNNL_API dnnl_set_jit_profiling_jitdumpdir | ( | const char * | dir | )
Sets JIT dump output path. Only applicable to Linux and is only used when profiling flags have the DNNL_JIT_PROFILE_LINUX_PERF bit set.
After the first JIT kernel is generated, the jitdump output will be placed into a temporary directory created using the mkdtemp template 'dir/.debug/jit/dnnl.XXXXXX'.
dir | JIT dump output path. |
dnnl_status_t DNNL_API dnnl_set_max_cpu_isa | ( | dnnl_cpu_isa_t | isa | ) |
Sets the maximal ISA the library can dispatch to on the CPU. See dnnl_cpu_isa_t and #dnnl::cpu_isa for the list of the values accepted by the C and C++ API functions respectively.
This function has an effect only before the first JIT kernel is generated and will return an error afterwards.
This function overrides the DNNL_MAX_CPU_ISA environment variable. The environment variable can be set to the desired maximal ISA name in upper case and with dnnl_cpu_isa prefix removed. For example: DNNL_MAX_CPU_ISA=AVX2.
isa | Maximal ISA the library should dispatch to. Pass dnnl_cpu_isa_all/#dnnl::cpu_isa::all to remove ISA restrictions (except for ISAs with initial support in the library). |
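Returns dnnl_success/#dnnl::status::success on success and dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the isa parameter is invalid or the ISA cannot be changed at this time.
dnnl_status_t DNNL_API dnnl_set_verbose | ( | int | level | )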
Configures verbose output to stdout.
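level | Verbosity level:
- 0: no verbose output (default),
- 1: primitive information at execution,
- 2: primitive information at creation and execution.
|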
Returns dnnl_invalid_arguments/#dnnl::status::invalid_arguments if the level value is invalid, and dnnl_success/#dnnl::status::success on success.
const dnnl_version_t DNNL_API* dnnl_version | ( | )
Returns library version information.