oneAPI Deep Neural Network Library (oneDNN)
Performance library for Deep Learning
1.8.0
Reorder between CPU and GPU engines

This C API example demonstrates the programming flow for reordering memory between CPU and GPU engines.

/*******************************************************************************
* Copyright 2019-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "dnnl.h"
#include "example_utils.h"
/* Returns the total element count described by a dimension list.
 *
 * n_dims - number of entries of dims to multiply together
 * dims   - dimension sizes; each one is converted to size_t before use
 *
 * The result is 1 when n_dims is zero or negative (empty product).
 */
size_t product(int n_dims, const dnnl_dim_t dims[]) {
    size_t total = 1;
    int axis = 0;
    while (axis < n_dims) {
        total = total * (size_t)dims[axis];
        axis++;
    }
    return total;
}
/* Fills a memory object with a +1/-1 test pattern of floats.
 *
 * mem    - destination memory object, passed to write_to_dnnl_memory()
 * n_dims - number of entries in dims
 * dims   - dimension sizes; their product is the number of floats written
 *
 * Elements whose linear index is a multiple of 7 are set to -1.0f and all
 * others to 1.0f, so any non-empty buffer contains a negative value at
 * index 0. The pattern is staged in a temporary host buffer that is freed
 * before returning. The process is terminated with EXIT_FAILURE if that
 * buffer cannot be allocated.
 */
void fill(dnnl_memory_t mem, int n_dims, const dnnl_dim_t dims[]) {
    const size_t n_elems = product(n_dims, dims);

    /* Nothing to write; also keeps a NULL from malloc(0) from being
     * mistaken for an allocation failure. */
    if (n_elems == 0) return;

    float *array = malloc(n_elems * sizeof *array);
    if (array == NULL) {
        fprintf(stderr, "fill: failed to allocate %zu floats\n", n_elems);
        exit(EXIT_FAILURE);
    }

    for (size_t e = 0; e < n_elems; ++e) {
        array[e] = e % 7 ? 1.0f : -1.0f;
    }

    write_to_dnnl_memory(array, mem);
    free(array);
}
/* Counts the strictly negative floats stored in a memory object.
 *
 * mem    - source memory object, passed to read_from_dnnl_memory()
 * n_dims - number of entries in dims
 * dims   - dimension sizes; their product is the number of floats inspected
 *
 * Returns the number of elements that compare less than 0.0f (0 for an
 * empty tensor). The contents are copied into a temporary host buffer that
 * is freed before returning. The process is terminated with EXIT_FAILURE
 * if that buffer cannot be allocated.
 */
int find_negative(dnnl_memory_t mem, int n_dims, const dnnl_dim_t dims[]) {
    const size_t n_elems = product(n_dims, dims);

    /* No elements means no negatives; also keeps a NULL from malloc(0)
     * from being mistaken for an allocation failure. */
    if (n_elems == 0) return 0;

    float *array = malloc(n_elems * sizeof *array);
    if (array == NULL) {
        fprintf(stderr, "find_negative: failed to allocate %zu floats\n",
                n_elems);
        exit(EXIT_FAILURE);
    }

    read_from_dnnl_memory(array, mem);

    int negs = 0;
    for (size_t e = 0; e < n_elems; ++e) {
        negs += array[e] < 0.0f;
    }

    free(array);
    return negs;
}
void cross_engine_reorder() {
dnnl_engine_t engine_cpu, engine_gpu;
CHECK(dnnl_engine_create(&engine_cpu, dnnl_cpu, 0));
CHECK(dnnl_engine_create(&engine_gpu, dnnl_gpu, 0));
dnnl_dim_t tz[4] = {2, 16, 1, 1};
dnnl_memory_desc_t m_cpu_md, m_gpu_md;
CHECK(dnnl_memory_desc_init_by_tag(&m_cpu_md, 4, tz, dnnl_f32, dnnl_nchw));
CHECK(dnnl_memory_desc_init_by_tag(&m_gpu_md, 4, tz, dnnl_f32, dnnl_nchw));
dnnl_memory_t m_cpu, m_gpu;
&m_cpu, &m_cpu_md, engine_cpu, DNNL_MEMORY_ALLOCATE));
&m_gpu, &m_gpu_md, engine_gpu, DNNL_MEMORY_ALLOCATE));
fill(m_cpu, 4, tz);
if (find_negative(m_cpu, 4, tz) == 0)
COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
"%s", "incorrect data fill, no negative values found");
/* reorder cpu -> gpu */
&r1_pd, &m_cpu_md, engine_cpu, &m_gpu_md, engine_gpu, NULL));
CHECK(dnnl_primitive_create(&r1, r1_pd));
/* relu gpu */
&relu_d, dnnl_forward, dnnl_eltwise_relu, &m_gpu_md, 0.0f, 0.0f));
&relu_pd, &relu_d, NULL, engine_gpu, NULL));
CHECK(dnnl_primitive_create(&relu, relu_pd));
/* reorder gpu -> cpu */
&r2_pd, &m_gpu_md, engine_gpu, &m_cpu_md, engine_cpu, NULL));
CHECK(dnnl_primitive_create(&r2, r2_pd));
dnnl_stream_t stream_gpu;
&stream_gpu, engine_gpu, dnnl_stream_default_flags));
dnnl_exec_arg_t r1_args[] = {{DNNL_ARG_FROM, m_cpu}, {DNNL_ARG_TO, m_gpu}};
CHECK(dnnl_primitive_execute(r1, stream_gpu, 2, r1_args));
dnnl_exec_arg_t relu_args[]
= {{DNNL_ARG_SRC, m_gpu}, {DNNL_ARG_DST, m_gpu}};
CHECK(dnnl_primitive_execute(relu, stream_gpu, 2, relu_args));
dnnl_exec_arg_t r2_args[] = {{DNNL_ARG_FROM, m_gpu}, {DNNL_ARG_TO, m_cpu}};
CHECK(dnnl_primitive_execute(r2, stream_gpu, 2, r2_args));
CHECK(dnnl_stream_wait(stream_gpu));
if (find_negative(m_cpu, 4, tz) != 0)
COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
"%s", "found negative values after ReLU applied");
/* clean up */
dnnl_stream_destroy(stream_gpu);
dnnl_engine_destroy(engine_cpu);
dnnl_engine_destroy(engine_gpu);
}
/* Program entry point: runs the cross-engine reorder example and reports
 * success on standard output once it returns. */
int main(void) {
    cross_engine_reorder();
    fputs("Example passed on CPU/GPU.\n", stdout);
    return EXIT_SUCCESS;
}
dnnl_memory
An opaque structure to describe a memory.
DNNL_ARG_TO
#define DNNL_ARG_TO
A special mnemonic for reorder destination argument.
Definition: dnnl_types.h:2142
dnnl_engine
An opaque structure to describe an engine.
dnnl_eltwise_relu
@ dnnl_eltwise_relu
Eltwise: ReLU.
Definition: dnnl_types.h:952
dnnl_exec_arg_t
A structure that contains an index and a memory object, and is used to pass arguments to dnnl_primitive_execute().
Definition: dnnl_types.h:2307
dnnl_gpu
@ dnnl_gpu
GPU engine.
Definition: dnnl_types.h:1985
DNNL_ARG_DST
#define DNNL_ARG_DST
A special mnemonic for destination argument for primitives that have a single destination.
Definition: dnnl_types.h:2139
dnnl_memory_destroy
dnnl_status_t DNNL_API dnnl_memory_destroy(dnnl_memory_t memory)
Destroys a memory object.
dnnl_primitive_execute
dnnl_status_t DNNL_API dnnl_primitive_execute(const_dnnl_primitive_t primitive, dnnl_stream_t stream, int nargs, const dnnl_exec_arg_t *args)
Executes a primitive.
dnnl_stream_destroy
dnnl_status_t DNNL_API dnnl_stream_destroy(dnnl_stream_t stream)
Destroys an execution stream.
dnnl_stream_create
dnnl_status_t DNNL_API dnnl_stream_create(dnnl_stream_t *stream, dnnl_engine_t engine, unsigned flags)
Creates an execution stream.
dnnl_stream_wait
dnnl_status_t DNNL_API dnnl_stream_wait(dnnl_stream_t stream)
Waits for all primitives in the execution stream to finish computations.
dnnl_primitive_desc
An opaque structure to describe a primitive descriptor.
DNNL_MEMORY_ALLOCATE
#define DNNL_MEMORY_ALLOCATE
Special pointer value that indicates that the library needs to allocate an underlying buffer for a memory object.
Definition: dnnl_types.h:1342
dnnl_forward
@ dnnl_forward
Forward data propagation (alias for dnnl_forward_training).
Definition: dnnl_types.h:872
dnnl_f32
@ dnnl_f32
32-bit/single-precision floating point.
Definition: dnnl_types.h:70
dnnl_primitive
An opaque structure to describe a primitive.
dnnl_reorder_primitive_desc_create
dnnl_status_t DNNL_API dnnl_reorder_primitive_desc_create(dnnl_primitive_desc_t *reorder_primitive_desc, const dnnl_memory_desc_t *src_desc, dnnl_engine_t src_engine, const dnnl_memory_desc_t *dst_desc, dnnl_engine_t dst_engine, const_dnnl_primitive_attr_t attr)
Creates a primitive descriptor for a reorder primitive.
dnnl_dim_t
int64_t dnnl_dim_t
A type to describe tensor dimension.
Definition: dnnl_types.h:1165
DNNL_ARG_SRC
#define DNNL_ARG_SRC
A special mnemonic for source argument for primitives that have a single source.
Definition: dnnl_types.h:2115
dnnl_stream_default_flags
@ dnnl_stream_default_flags
Default stream configuration.
Definition: dnnl_types.h:2427
dnnl_eltwise_desc_t
A descriptor of an element-wise operation.
Definition: dnnl_types.h:1441
dnnl_eltwise_forward_desc_init
dnnl_status_t DNNL_API dnnl_eltwise_forward_desc_init(dnnl_eltwise_desc_t *eltwise_desc, dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind, const dnnl_memory_desc_t *data_desc, float alpha, float beta)
Initializes a descriptor for eltwise forward propagation primitive.
dnnl_engine_create
dnnl_status_t DNNL_API dnnl_engine_create(dnnl_engine_t *engine, dnnl_engine_kind_t kind, size_t index)
Creates an engine.
dnnl_cpu
@ dnnl_cpu
CPU engine.
Definition: dnnl_types.h:1983
dnnl_primitive_destroy
dnnl_status_t DNNL_API dnnl_primitive_destroy(dnnl_primitive_t primitive)
Destroys a primitive.
dnnl_memory_create
dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory, const dnnl_memory_desc_t *memory_desc, dnnl_engine_t engine, void *handle)
Creates a memory object.
dnnl_memory_desc_t
Memory descriptor.
Definition: dnnl_types.h:1277
dnnl_engine_destroy
dnnl_status_t DNNL_API dnnl_engine_destroy(dnnl_engine_t engine)
Destroys an engine.
dnnl.h
C API.
dnnl_primitive_create
dnnl_status_t DNNL_API dnnl_primitive_create(dnnl_primitive_t *primitive, const_dnnl_primitive_desc_t primitive_desc)
Creates a primitive.
dnnl_stream
An opaque structure to describe an execution stream.
dnnl_primitive_desc_create
dnnl_status_t DNNL_API dnnl_primitive_desc_create(dnnl_primitive_desc_t *primitive_desc, const_dnnl_op_desc_t op_desc, const_dnnl_primitive_attr_t attr, dnnl_engine_t engine, const_dnnl_primitive_desc_t hint_forward_primitive_desc)
Creates a primitive descriptor.
dnnl_memory_desc_init_by_tag
dnnl_status_t DNNL_API dnnl_memory_desc_init_by_tag(dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_format_tag_t tag)
Initializes a memory descriptor using dimensions and memory format tag.
dnnl_primitive_desc_destroy
dnnl_status_t DNNL_API dnnl_primitive_desc_destroy(dnnl_primitive_desc_t primitive_desc)
Destroys a primitive descriptor.
dnnl_nchw
@ dnnl_nchw
4D CNN activations tensor, an alias to dnnl_abcd
Definition: dnnl_types.h:485
DNNL_ARG_FROM
#define DNNL_ARG_FROM
A special mnemonic for reorder source argument.
Definition: dnnl_types.h:2121