#include <ginkgo/core/base/executor.hpp>

Inheritance diagram for gko::Executor:

Classes
struct	exec_info

Public Member Functions
	Executor (Executor &)=delete

	Executor (Executor &&)=delete

Executor &	operator= (Executor &)=delete

Executor &	operator= (Executor &&)=delete

virtual void	run (const Operation &op) const =0

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void	run (const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void	run (std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const

template<typename T >
T *	alloc (size_type num_elems) const

void	free (void *ptr) const noexcept

template<typename T >
void	copy_from (ptr_param< const Executor > src_exec, size_type num_elems, const T *src_ptr, T *dest_ptr) const

template<typename T >
void	copy (size_type num_elems, const T *src_ptr, T *dest_ptr) const

template<typename T >
T	copy_val_to_host (const T *ptr) const

virtual std::shared_ptr< Executor >	get_master () noexcept=0

virtual std::shared_ptr< const Executor >	get_master () const noexcept=0

virtual void	synchronize () const =0

void	add_logger (std::shared_ptr< const log::Logger > logger) override

void	remove_logger (const log::Logger *logger) override

void	set_log_propagation_mode (log_propagation_mode mode)

bool	should_propagate_log () const

bool	memory_accessible (const std::shared_ptr< const Executor > &other) const

virtual scoped_device_id_guard	get_scoped_device_id_guard () const =0

virtual std::string	get_description () const =0

Public Member Functions inherited from gko::log::EnableLogging< Executor >
void	remove_logger (ptr_param< const Logger > logger)

const std::vector< std::shared_ptr< const Logger > > &	get_loggers () const override

void	clear_loggers () override

Public Member Functions inherited from gko::log::Loggable
void	remove_logger (ptr_param< const Logger > logger)

Protected Member Functions
const exec_info &	get_exec_info () const

virtual void *	raw_alloc (size_type size) const =0

virtual void	raw_free (void *ptr) const noexcept=0

virtual void	raw_copy_from (const Executor *src_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const =0

virtual void	raw_copy_to (const OmpExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const =0

virtual void	raw_copy_to (const HipExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const =0

virtual void	raw_copy_to (const DpcppExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const =0

virtual void	raw_copy_to (const CudaExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const =0

virtual bool	verify_memory_from (const Executor *src_exec) const =0

virtual bool	verify_memory_to (const OmpExecutor *dest_exec) const =0

virtual bool	verify_memory_to (const HipExecutor *dest_exec) const =0

virtual bool	verify_memory_to (const DpcppExecutor *dest_exec) const =0

virtual bool	verify_memory_to (const CudaExecutor *dest_exec) const =0

virtual bool	verify_memory_to (const ReferenceExecutor *dest_exec) const =0

virtual void	populate_exec_info (const machine_topology *mach_topo)=0

exec_info &	get_exec_info ()

Protected Member Functions inherited from gko::log::EnableLogging< Executor >
void	log (Params &&... params) const

Protected Attributes
exec_info	exec_info_

log_propagation_mode	log_propagation_mode_ {log_propagation_mode::automatic}

std::atomic< int >	propagating_logger_refcount_ {}

Protected Attributes inherited from gko::log::EnableLogging< Executor >
std::vector< std::shared_ptr< const Logger > >	loggers_

Friends
class	OmpExecutor

class	HipExecutor

class	DpcppExecutor

class	CudaExecutor

class	ReferenceExecutor

Detailed Description

The first step in using the Ginkgo library consists of creating an executor. Executors are used to specify the location for the data of linear algebra objects, and to determine where the operations will be executed. Ginkgo currently supports five different executor types:

OmpExecutor specifies that the data should be stored and the associated operations executed on an OpenMP-supporting device (e.g. host CPU);
CudaExecutor specifies that the data should be stored and the operations executed on the NVIDIA GPU accelerator;
HipExecutor specifies that the data should be stored and the operations executed on either an NVIDIA or AMD GPU accelerator;
DpcppExecutor specifies that the data should be stored and the operations executed on hardware supporting DPC++;
ReferenceExecutor executes a non-optimized reference implementation, which can be used to debug the library.

The following code snippet demonstrates the simplest possible use of the Ginkgo library:

auto omp = gko::create<gko::OmpExecutor>();

auto A = gko::read_from_mtx<gko::matrix::Csr<float>>("A.mtx", omp);

First, we create an OMP executor, which will be used in the next line to specify where we want the data for the matrix A to be stored. The second line will read a matrix from the matrix market file 'A.mtx', and store the data on the CPU in CSR format (gko::matrix::Csr is a Ginkgo matrix class which stores its data in CSR format). At this point, matrix A is bound to the CPU, and any routines called on it will be performed on the CPU. This approach is usually desired in sparse linear algebra, as the cost of individual operations is several orders of magnitude lower than the cost of copying the matrix to the GPU.

If matrix A is going to be reused multiple times, it could be beneficial to copy it over to the accelerator, and perform the operations there, as demonstrated by the next code snippet:

auto cuda = gko::create<gko::CudaExecutor>(0, omp);

auto dA = gko::copy_to<gko::matrix::Csr<float>>(A.get(), cuda);

The first line of the snippet creates a new CUDA executor. Since there may be multiple NVIDIA GPUs present on the system, the first parameter instructs the library to use the first device (i.e. the one with device ID zero, as in the cudaSetDevice() routine from the CUDA runtime API). In addition, since GPUs are not stand-alone processors, it is required to pass a "master" OmpExecutor which will be used to schedule the requested CUDA kernels on the accelerator.

The second command creates a copy of the matrix A on the GPU. Notice the use of the get() method. As Ginkgo aims to provide automatic memory management of its objects, the result of calling gko::read_from_mtx() is a smart pointer (std::unique_ptr) to the created object. On the other hand, as the library will not hold a reference to A once the copy is completed, the input parameter for gko::copy_to() is a plain pointer. Thus, the get() method is used to convert from a std::unique_ptr to a plain pointer, as expected by gko::copy_to().

As a side note, the gko::copy_to routine is far more powerful than just copying data between different devices. It can also be used to convert data between different formats. For example, if the above code used gko::matrix::Ell as the template parameter, dA would be stored on the GPU, in ELLPACK format.

Finally, if all the processing of the matrix is supposed to be done on the GPU, and a CPU copy of the matrix is not required, we could have read the matrix to the GPU directly:

auto omp = gko::create<gko::OmpExecutor>();
auto cuda = gko::create<gko::CudaExecutor>(0, omp);
auto dA = gko::read_from_mtx<gko::matrix::Csr<float>>("A.mtx", cuda);

Notice that even though reading the matrix directly from a file to the accelerator is not supported, the library is designed to abstract away the intermediate step of reading the matrix to the CPU memory. This is a general design approach taken by the library: in case an operation is not supported by the device, the data will be copied to the CPU, the operation performed there, and finally the results copied back to the device. This approach makes using the library more concise, as explicit copies are not required by the user. Nevertheless, this feature should be taken into account when considering performance implications of using such operations.

Member Function Documentation

◆ add_logger()

void gko::Executor::add_logger ( std::shared_ptr< const log::Logger > logger )

inline override virtual

Note: This specialization keeps track of whether any propagating loggers were attached to the executor.

See also: Logger::needs_propagation()

Reimplemented from gko::log::EnableLogging< Executor >.

◆ alloc()

template<typename T >

T * gko::Executor::alloc ( size_type num_elems ) const

inline

Allocates memory in this Executor.

Template Parameters

T	datatype to allocate

Parameters

num_elems number of elements of type T to allocate

Exceptions

AllocationError if the allocation failed

Returns: pointer to allocated memory

◆ copy()

template<typename T >

void gko::Executor::copy	(	size_type	num_elems,
		const T *	src_ptr,
		T *	dest_ptr
	)		const

inline

Copies data within this Executor.

Template Parameters

T	datatype to copy

Parameters

num_elems	number of elements of type T to copy
src_ptr	pointer to a block of memory containing the data to be copied
dest_ptr	pointer to an allocated block of memory where the data will be copied to

◆ copy_from()

template<typename T >

void gko::Executor::copy_from	(	ptr_param< const Executor >	src_exec,
		size_type	num_elems,
		const T *	src_ptr,
		T *	dest_ptr
	)		const

inline

Copies data from another Executor.

Template Parameters

T	datatype to copy

Parameters

src_exec	Executor from which the memory will be copied
num_elems	number of elements of type T to copy
src_ptr	pointer to a block of memory containing the data to be copied
dest_ptr	pointer to an allocated block of memory where the data will be copied to

References gko::ptr_param< T >::get().

◆ copy_val_to_host()

template<typename T >

T gko::Executor::copy_val_to_host ( const T * ptr ) const

inline

Retrieves a single element at the given location from executor memory.

Template Parameters

T	datatype to copy

Parameters

ptr	the pointer to the element to be copied

Returns: the value stored at ptr

◆ free()

void gko::Executor::free ( void * ptr ) const

inline noexcept

Frees memory previously allocated with Executor::alloc().

If ptr is a nullptr, the function has no effect.

Parameters

ptr	pointer to the allocated memory block

◆ get_description()

virtual std::string gko::Executor::get_description ( ) const

pure virtual

Returns: a textual representation of the executor and its device.

Implemented in gko::OmpExecutor, gko::ReferenceExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ get_exec_info() [1/2]

exec_info & gko::Executor::get_exec_info ( )

inline protected

Gets the modifiable exec info object.

Returns: a reference to the modifiable exec_info object

◆ get_exec_info() [2/2]

const exec_info & gko::Executor::get_exec_info ( ) const

inline protected

Gets the exec info struct

Returns: the exec_info struct

◆ get_master() [1/2]

virtual std::shared_ptr< const Executor > gko::Executor::get_master ( ) const

pure virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implemented in gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ get_master() [2/2]

virtual std::shared_ptr< Executor > gko::Executor::get_master ( )

pure virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implemented in gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ memory_accessible()

bool gko::Executor::memory_accessible ( const std::shared_ptr< const Executor > & other ) const

inline

Verifies whether the executors share the same memory.

Parameters

other the other Executor to compare against

Returns: whether the executors this and other share the same memory.

◆ populate_exec_info()

virtual void gko::Executor::populate_exec_info ( const machine_topology * mach_topo )

protected pure virtual

Populates the executor specific info from the global machine topology object.

Parameters

mach_topo the machine topology object.

Implemented in gko::ReferenceExecutor, gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ raw_alloc()

virtual void * gko::Executor::raw_alloc ( size_type size ) const

protected pure virtual

Allocates raw memory in this Executor.

Parameters

size	number of bytes to allocate

Exceptions

AllocationError if the allocation failed

Returns: raw pointer to allocated memory

Implemented in gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ raw_copy_from()

virtual void gko::Executor::raw_copy_from	(	const Executor *	src_exec,
		size_type	n_bytes,
		const void *	src_ptr,
		void *	dest_ptr
	)		const

protected pure virtual

Copies raw data from another Executor.

Parameters

src_exec	Executor from which the memory will be copied
n_bytes	number of bytes to copy
src_ptr	pointer to a block of memory containing the data to be copied
dest_ptr	pointer to an allocated block of memory where the data will be copied to

◆ raw_free()

virtual void gko::Executor::raw_free ( void * ptr ) const

protected pure virtual noexcept

Frees memory previously allocated with Executor::alloc().

If ptr is a nullptr, the function has no effect.

Parameters

ptr	pointer to the allocated memory block

Implemented in gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ remove_logger()

void gko::Executor::remove_logger ( const log::Logger * logger )

inline override virtual

Note: This specialization keeps track of whether any propagating loggers were attached to the executor.

See also: Logger::needs_propagation()

Reimplemented from gko::log::EnableLogging< Executor >.

References gko::log::Logger::needs_propagation().

◆ run() [1/3]

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >

void gko::Executor::run	(	const ClosureOmp &	op_omp,
		const ClosureCuda &	op_cuda,
		const ClosureHip &	op_hip,
		const ClosureDpcpp &	op_dpcpp
	)		const

inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters

ClosureOmp	type of op_omp
ClosureCuda	type of op_cuda
ClosureHip	type of op_hip
ClosureDpcpp	type of op_dpcpp

Parameters

op_omp	functor to run in case of an OmpExecutor or ReferenceExecutor
op_cuda	functor to run in case of a CudaExecutor
op_hip	functor to run in case of a HipExecutor
op_dpcpp	functor to run in case of a DpcppExecutor

◆ run() [2/3]

virtual void gko::Executor::run ( const Operation & op ) const

pure virtual

Runs the specified Operation using this Executor.

Parameters

op	the operation to run

Implemented in gko::ReferenceExecutor, gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

◆ run() [3/3]

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >

void gko::Executor::run	(	std::string	name,
		const ClosureReference &	op_ref,
		const ClosureOmp &	op_omp,
		const ClosureCuda &	op_cuda,
		const ClosureHip &	op_hip,
		const ClosureDpcpp &	op_dpcpp
	)		const

inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters

ClosureReference	type of op_ref
ClosureOmp	type of op_omp
ClosureCuda	type of op_cuda
ClosureHip	type of op_hip
ClosureDpcpp	type of op_dpcpp

Parameters

name	the name of the operation
op_ref	functor to run in case of a ReferenceExecutor
op_omp	functor to run in case of an OmpExecutor
op_cuda	functor to run in case of a CudaExecutor
op_hip	functor to run in case of a HipExecutor
op_dpcpp	functor to run in case of a DpcppExecutor

◆ set_log_propagation_mode()

void gko::Executor::set_log_propagation_mode ( log_propagation_mode mode )

inline

Sets the logger event propagation mode for the executor. This controls whether events that happen at objects created on this executor will also be logged at propagating loggers attached to the executor.

See also: Logger::needs_propagation()

◆ should_propagate_log()

bool gko::Executor::should_propagate_log ( ) const

inline

Returns true iff events occurring at an object created on this executor should be logged at propagating loggers attached to this executor, and there is at least one such propagating logger.

See also: Logger::needs_propagation(); Executor::set_log_propagation_mode(log_propagation_mode)

◆ synchronize()

virtual void gko::Executor::synchronize ( ) const

pure virtual

Synchronize the operations launched on the executor with its master.

Implemented in gko::OmpExecutor, gko::CudaExecutor, gko::HipExecutor, and gko::DpcppExecutor.

Referenced by loggers::OperationLogger::on_copy_completed(), and loggers::OperationLogger::on_copy_started().

◆ verify_memory_from()

virtual bool gko::Executor::verify_memory_from ( const Executor * src_exec ) const

protected pure virtual

Verify the memory from another Executor.

Parameters

src_exec Executor from which to verify the memory.

Returns: whether this executor and src_exec share the same memory.

Implemented in gko::ReferenceExecutor.

The documentation for this class was generated from the following file:

ginkgo/core/base/executor.hpp

Executor Class Reference

Executor Class Reference

Classes

Public Member Functions

Protected Member Functions

Protected Attributes

Friends

Detailed Description

Member Function Documentation

◆ add_logger()

◆ alloc()

◆ copy()

◆ copy_from()

◆ copy_val_to_host()

◆ free()

◆ get_description()

◆ get_exec_info() [1/2]

◆ get_exec_info() [2/2]

◆ get_master() [1/2]

◆ get_master() [2/2]

◆ memory_accessible()

◆ populate_exec_info()

◆ raw_alloc()

◆ raw_copy_from()

◆ raw_free()

◆ remove_logger()

◆ run() [1/3]

◆ run() [2/3]

◆ run() [3/3]

◆ set_log_propagation_mode()

◆ should_propagate_log()

◆ synchronize()

◆ verify_memory_from()