#include <ginkgo/core/base/executor.hpp>

Inheritance diagram for gko::CudaExecutor:

Public Member Functions
std::shared_ptr< Executor >	get_master () noexcept override

std::shared_ptr< const Executor >	get_master () const noexcept override

void	synchronize () const override

scoped_device_id_guard	get_scoped_device_id_guard () const override

std::string	get_description () const override

int	get_device_id () const noexcept

int	get_num_warps_per_sm () const noexcept

int	get_num_multiprocessor () const noexcept

int	get_num_warps () const noexcept

int	get_warp_size () const noexcept

int	get_major_version () const noexcept

int	get_minor_version () const noexcept

cublasContext *	get_cublas_handle () const

cublasContext *	get_blas_handle () const

cusparseContext *	get_cusparse_handle () const

cusparseContext *	get_sparselib_handle () const

std::vector< int >	get_closest_pus () const

int	get_closest_numa () const

CUstream_st *	get_stream () const

virtual void	run (const Operation &op) const=0

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void	run (const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void	run (std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const

Static Public Member Functions
static std::shared_ptr< CudaExecutor >	create (int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)

static std::shared_ptr< CudaExecutor >	create (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc=std::make_shared< CudaAllocator >(), CUstream_st *stream=nullptr)

static int	get_num_devices ()

Protected Member Functions
void	set_gpu_property ()

void	init_handles ()

	CudaExecutor (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc, CUstream_st *stream)

void *	raw_alloc (size_type size) const override

void	raw_free (void *ptr) const noexcept override

void	raw_copy_to (const OmpExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override

void	raw_copy_to (const HipExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override

void	raw_copy_to (const DpcppExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override

void	raw_copy_to (const CudaExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override

virtual bool	verify_memory_to (const OmpExecutor *other) const override

virtual bool	verify_memory_to (const ReferenceExecutor *other) const override

virtual bool	verify_memory_to (const DpcppExecutor *other) const override

bool	verify_memory_to (const HipExecutor *dest_exec) const override

bool	verify_memory_to (const CudaExecutor *dest_exec) const override

void	populate_exec_info (const machine_topology *mach_topo) override

Detailed Description

This is the Executor subclass which represents the CUDA device.

Member Function Documentation

◆ create() [1/2]

static std::shared_ptr< CudaExecutor > gko::CudaExecutor::create	(	int	device_id,
		std::shared_ptr< Executor >	master,
		bool	device_reset,
		allocation_mode	alloc_mode = `default_cuda_alloc_mode`,
		CUstream_st *	stream = `nullptr`
	)

static

Creates a new CudaExecutor.

Parameters

device_id	the CUDA device id of this device
master	an executor on the host that is used to invoke the device kernels
device_reset	this option no longer has any effect.
alloc_mode	the allocation mode that the executor should operate on. See allocation_mode for more details.
stream	the stream to execute operations on.

◆ create() [2/2]

static std::shared_ptr< CudaExecutor > gko::CudaExecutor::create	(	int	device_id,
		std::shared_ptr< Executor >	master,
		std::shared_ptr< CudaAllocatorBase >	alloc = `std::make_shared< CudaAllocator >()`,
		CUstream_st *	stream = `nullptr`
	)

static

Creates a new CudaExecutor with a custom allocator and device stream.

Parameters

device_id	the CUDA device id of this device
master	an executor on the host that is used to invoke the device kernels.
alloc	the allocator to use for device memory allocations.
stream	the stream to execute operations on.

◆ get_blas_handle()

cublasContext * gko::CudaExecutor::get_blas_handle ( ) const

inline

Get the cublas handle for this executor

Returns: the cublas handle (cublasContext*) for this executor

◆ get_closest_numa()

int gko::CudaExecutor::get_closest_numa ( ) const

inline

Get the closest NUMA node

Returns: the NUMA node closest to this device

◆ get_closest_pus()

std::vector< int > gko::CudaExecutor::get_closest_pus ( ) const

inline

Get the closest PUs

Returns: the array of PUs closest to this device

◆ get_cublas_handle()

cublasContext * gko::CudaExecutor::get_cublas_handle ( ) const

inline

Get the cublas handle for this executor

Returns: the cublas handle (cublasContext*) for this executor

◆ get_cusparse_handle()

cusparseContext * gko::CudaExecutor::get_cusparse_handle ( ) const

inline

Get the cusparse handle for this executor

Returns: the cusparse handle (cusparseContext*) for this executor

◆ get_description()

std::string gko::CudaExecutor::get_description ( ) const

override virtual

Returns: a textual representation of the executor and its device.

Implements gko::Executor.

◆ get_device_id()

int gko::CudaExecutor::get_device_id ( ) const

inline noexcept

Get the CUDA device id of the device associated to this executor.

◆ get_major_version()

int gko::CudaExecutor::get_major_version ( ) const

inline noexcept

Get the major version of compute capability.

◆ get_master() [1/2]

std::shared_ptr< const Executor > gko::CudaExecutor::get_master ( ) const

override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_master() [2/2]

std::shared_ptr< Executor > gko::CudaExecutor::get_master ( )

override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns: the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_minor_version()

int gko::CudaExecutor::get_minor_version ( ) const

inline noexcept

Get the minor version of compute capability.

◆ get_num_devices()

static int gko::CudaExecutor::get_num_devices ( )

static

Get the number of devices present on the system.

◆ get_num_multiprocessor()

int gko::CudaExecutor::get_num_multiprocessor ( ) const

inline noexcept

Get the number of multiprocessors of this executor.

◆ get_num_warps()

int gko::CudaExecutor::get_num_warps ( ) const

inline noexcept

Get the number of warps of this executor.

◆ get_num_warps_per_sm()

int gko::CudaExecutor::get_num_warps_per_sm ( ) const

inline noexcept

Get the number of warps per SM of this executor.

◆ get_scoped_device_id_guard()

scoped_device_id_guard gko::CudaExecutor::get_scoped_device_id_guard ( ) const

override virtual

Implements gko::Executor.

◆ get_sparselib_handle()

cusparseContext * gko::CudaExecutor::get_sparselib_handle ( ) const

inline

Get the cusparse handle for this executor

Returns: the cusparse handle (cusparseContext*) for this executor

◆ get_stream()

CUstream_st * gko::CudaExecutor::get_stream ( ) const

inline

Returns the CUDA stream used by this executor. Can be nullptr for the default stream.

Returns: the stream used to execute kernels and memory operations.

◆ get_warp_size()

int gko::CudaExecutor::get_warp_size ( ) const

inline noexcept

Get the warp size of this executor.

◆ populate_exec_info()

void gko::CudaExecutor::populate_exec_info ( const machine_topology * mach_topo )

override protected virtual

Populates the executor specific info from the global machine topology object.

Parameters

mach_topo the machine topology object.

Implements gko::Executor.

◆ raw_alloc()

void * gko::CudaExecutor::raw_alloc ( size_type size ) const

override protected virtual

Allocates raw memory in this Executor.

Parameters

size	number of bytes to allocate

Exceptions

AllocationError if the allocation failed

Returns: raw pointer to allocated memory

Implements gko::Executor.

◆ raw_copy_to() [1/4]

void gko::CudaExecutor::raw_copy_to	(	const CudaExecutor *	dest_exec,
		size_type	n_bytes,
		const void *	src_ptr,
		void *	dest_ptr
	)		const

override protected virtual

Implements gko::Executor.

◆ raw_copy_to() [2/4]

void gko::CudaExecutor::raw_copy_to	(	const DpcppExecutor *	dest_exec,
		size_type	n_bytes,
		const void *	src_ptr,
		void *	dest_ptr
	)		const

override protected virtual

Implements gko::Executor.

◆ raw_copy_to() [3/4]

void gko::CudaExecutor::raw_copy_to	(	const HipExecutor *	dest_exec,
		size_type	n_bytes,
		const void *	src_ptr,
		void *	dest_ptr
	)		const

override protected virtual

Implements gko::Executor.

◆ raw_copy_to() [4/4]

void gko::CudaExecutor::raw_copy_to	(	const OmpExecutor *	dest_exec,
		size_type	n_bytes,
		const void *	src_ptr,
		void *	dest_ptr
	)		const

override protected virtual

Implements gko::Executor.

◆ raw_free()

void gko::CudaExecutor::raw_free ( void * ptr ) const

override protected virtual noexcept

Frees memory previously allocated with Executor::alloc().

If ptr is a nullptr, the function has no effect.

Parameters

ptr	pointer to the allocated memory block

Implements gko::Executor.

◆ run() [1/3]

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >

void gko::Executor::run	(	const ClosureOmp &	op_omp,
		const ClosureCuda &	op_cuda,
		const ClosureHip &	op_hip,
		const ClosureDpcpp &	op_dpcpp
	)		const

inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters

ClosureOmp	type of op_omp
ClosureCuda	type of op_cuda
ClosureHip	type of op_hip
ClosureDpcpp	type of op_dpcpp

Parameters

op_omp	functor to run in case of an OmpExecutor or ReferenceExecutor
op_cuda	functor to run in case of a CudaExecutor
op_hip	functor to run in case of a HipExecutor
op_dpcpp	functor to run in case of a DpcppExecutor

◆ run() [2/3]

virtual void gko::Executor::run ( const Operation & op ) const

virtual

Runs the specified Operation using this Executor.

Parameters

op	the operation to run

Implements gko::Executor.

◆ run() [3/3]

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >

void gko::Executor::run	(	std::string	name,
		const ClosureReference &	op_ref,
		const ClosureOmp &	op_omp,
		const ClosureCuda &	op_cuda,
		const ClosureHip &	op_hip,
		const ClosureDpcpp &	op_dpcpp
	)		const

inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters

ClosureReference	type of op_ref
ClosureOmp	type of op_omp
ClosureCuda	type of op_cuda
ClosureHip	type of op_hip
ClosureDpcpp	type of op_dpcpp

Parameters

name	the name of the operation
op_ref	functor to run in case of a ReferenceExecutor
op_omp	functor to run in case of an OmpExecutor
op_cuda	functor to run in case of a CudaExecutor
op_hip	functor to run in case of a HipExecutor
op_dpcpp	functor to run in case of a DpcppExecutor

◆ synchronize()

void gko::CudaExecutor::synchronize ( ) const

override virtual

Synchronize the operations launched on the executor with its master.

Implements gko::Executor.

◆ verify_memory_to() [1/5]

bool gko::CudaExecutor::verify_memory_to ( const CudaExecutor * dest_exec ) const

override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [2/5]

virtual bool gko::CudaExecutor::verify_memory_to ( const DpcppExecutor * other ) const

inline override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [3/5]

bool gko::CudaExecutor::verify_memory_to ( const HipExecutor * dest_exec ) const

override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [4/5]

virtual bool gko::CudaExecutor::verify_memory_to ( const OmpExecutor * other ) const

inline override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [5/5]

virtual bool gko::CudaExecutor::verify_memory_to ( const ReferenceExecutor * other ) const

inline override protected virtual

Implements gko::Executor.

The documentation for this class was generated from the following file:

ginkgo/core/base/executor.hpp

CudaExecutor Class Reference

CudaExecutor Class Reference

Public Member Functions

Static Public Member Functions

Protected Member Functions

Detailed Description

Member Function Documentation

◆ create() [1/2]

◆ create() [2/2]

◆ get_blas_handle()

◆ get_closest_numa()

◆ get_closest_pus()

◆ get_cublas_handle()

◆ get_cusparse_handle()

◆ get_description()

◆ get_device_id()

◆ get_major_version()

◆ get_master() [1/2]

◆ get_master() [2/2]

◆ get_minor_version()

◆ get_num_devices()

◆ get_num_multiprocessor()

◆ get_num_warps()

◆ get_num_warps_per_sm()

◆ get_scoped_device_id_guard()

◆ get_sparselib_handle()

◆ get_stream()

◆ get_warp_size()

◆ populate_exec_info()

◆ raw_alloc()

◆ raw_copy_to() [1/4]

◆ raw_copy_to() [2/4]

◆ raw_copy_to() [3/4]

◆ raw_copy_to() [4/4]

◆ raw_free()

◆ run() [1/3]

◆ run() [2/3]

◆ run() [3/3]

◆ synchronize()

◆ verify_memory_to() [1/5]

◆ verify_memory_to() [2/5]

◆ verify_memory_to() [3/5]

◆ verify_memory_to() [4/5]

◆ verify_memory_to() [5/5]