CudaExecutor Class Reference

CudaExecutor Class Reference

Reference API: gko::CudaExecutor Class Reference
Reference API

#include <ginkgo/core/base/executor.hpp>

Inheritance diagram for gko::CudaExecutor:
[legend]

Public Member Functions

std::shared_ptr< Executor > get_master () noexcept override
 
std::shared_ptr< const Executor > get_master () const noexcept override
 
void synchronize () const override
 
scoped_device_id_guard get_scoped_device_id_guard () const override
 
std::string get_description () const override
 
int get_device_id () const noexcept
 
int get_num_warps_per_sm () const noexcept
 
int get_num_multiprocessor () const noexcept
 
int get_num_warps () const noexcept
 
int get_warp_size () const noexcept
 
int get_major_version () const noexcept
 
int get_minor_version () const noexcept
 
cublasContext * get_cublas_handle () const
 
cublasContext * get_blas_handle () const
 
cusparseContext * get_cusparse_handle () const
 
cusparseContext * get_sparselib_handle () const
 
std::vector< int > get_closest_pus () const
 
int get_closest_numa () const
 
CUstream_st * get_stream () const
 
virtual void run (const Operation &op) const=0
 
template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void run (const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const
 
template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void run (std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const
 

Static Public Member Functions

static std::shared_ptr< CudaExecutor > create (int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
 
static std::shared_ptr< CudaExecutor > create (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc=std::make_shared< CudaAllocator >(), CUstream_st *stream=nullptr)
 
static int get_num_devices ()
 

Protected Member Functions

void set_gpu_property ()
 
void init_handles ()
 
 CudaExecutor (int device_id, std::shared_ptr< Executor > master, std::shared_ptr< CudaAllocatorBase > alloc, CUstream_st *stream)
 
void * raw_alloc (size_type size) const override
 
void raw_free (void *ptr) const noexcept override
 
void raw_copy_to (const OmpExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override
 
void raw_copy_to (const HipExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override
 
void raw_copy_to (const DpcppExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override
 
void raw_copy_to (const CudaExecutor *dest_exec, size_type n_bytes, const void *src_ptr, void *dest_ptr) const override
 
virtual bool verify_memory_to (const OmpExecutor *other) const override
 
virtual bool verify_memory_to (const ReferenceExecutor *other) const override
 
virtual bool verify_memory_to (const DpcppExecutor *other) const override
 
bool verify_memory_to (const HipExecutor *dest_exec) const override
 
bool verify_memory_to (const CudaExecutor *dest_exec) const override
 
void populate_exec_info (const machine_topology *mach_topo) override
 

Detailed Description

This is the Executor subclass which represents the CUDA device.

Member Function Documentation

◆ create() [1/2]

static std::shared_ptr< CudaExecutor > gko::CudaExecutor::create ( int  device_id,
std::shared_ptr< Executor > master,
bool  device_reset,
allocation_mode  alloc_mode = default_cuda_alloc_mode,
CUstream_st *  stream = nullptr 
)
static

Creates a new CudaExecutor.

Parameters
device_id: the CUDA device id of this device.
master: an executor on the host that is used to invoke the device kernels.
device_reset: this option no longer has any effect.
alloc_mode: the allocation mode that the executor should operate on. See allocation_mode for more details.
stream: the stream to execute operations on.

◆ create() [2/2]

static std::shared_ptr< CudaExecutor > gko::CudaExecutor::create ( int  device_id,
std::shared_ptr< Executor > master,
std::shared_ptr< CudaAllocatorBase > alloc = std::make_shared< CudaAllocator >(),
CUstream_st *  stream = nullptr 
)
static

Creates a new CudaExecutor with a custom allocator and device stream.

Parameters
device_id: the CUDA device id of this device.
master: an executor on the host that is used to invoke the device kernels.
alloc: the allocator to use for device memory allocations.
stream: the stream to execute operations on.

◆ get_blas_handle()

cublasContext * gko::CudaExecutor::get_blas_handle ( ) const
inline

Get the cublas handle for this executor

Returns
the cublas handle (cublasContext*) for this executor

◆ get_closest_numa()

int gko::CudaExecutor::get_closest_numa ( ) const
inline

Get the closest NUMA node

Returns
the NUMA node closest to this device

◆ get_closest_pus()

std::vector< int > gko::CudaExecutor::get_closest_pus ( ) const
inline

Get the closest PUs

Returns
the array of PUs closest to this device

◆ get_cublas_handle()

cublasContext * gko::CudaExecutor::get_cublas_handle ( ) const
inline

Get the cublas handle for this executor

Returns
the cublas handle (cublasContext*) for this executor

◆ get_cusparse_handle()

cusparseContext * gko::CudaExecutor::get_cusparse_handle ( ) const
inline

Get the cusparse handle for this executor

Returns
the cusparse handle (cusparseContext*) for this executor

◆ get_description()

std::string gko::CudaExecutor::get_description ( ) const
override virtual
Returns
a textual representation of the executor and its device.

Implements gko::Executor.

◆ get_device_id()

int gko::CudaExecutor::get_device_id ( ) const
inline noexcept

Get the CUDA device id of the device associated to this executor.

◆ get_major_version()

int gko::CudaExecutor::get_major_version ( ) const
inline noexcept

Get the major version of compute capability.

◆ get_master() [1/2]

std::shared_ptr< const Executor > gko::CudaExecutor::get_master ( ) const
override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns
the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_master() [2/2]

std::shared_ptr< Executor > gko::CudaExecutor::get_master ( )
override virtual noexcept

Returns the master OmpExecutor of this Executor.

Returns
the master OmpExecutor of this Executor.

Implements gko::Executor.

◆ get_minor_version()

int gko::CudaExecutor::get_minor_version ( ) const
inline noexcept

Get the minor version of compute capability.

◆ get_num_devices()

static int gko::CudaExecutor::get_num_devices ( )
static

Get the number of devices present on the system.

◆ get_num_multiprocessor()

int gko::CudaExecutor::get_num_multiprocessor ( ) const
inline noexcept

Get the number of multiprocessors of this executor.

◆ get_num_warps()

int gko::CudaExecutor::get_num_warps ( ) const
inline noexcept

Get the number of warps of this executor.

◆ get_num_warps_per_sm()

int gko::CudaExecutor::get_num_warps_per_sm ( ) const
inline noexcept

Get the number of warps per SM of this executor.

◆ get_scoped_device_id_guard()

scoped_device_id_guard gko::CudaExecutor::get_scoped_device_id_guard ( ) const
override virtual

Implements gko::Executor.

◆ get_sparselib_handle()

cusparseContext * gko::CudaExecutor::get_sparselib_handle ( ) const
inline

Get the cusparse handle for this executor

Returns
the cusparse handle (cusparseContext*) for this executor

◆ get_stream()

CUstream_st * gko::CudaExecutor::get_stream ( ) const
inline

Returns the CUDA stream used by this executor. Can be nullptr for the default stream.

Returns
the stream used to execute kernels and memory operations.

◆ get_warp_size()

int gko::CudaExecutor::get_warp_size ( ) const
inline noexcept

Get the warp size of this executor.

◆ populate_exec_info()

void gko::CudaExecutor::populate_exec_info ( const machine_topology * mach_topo)
override protected virtual

Populates the executor specific info from the global machine topology object.

Parameters
mach_topo: the machine topology object.

Implements gko::Executor.

◆ raw_alloc()

void * gko::CudaExecutor::raw_alloc ( size_type  size) const
override protected virtual

Allocates raw memory in this Executor.

Parameters
size: number of bytes to allocate
Exceptions
AllocationError: if the allocation failed
Returns
raw pointer to allocated memory

Implements gko::Executor.

◆ raw_copy_to() [1/4]

void gko::CudaExecutor::raw_copy_to ( const CudaExecutor * dest_exec,
size_type  n_bytes,
const void *  src_ptr,
void *  dest_ptr 
) const
override protected virtual

Implements gko::Executor.

◆ raw_copy_to() [2/4]

void gko::CudaExecutor::raw_copy_to ( const DpcppExecutor * dest_exec,
size_type  n_bytes,
const void *  src_ptr,
void *  dest_ptr 
) const
override protected virtual

Implements gko::Executor.

◆ raw_copy_to() [3/4]

void gko::CudaExecutor::raw_copy_to ( const HipExecutor * dest_exec,
size_type  n_bytes,
const void *  src_ptr,
void *  dest_ptr 
) const
override protected virtual

Implements gko::Executor.

◆ raw_copy_to() [4/4]

void gko::CudaExecutor::raw_copy_to ( const OmpExecutor * dest_exec,
size_type  n_bytes,
const void *  src_ptr,
void *  dest_ptr 
) const
override protected virtual

Implements gko::Executor.

◆ raw_free()

void gko::CudaExecutor::raw_free ( void *  ptr) const
override protected virtual noexcept

Frees memory previously allocated with Executor::alloc().

If ptr is a nullptr, the function has no effect.

Parameters
ptr: pointer to the allocated memory block

Implements gko::Executor.

◆ run() [1/3]

template<typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void gko::Executor::run ( const ClosureOmp &  op_omp,
const ClosureCuda &  op_cuda,
const ClosureHip &  op_hip,
const ClosureDpcpp &  op_dpcpp 
) const
inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters
ClosureOmp: type of op_omp
ClosureCuda: type of op_cuda
ClosureHip: type of op_hip
ClosureDpcpp: type of op_dpcpp
Parameters
op_omp: functor to run in case of an OmpExecutor or ReferenceExecutor
op_cuda: functor to run in case of a CudaExecutor
op_hip: functor to run in case of a HipExecutor
op_dpcpp: functor to run in case of a DpcppExecutor

◆ run() [2/3]

virtual void gko::Executor::run ( const Operation & op) const
virtual

Runs the specified Operation using this Executor.

Parameters
op: the operation to run

Implements gko::Executor.

◆ run() [3/3]

template<typename ClosureReference , typename ClosureOmp , typename ClosureCuda , typename ClosureHip , typename ClosureDpcpp >
void gko::Executor::run ( std::string  name,
const ClosureReference &  op_ref,
const ClosureOmp &  op_omp,
const ClosureCuda &  op_cuda,
const ClosureHip &  op_hip,
const ClosureDpcpp &  op_dpcpp 
) const
inline

Runs one of the passed in functors, depending on the Executor type.

Template Parameters
ClosureReference: type of op_ref
ClosureOmp: type of op_omp
ClosureCuda: type of op_cuda
ClosureHip: type of op_hip
ClosureDpcpp: type of op_dpcpp
Parameters
name: the name of the operation
op_ref: functor to run in case of a ReferenceExecutor
op_omp: functor to run in case of an OmpExecutor
op_cuda: functor to run in case of a CudaExecutor
op_hip: functor to run in case of a HipExecutor
op_dpcpp: functor to run in case of a DpcppExecutor

◆ synchronize()

void gko::CudaExecutor::synchronize ( ) const
override virtual

Synchronize the operations launched on the executor with its master.

Implements gko::Executor.

◆ verify_memory_to() [1/5]

bool gko::CudaExecutor::verify_memory_to ( const CudaExecutor * dest_exec) const
override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [2/5]

virtual bool gko::CudaExecutor::verify_memory_to ( const DpcppExecutor * other) const
inline override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [3/5]

bool gko::CudaExecutor::verify_memory_to ( const HipExecutor * dest_exec) const
override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [4/5]

virtual bool gko::CudaExecutor::verify_memory_to ( const OmpExecutor * other) const
inline override protected virtual

Implements gko::Executor.

◆ verify_memory_to() [5/5]

virtual bool gko::CudaExecutor::verify_memory_to ( const ReferenceExecutor * other) const
inline override protected virtual

Implements gko::Executor.


The documentation for this class was generated from the following file: