// get_rand_value, non-complex overload: std::enable_if keeps this overload
// only when gko::is_complex_s<ValueType>::value is false. It returns a single
// draw value_dist(gen).
// NOTE(review): the "NNN *" tokens look like line-number residue from a
// rendered listing, and the function's braces are not visible in this view.
488 *
template <
typename ValueType,
typename ValueDistribution,
typename Engine>
489 *
typename std::enable_if<!gko::is_complex_s<ValueType>::value, ValueType>::type
490 * get_rand_value(ValueDistribution&& value_dist, Engine&& gen)
492 *
return value_dist(gen);
// get_rand_value, complex overload: std::enable_if keeps this overload only
// when gko::is_complex_s<ValueType>::value is true. It builds a ValueType from
// two separate draws of value_dist(gen), passed as the two constructor
// arguments (presumably real and imaginary part -- confirm against ValueType).
// NOTE(review): the function's braces are not visible in this view.
500 *
template <
typename ValueType,
typename ValueDistribution,
typename Engine>
501 *
typename std::enable_if<gko::is_complex_s<ValueType>::value, ValueType>::type
502 * get_rand_value(ValueDistribution&& value_dist, Engine&& gen)
504 *
return ValueType(value_dist(gen), value_dist(gen));
// timing: returns the mean wall-clock time in seconds (total_sec / rep) of the
// operation measured between the two exec->synchronize() calls, averaged over
// `rep` repetitions.
//
// Parameters: exec is the executor that is synchronized around the timed
// region; A, b and x are the operands. x is cloned into `xx` on every
// repetition.
//
// NOTE(review): `warmup` and `rep` are declared outside this view. The warm-up
// loop body, the timed statement itself (presumably an apply of A to b writing
// into xx -- confirm) and the body of `if (i + 1 == rep)` are elided from this
// listing.
513 *
double timing(std::shared_ptr<const gko::Executor> exec,
514 * std::shared_ptr<const gko::LinOp> A,
515 * std::shared_ptr<const gko::LinOp> b,
516 * std::shared_ptr<gko::LinOp> x)
520 *
// Warm-up iterations; nothing is timed here.
for (
int i = 0; i < warmup; i++) {
523 *
double total_sec = 0;
524 *
for (
int i = 0; i < rep; i++) {
525 *
// Work on a fresh copy of x for each repetition.
auto xx = x->clone();
// Synchronize before reading the clock so earlier work is not counted.
526 * exec->synchronize();
527 *
auto start = std::chrono::steady_clock::now();
// Synchronize again so the timed work has finished before stopping the clock.
529 * exec->synchronize();
530 *
auto stop = std::chrono::steady_clock::now();
// duration<double> with the default period counts seconds.
531 * std::chrono::duration<double> duration_time = stop - start;
532 * total_sec += duration_time.count();
533 *
// Special handling of the last repetition; its body is not shown here.
if (i + 1 == rep) {
538 *
return total_sec / rep;
// main, part 1: precision aliases, --help handling and executor selection.
// NOTE(review): this is a partial listing; braces and several statements of
// main are elided.
545 *
int main(
int argc,
char* argv[])
547 *
// High precision is double, low precision is float, indices are int.
using HighPrecision = double;
549 *
using LowPrecision = float;
550 *
using IndexType = int;
559 *
// Print the usage line to std::cerr when the only argument is "--help".
if (argc == 2 && (std::string(argv[1]) ==
"--help")) {
560 * std::cerr <<
"Usage: " << argv[0] <<
" [executor] " << std::endl;
564 *
// The first command-line argument names the executor; default is "reference".
const auto executor_string = argc >= 2 ? argv[1] :
"reference";
// Map from executor name to a factory returning the executor. Only the
// "reference" entry is visible here; the other entries are elided.
565 * std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
582 * {
"reference", [] {
return gko::ReferenceExecutor::create(); }}};
584 *
// Look up the factory and call it. std::map::at throws std::out_of_range
// when the name is not a key of the map.
const auto exec = exec_map.at(executor_string)();
// main, part 2: load the matrix in both precisions and set up the vectors.
// NOTE(review): hp_mtx, lp_mtx, hp_vec, lp_vec, RealValueType, b_dim, x_dim
// and hp_b are declared in lines elided from this listing.
587 *
// The same file data/A.mtx is read twice, once per matrix type.
auto hp_A =
share(gko::read<hp_mtx>(std::ifstream(
"data/A.mtx"), exec));
588 *
auto lp_A =
share(gko::read<lp_mtx>(std::ifstream(
"data/A.mtx"), exec));
589 *
auto A_dim = hp_A->get_size();
592 *
// Right-hand side is created on the master executor of exec so that it can be
// filled element by element below.
auto host_b = hp_vec::create(exec->get_master(), b_dim);
// Fixed seed (32), so the generated values are reproducible between runs.
593 * std::default_random_engine rand_engine(32);
594 *
auto dist = std::uniform_real_distribution<RealValueType>(0.0, 1.0);
595 *
// Fill column 0 of host_b, one row at a time.
for (
int i = 0; i < host_b->get_size()[0]; i++) {
596 * host_b->at(i, 0) = get_rand_value<HighPrecision>(dist, rand_engine);
599 *
// lp_b receives a copy of hp_b through copy_from (presumably converting to
// the low-precision value type -- confirm).
auto lp_b =
share(lp_vec::create(exec));
600 * lp_b->copy_from(hp_b);
602 *
// Result vectors. hplp_x, lplp_x and lphp_x are clones of hp_x, so they have
// hp_x's type.
auto hp_x =
share(hp_vec::create(exec, x_dim));
603 *
auto lp_x =
share(lp_vec::create(exec, x_dim));
604 *
auto hplp_x =
share(hp_x->clone());
605 *
auto lplp_x =
share(hp_x->clone());
606 *
auto lphp_x =
share(hp_x->clone());
// main, part 3: time the five precision combinations, then compute the norm
// of the difference between the high-precision result and each other result.
609 *
// Naming: <matrix precision><vector precision>_sec; every timing() call
// receives (exec, matrix, right-hand side, result vector).
auto hp_sec = timing(exec, hp_A, hp_b, hp_x);
610 *
auto lp_sec = timing(exec, lp_A, lp_b, lp_x);
611 *
auto hplp_sec = timing(exec, hp_A, lp_b, hplp_x);
612 *
auto lplp_sec = timing(exec, lp_A, lp_b, lplp_x);
613 *
auto lphp_sec = timing(exec, lp_A, hp_b, lphp_x);
616 *
// Scalar -1 used as the scaling factor in the add_scaled calls below.
auto neg_one = gko::initialize<hp_vec>({-1.0}, exec);
// The norm results live on the master executor; they are read with at(0)
// when the report is printed.
617 *
auto hp_x_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
618 *
auto lp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
619 *
auto hplp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
620 *
auto lplp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
621 *
auto lphp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
622 *
// Each *_diff starts as a copy of hp_x.
auto lp_diff = hp_x->clone();
623 *
auto hplp_diff = hp_x->clone();
624 *
auto lplp_diff = hp_x->clone();
625 *
auto lphp_diff = hp_x->clone();
// add_scaled(neg_one, y) presumably performs diff += (-1) * y, leaving
// hp_x - y in diff (confirm against the Dense API); compute_norm2 then writes
// the 2-norm into the matching *_norm object.
627 * hp_x->compute_norm2(hp_x_norm);
628 * lp_diff->add_scaled(neg_one, lp_x);
629 * lp_diff->compute_norm2(lp_diff_norm);
630 * hplp_diff->add_scaled(neg_one, hplp_x);
631 * hplp_diff->compute_norm2(hplp_diff_norm);
632 * lplp_diff->add_scaled(neg_one, lplp_x);
633 * lplp_diff->compute_norm2(lplp_diff_norm);
634 * lphp_diff->add_scaled(neg_one, lphp_x);
635 * lphp_diff->compute_norm2(lphp_diff_norm);
636 * exec->synchronize();
// main, part 4: print the report. For each precision combination it prints
// the mean time returned by timing() and the difference norm divided by the
// norm of the high-precision result (labelled "relative error").
// Numbers are printed in scientific notation with precision 10.
// NOTE(review): the "NNN *" tokens look like line-number residue from the
// rendered listing; the statement ending after hp_x_norm->at(0) is elided.
638 * std::cout.precision(10);
639 * std::cout << std::scientific;
640 * std::cout <<
"High Precision time(s): " << hp_sec << std::endl;
641 * std::cout <<
"High Precision result norm: " << hp_x_norm->at(0)
643 * std::cout <<
"Low Precision time(s): " << lp_sec << std::endl;
644 * std::cout <<
"Low Precision relative error: "
645 * << lp_diff_norm->at(0) / hp_x_norm->at(0) <<
"\n";
646 * std::cout <<
"Hp * Lp -> Hp time(s): " << hplp_sec << std::endl;
647 * std::cout <<
"Hp * Lp -> Hp relative error: "
648 * << hplp_diff_norm->at(0) / hp_x_norm->at(0) <<
"\n";
649 * std::cout <<
"Lp * Lp -> Hp time(s): " << lplp_sec << std::endl;
650 * std::cout <<
"Lp * Lp -> Hp relative error: "
651 * << lplp_diff_norm->at(0) / hp_x_norm->at(0) <<
"\n";
// Fix: this line carries the "Lp * Hp" label, so it must print lphp_sec (the
// low-precision-matrix / high-precision-vector timing), not lplp_sec, which
// was already printed for "Lp * Lp" above.
652 * std::cout <<
"Lp * Hp -> Hp time(s): " << lphp_sec << std::endl;
653 * std::cout <<
"Lp * Hp -> Hp relative error: "
654 * << lphp_diff_norm->at(0) / hp_x_norm->at(0) <<
"\n";
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
static const version_info & get()
Definition version.hpp:139
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260