/home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/mixed-spmv.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/mixed-spmv.hpp Source File

Reference API: /home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/mixed-spmv.hpp Source File
Reference API
mixed-spmv.hpp
1
488 * template <typename ValueType, typename ValueDistribution, typename Engine>
489 * typename std::enable_if<!gko::is_complex_s<ValueType>::value, ValueType>::type
490 * get_rand_value(ValueDistribution&& value_dist, Engine&& gen)
491 * {
492 * return value_dist(gen);
493 * }
494 *
495 *
500 * template <typename ValueType, typename ValueDistribution, typename Engine>
501 * typename std::enable_if<gko::is_complex_s<ValueType>::value, ValueType>::type
502 * get_rand_value(ValueDistribution&& value_dist, Engine&& gen)
503 * {
504 * return ValueType(value_dist(gen), value_dist(gen));
505 * }
506 *
507 *
513 * double timing(std::shared_ptr<const gko::Executor> exec,
514 * std::shared_ptr<const gko::LinOp> A,
515 * std::shared_ptr<const gko::LinOp> b,
516 * std::shared_ptr<gko::LinOp> x)
517 * {
518 * int warmup = 2;
519 * int rep = 10;
520 * for (int i = 0; i < warmup; i++) {
521 * A->apply(b, x);
522 * }
523 * double total_sec = 0;
524 * for (int i = 0; i < rep; i++) {
525 * auto xx = x->clone();
526 * exec->synchronize();
527 * auto start = std::chrono::steady_clock::now();
528 * A->apply(b, xx);
529 * exec->synchronize();
530 * auto stop = std::chrono::steady_clock::now();
531 * std::chrono::duration<double> duration_time = stop - start;
532 * total_sec += duration_time.count();
533 * if (i + 1 == rep) {
534 * x->copy_from(xx);
535 * }
536 * }
537 *
538 * return total_sec / rep;
539 * }
540 *
541 *
542 * } // namespace
543 *
544 *
545 * int main(int argc, char* argv[])
546 * {
547 * using HighPrecision = double;
548 * using RealValueType = gko::remove_complex<HighPrecision>;
549 * using LowPrecision = float;
550 * using IndexType = int;
551 * using hp_vec = gko::matrix::Dense<HighPrecision>;
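552 * using lp_vec = gko::matrix::Dense<LowPrecision>;
553 * using real_vec = gko::matrix::Dense<RealValueType>;
554 * using hp_mtx = gko::matrix::Ell<HighPrecision, IndexType>;
555 * using lp_mtx = gko::matrix::Ell<LowPrecision, IndexType>;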
556 *
557 * std::cout << gko::version_info::get() << std::endl;
558 *
559 * if (argc == 2 && (std::string(argv[1]) == "--help")) {
560 * std::cerr << "Usage: " << argv[0] << " [executor] " << std::endl;
561 * std::exit(-1);
562 * }
563 *
564 * const auto executor_string = argc >= 2 ? argv[1] : "reference";
565 * std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
566 * exec_map{
567 * {"omp", [] { return gko::OmpExecutor::create(); }},
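568 * {"cuda",
569 * [] {
570 * return gko::CudaExecutor::create(0,
571 * gko::OmpExecutor::create());
572 * }},
573 * {"hip",
574 * [] {
575 * return gko::HipExecutor::create(0, gko::OmpExecutor::create());
576 * }},
577 * {"dpcpp",
578 * [] {
579 * return gko::DpcppExecutor::create(0,
580 * gko::OmpExecutor::create());
581 * }},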
582 * {"reference", [] { return gko::ReferenceExecutor::create(); }}};
583 *
584 * const auto exec = exec_map.at(executor_string)(); // throws if not valid
585 *
586 *
587 * auto hp_A = share(gko::read<hp_mtx>(std::ifstream("data/A.mtx"), exec));
588 * auto lp_A = share(gko::read<lp_mtx>(std::ifstream("data/A.mtx"), exec));
589 * auto A_dim = hp_A->get_size();
590 * auto b_dim = gko::dim<2>{A_dim[1], 1};
591 * auto x_dim = gko::dim<2>{A_dim[0], b_dim[1]};
592 * auto host_b = hp_vec::create(exec->get_master(), b_dim);
593 * std::default_random_engine rand_engine(32);
594 * auto dist = std::uniform_real_distribution<RealValueType>(0.0, 1.0);
595 * for (int i = 0; i < host_b->get_size()[0]; i++) {
596 * host_b->at(i, 0) = get_rand_value<HighPrecision>(dist, rand_engine);
597 * }
598 * auto hp_b = share(gko::clone(exec, host_b));
599 * auto lp_b = share(lp_vec::create(exec));
600 * lp_b->copy_from(hp_b);
601 *
602 * auto hp_x = share(hp_vec::create(exec, x_dim));
603 * auto lp_x = share(lp_vec::create(exec, x_dim));
604 * auto hplp_x = share(hp_x->clone());
605 * auto lplp_x = share(hp_x->clone());
606 * auto lphp_x = share(hp_x->clone());
607 *
608 *
609 * auto hp_sec = timing(exec, hp_A, hp_b, hp_x);
610 * auto lp_sec = timing(exec, lp_A, lp_b, lp_x);
611 * auto hplp_sec = timing(exec, hp_A, lp_b, hplp_x);
612 * auto lplp_sec = timing(exec, lp_A, lp_b, lplp_x);
613 * auto lphp_sec = timing(exec, lp_A, hp_b, lphp_x);
614 *
615 *
616 * auto neg_one = gko::initialize<hp_vec>({-1.0}, exec);
617 * auto hp_x_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
618 * auto lp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
619 * auto hplp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
620 * auto lplp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
621 * auto lphp_diff_norm = gko::initialize<real_vec>({0.0}, exec->get_master());
622 * auto lp_diff = hp_x->clone();
623 * auto hplp_diff = hp_x->clone();
624 * auto lplp_diff = hp_x->clone();
625 * auto lphp_diff = hp_x->clone();
626 *
627 * hp_x->compute_norm2(hp_x_norm);
628 * lp_diff->add_scaled(neg_one, lp_x);
629 * lp_diff->compute_norm2(lp_diff_norm);
630 * hplp_diff->add_scaled(neg_one, hplp_x);
631 * hplp_diff->compute_norm2(hplp_diff_norm);
632 * lplp_diff->add_scaled(neg_one, lplp_x);
633 * lplp_diff->compute_norm2(lplp_diff_norm);
634 * lphp_diff->add_scaled(neg_one, lphp_x);
635 * lphp_diff->compute_norm2(lphp_diff_norm);
636 * exec->synchronize();
637 *
638 * std::cout.precision(10);
639 * std::cout << std::scientific;
640 * std::cout << "High Precision time(s): " << hp_sec << std::endl;
641 * std::cout << "High Precision result norm: " << hp_x_norm->at(0)
642 * << std::endl;
643 * std::cout << "Low Precision time(s): " << lp_sec << std::endl;
644 * std::cout << "Low Precision relative error: "
645 * << lp_diff_norm->at(0) / hp_x_norm->at(0) << "\n";
646 * std::cout << "Hp * Lp -> Hp time(s): " << hplp_sec << std::endl;
647 * std::cout << "Hp * Lp -> Hp relative error: "
648 * << hplp_diff_norm->at(0) / hp_x_norm->at(0) << "\n";
649 * std::cout << "Lp * Lp -> Hp time(s): " << lplp_sec << std::endl;
650 * std::cout << "Lp * Lp -> Hp relative error: "
651 * << lplp_diff_norm->at(0) / hp_x_norm->at(0) << "\n";
652 * std::cout << "Lp * Hp -> Hp time(s): " << lphp_sec << std::endl;
653 * std::cout << "Lp * Hp -> Hp relative error: "
654 * << lphp_diff_norm->at(0) / hp_x_norm->at(0) << "\n";
655 * }
656 * @endcode
657*/
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
Definition dense.hpp:116
Definition ell.hpp:63
static const version_info & get()
Definition version.hpp:139
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260
Definition dim.hpp:26