doxygen/html/batched-solver_8hpp_source.html

 *    ApplSysData appl_generate_system(const int nrows, const size_type nsystems,

 *                                     std::shared_ptr<gko::Executor> exec);

 *

 *    void appl_clean_up(ApplSysData& appl_data, std::shared_ptr<gko::Executor> exec);

 *

 *

 *    int main(int argc, char* argv[])

 *    {

 *        std::cout << gko::version_info::get() << std::endl;

 *

 *        if (argc == 2 && (std::string(argv[1]) == "--help")) {

 *            std::cerr << "Usage: " << argv[0]

 *                      << " [executor] [num_systems] [num_rows] [print_residuals] "

 *                         "[num_reps]"

 *                      << std::endl;

 *            std::exit(-1);

 *        }

 *

 *        const auto executor_string = argc >= 2 ? argv[1] : "reference";

 *        std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>

 *            exec_map{

 *                {"omp", [] { return gko::OmpExecutor::create(); }},

 *                {"cuda",

 *                 [] {

 *                     return gko::CudaExecutor::create(0,

 *                                                      gko::OmpExecutor::create());

 *                 }},

 *                {"hip",

 *                 [] {

 *                     return gko::HipExecutor::create(0, gko::OmpExecutor::create());

 *                 }},

 *                {"dpcpp",

 *                 [] {

 *                     return gko::DpcppExecutor::create(0,

 *                                                       gko::OmpExecutor::create());

 *                 }},

 *                {"reference", [] { return gko::ReferenceExecutor::create(); }}};

 *

 *        const auto exec = exec_map.at(executor_string)();  // throws std::out_of_range if the executor name is unknown

 *

 *        const size_type num_systems = argc >= 3 ? std::atoi(argv[2]) : 2;

 *        const int num_rows = argc >= 4 ? std::atoi(argv[3]) : 32;  // rows per system

 *        const bool print_residuals =

 *            argc >= 5 ? (std::string(argv[4]) == "true") : false;

 *        const int num_reps = argc >= 6 ? std::atoi(argv[5]) : 20;

 *        auto appl_sys = appl_generate_system(num_rows, num_systems, exec);

 *        auto batch_mat_size =

 *            gko::batch_dim<2>(num_systems, gko::dim<2>(num_rows, num_rows));

 *        auto batch_vec_size =

 *            gko::batch_dim<2>(num_systems, gko::dim<2>(num_rows, 1));

 *        auto vals_view = gko::array<value_type>::const_view(

 *            exec, num_systems * appl_sys.nnz, appl_sys.all_values);

 *        auto rowptrs_view = gko::array<index_type>::const_view(exec, num_rows + 1,

 *                                                               appl_sys.row_ptrs);

 *        auto colidxs_view = gko::array<index_type>::const_view(exec, appl_sys.nnz,

 *                                                               appl_sys.col_idxs);

 *        auto A = gko::share(mtx_type::create_const(

 *            exec, batch_mat_size, std::move(vals_view), std::move(colidxs_view),

 *            std::move(rowptrs_view)));

 *        auto b_view = gko::array<value_type>::const_view(

 *            exec, num_systems * num_rows, appl_sys.all_rhs);

 *        auto b = vec_type::create_const(exec, batch_vec_size, std::move(b_view));

 *        auto x = vec_type::create(exec);

 *        auto host_x = vec_type::create(exec->get_master(), batch_vec_size);

 *        for (size_type isys = 0; isys < num_systems; isys++) {

 *            for (int irow = 0; irow < num_rows; irow++) {

 *                host_x->at(isys, irow, 0) = gko::zero<value_type>();

 *            }

 *        }

 *        x->copy_from(host_x.get());

 *

 *        const real_type reduction_factor{1e-10};

 *        auto solver =

 *            bicgstab::build()

 *                .with_max_iterations(500)

 *                .with_tolerance(reduction_factor)

 *                .with_tolerance_type(gko::batch::stop::tolerance_type::relative)

 *                .on(exec)

 *                ->generate(A);

 *

 *        std::shared_ptr<const gko::batch::log::BatchConvergence<value_type>>

 *            logger = gko::batch::log::BatchConvergence<value_type>::create();

 *

 *        solver->add_logger(logger);

 *        auto x_clone = gko::clone(x);

 *

 *        for (int i = 0; i < 3; ++i) {

 *            x_clone->copy_from(x.get());

 *            solver->apply(b, x_clone);

 *        }

 *

 *        double apply_time = 0.0;

 *        for (int i = 0; i < num_reps; ++i) {

 *            x_clone->copy_from(x.get());

 *            exec->synchronize();

 *            std::chrono::steady_clock::time_point t1 =

 *                std::chrono::steady_clock::now();

 *            solver->apply(b, x_clone);

 *            exec->synchronize();

 *            std::chrono::steady_clock::time_point t2 =

 *                std::chrono::steady_clock::now();

 *            auto time_span =

 *                std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);

 *            apply_time += time_span.count();

 *        }

 *        x->copy_from(x_clone.get());

 *        solver->remove_logger(logger.get());

 *

 *        auto norm_dim = gko::batch_dim<2>(num_systems, gko::dim<2>(1, 1));

 *        auto host_b_norm = real_vec_type::create(exec->get_master(), norm_dim);

 *        host_b_norm->fill(0.0);

 *

 *        b->compute_norm2(host_b_norm);

 *        auto one = vec_type::create(exec, norm_dim);

 *        one->fill(1.0);

 *        auto neg_one = vec_type::create(exec, norm_dim);

 *        neg_one->fill(-1.0);

 *        auto res = vec_type::create(exec, batch_vec_size);

 *        res->copy_from(b);

 *        A->apply(one, x, neg_one, res);

 *        auto host_res_norm = real_vec_type::create(exec->get_master(), norm_dim);

 *        host_res_norm->fill(0.0);

 *        res->compute_norm2(host_res_norm);

 *        auto host_log_resid = gko::make_temporary_clone(

 *            exec->get_master(), &logger->get_residual_norm());

 *        auto host_log_iters = gko::make_temporary_clone(

 *            exec->get_master(), &logger->get_num_iterations());

 *

 *        if (print_residuals) {

 *            std::cout << "Residual norm sqrt(r^T r):\n";

 *            auto unb_res = detail::unbatch(host_res_norm.get());

 *            auto unb_bnorm = detail::unbatch(host_b_norm.get());

 *            for (size_type i = 0; i < num_systems; ++i) {

 *                std::cout << " System no. " << i

 *                          << ": residual norm = " << unb_res[i]->at(0, 0)

 *                          << ", implicit residual norm = "

 *                          << host_log_resid->get_const_data()[i]

 *                          << ", iterations = "

 *                          << host_log_iters->get_const_data()[i] << std::endl;

 *                const real_type relresnorm =

 *                    unb_res[i]->at(0, 0) / unb_bnorm[i]->at(0, 0);

 *                if (!(relresnorm <= reduction_factor)) {

 *                    std::cout << "System " << i << " converged only to "

 *                              << relresnorm << " relative residual." << std::endl;

 *                }

 *            }

 *        }

 *        std::cout << "Solver type: "

 *                  << "batch::bicgstab"

 *                  << "\nMatrix size: " << A->get_common_size()

 *                  << "\nNum batch entries: " << A->get_num_batch_items()

 *                  << "\nEntire solve took: " << apply_time / num_reps << " seconds."

 *                  << std::endl;

 *

 *        appl_clean_up(appl_sys, exec);

 *        return 0;

 *    }

 *

 *

 *    ApplSysData appl_generate_system(const int nrows, const size_type nsystems,

 *                                     std::shared_ptr<gko::Executor> exec)

 *    {

 *        const int nnz = nrows * 3 - 2;

 *        std::default_random_engine rgen(15);

 *        std::normal_distribution<real_type> distb(0.5, 0.1);

 *        std::vector<real_type> spacings(nsystems * nrows);

 *        std::generate(spacings.begin(), spacings.end(),

 *                      [&]() { return distb(rgen); });

 *

 *        std::vector<value_type> allvalues(nnz * nsystems);

 *        for (size_type isys = 0; isys < nsystems; isys++) {

 *            allvalues.at(isys * nnz) = 2.0 / spacings.at(isys * nrows);

 *            allvalues.at(isys * nnz + 1) = -1.0;

 *            for (int irow = 0; irow < nrows - 2; irow++) {

 *                allvalues.at(isys * nnz + 2 + irow * 3) = -1.0;

 *                allvalues.at(isys * nnz + 2 + irow * 3 + 1) =

 *                    2.0 / spacings.at(isys * nrows + irow + 1);

 *                allvalues.at(isys * nnz + 2 + irow * 3 + 2) = -1.0;

 *            }

 *            allvalues.at(isys * nnz + 2 + (nrows - 2) * 3) = -1.0;

 *            allvalues.at(isys * nnz + 2 + (nrows - 2) * 3 + 1) =

 *                2.0 / spacings.at((isys + 1) * nrows - 1);

 *            assert(isys * nnz + 2 + (nrows - 2) * 3 + 2 == (isys + 1) * nnz);

 *        }

 *

 *        std::vector<index_type> rowptrs(nrows + 1);

 *        rowptrs.at(0) = 0;

 *        rowptrs.at(1) = 2;

 *        for (int i = 2; i < nrows; i++) {

 *            rowptrs.at(i) = rowptrs.at(i - 1) + 3;

 *        }

 *        rowptrs.at(nrows) = rowptrs.at(nrows - 1) + 2;

 *        assert(rowptrs.at(nrows) == nnz);

 *

 *        std::vector<index_type> colidxs(nnz);

 *        colidxs.at(0) = 0;

 *        colidxs.at(1) = 1;

 *        const int nnz_per_row = 3;

 *        for (int irow = 1; irow < nrows - 1; irow++) {

 *            colidxs.at(2 + (irow - 1) * nnz_per_row) = irow - 1;

 *            colidxs.at(2 + (irow - 1) * nnz_per_row + 1) = irow;

 *            colidxs.at(2 + (irow - 1) * nnz_per_row + 2) = irow + 1;

 *        }

 *        colidxs.at(2 + (nrows - 2) * nnz_per_row) = nrows - 2;

 *        colidxs.at(2 + (nrows - 2) * nnz_per_row + 1) = nrows - 1;

 *        assert(2 + (nrows - 2) * nnz_per_row + 1 == nnz - 1);

 *

 *        std::vector<value_type> allb(nrows * nsystems);

 *        for (size_type isys = 0; isys < nsystems; isys++) {

 *            const value_type bval = distb(rgen);

 *            std::fill(allb.begin() + isys * nrows,

 *                      allb.begin() + (isys + 1) * nrows, bval);

 *        }

 *

 *        index_type* const row_ptrs = exec->alloc<index_type>(nrows + 1);

 *        exec->copy_from(exec->get_master().get(), static_cast<size_type>(nrows + 1),

 *                        rowptrs.data(), row_ptrs);

 *        index_type* const col_idxs = exec->alloc<index_type>(nnz);

 *        exec->copy_from(exec->get_master().get(), static_cast<size_type>(nnz),

 *                        colidxs.data(), col_idxs);

 *        value_type* const all_values = exec->alloc<value_type>(nsystems * nnz);

 *        exec->copy_from(exec->get_master().get(), nsystems * nnz, allvalues.data(),

 *                        all_values);

 *        value_type* const all_b = exec->alloc<value_type>(nsystems * nrows);

 *        exec->copy_from(exec->get_master().get(), nsystems * nrows, allb.data(),

 *                        all_b);

 *        return {nsystems, nrows, nnz, row_ptrs, col_idxs, all_values, all_b};

 *    }

 *

 *    void appl_clean_up(ApplSysData& appl_data, std::shared_ptr<gko::Executor> exec)

 *    {

 *        exec->free(const_cast<index_type*>(appl_data.row_ptrs));

 *        exec->free(const_cast<index_type*>(appl_data.col_idxs));

 *        exec->free(const_cast<value_type*>(appl_data.all_values));

 *        exec->free(const_cast<value_type*>(appl_data.all_rhs));

 *    }

 * @endcode

*/

gko::CudaExecutor::create
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)

gko::DpcppExecutor::create
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)

gko::HipExecutor::create
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)

gko::OmpExecutor::create
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396

gko::array::const_view
static detail::const_array_view< ValueType > const_view(std::shared_ptr< const Executor > exec, size_type size, const value_type *data)
Definition array.hpp:384

gko::batch::MultiVector::create
static std::unique_ptr< MultiVector > create(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &size=batch_dim< 2 >{})

gko::batch::MultiVector::create_const
static std::unique_ptr< const MultiVector > create_const(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &sizes, gko::detail::const_array_view< ValueType > &&values)

gko::batch::log::BatchConvergence::create
static std::unique_ptr< BatchConvergence > create(const mask_type &enabled_events=gko::log::Logger::batch_solver_completed_mask)
Definition batch_logger.hpp:92

gko::batch::matrix::Csr::create_const
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &sizes, gko::detail::const_array_view< value_type > &&values, gko::detail::const_array_view< index_type > &&col_idxs, gko::detail::const_array_view< index_type > &&row_ptrs)

gko::version_info::get
static const version_info & get()
Definition version.hpp:139

gko::log::profile_event_category::solver
@ solver

gko::one
constexpr T one()
Definition math.hpp:630

gko::clone
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173

gko::make_temporary_clone
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Definition temporary_clone.hpp:208

gko::share
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224

ApplSysData
Definition batched-solver.cpp:42

gko::batch_dim
Definition batch_dim.hpp:27

gko::dim
Definition dim.hpp:26