The iterative-refinement program#
|
Reference API
|
The iterative-refinement program
The iterative refinement solver example..
This example depends on simple-solver.
| Table of contents | |
|---|---|
This example shows how to use the iterative refinement solver.
In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. An inaccurate CG solver is used as the inner solver to an iterative refinement (IR) method which solves a linear system. The example features the iteration count and runtime of the IR solver.
The commented program
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"cuda",
[] {
return gko::CudaExecutor::create(0,
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
return gko::DpcppExecutor::create(0,
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
executor where Ginkgo will perform the computation
const auto exec = exec_map.at(executor_string)(); // throws if not valid
Read data
auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
Create RHS and initial guess as 1
gko::size_type size = A->get_size()[0];
auto host_x = gko::matrix::Dense<ValueType>::create(exec->get_master(),
gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)
Definition dim.hpp:26
Calculate initial residual by overwriting b
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
copy b again
b->copy_from(host_x);
Create solver factory
gko::size_type max_iters = 10000u;
RealValueType outer_reduction_factor{1e-12};
RealValueType inner_reduction_factor{1e-2};
auto solver_gen =
ir::build()
.with_solver(cg::build().with_criteria(
.with_reduction_factor(inner_reduction_factor)))
.with_criteria(
gko::stop::Iteration::build().with_max_iters(max_iters),
.with_reduction_factor(outer_reduction_factor))
.on(exec);
Definition residual_norm.hpp:113
Create solver
auto solver = solver_gen->generate(A);
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver->add_logger(logger);
static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)
Definition convergence.hpp:73
Solve system
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
Get residual
auto res = gko::as<real_vec>(logger->get_residual_norm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
Print solver statistics
std::cout << "IR iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "IR execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}
Results
This is the expected output:
Initial residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
194.679
Final residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
4.23821e-11
IR iteration count: 24
IR execution time [ms]: 0.794962
Comments about programming and debugging
The plain program
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <ginkgo/ginkgo.hpp>
int main(int argc, char* argv[])
{
using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;
using mtx = gko::matrix::Csr<ValueType, IndexType>;
using cg = gko::solver::Cg<ValueType>;
using ir = gko::solver::Ir<ValueType>;
std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"cuda",
[] {
return gko::CudaExecutor::create(0,
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
return gko::DpcppExecutor::create(0,
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
gko::size_type size = A->get_size()[0];
auto host_x = gko::matrix::Dense<ValueType>::create(exec->get_master(),
gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
b->copy_from(host_x);
gko::size_type max_iters = 10000u;
RealValueType outer_reduction_factor{1e-12};
RealValueType inner_reduction_factor{1e-2};
auto solver_gen =
ir::build()
.with_solver(cg::build().with_criteria(
.with_reduction_factor(inner_reduction_factor)))
.with_criteria(
gko::stop::Iteration::build().with_max_iters(max_iters),
.with_reduction_factor(outer_reduction_factor))
.on(exec);
auto solver = solver_gen->generate(A);
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver->add_logger(logger);
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
auto res = gko::as<real_vec>(logger->get_residual_norm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
std::cout << "IR iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "IR execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}
Definition csr.hpp:123
Definition cg.hpp:49
Definition ir.hpp:84
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Definition mtx_io.hpp:295
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260
Generated by