The iterative-refinement program

The iterative-refinement program#

Reference API: The iterative-refinement program
Reference API
The iterative-refinement program

The iterative refinement solver example..

This example depends on simple-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

This example shows how to use the iterative refinement solver.

In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. An inaccurate CG solver is used as the inner solver to an iterative refinement (IR) method which solves a linear system. The example features the iteration count and runtime of the IR solver.

The commented program

}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)(); // throws if not valid

Read data

auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));

Create RHS and initial guess as 1

gko::size_type size = A->get_size()[0];
auto host_x = gko::matrix::Dense<ValueType>::create(exec->get_master(),
gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)
std::size_t size_type
Definition types.hpp:89
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173
Definition dim.hpp:26

Calculate initial residual by overwriting b

auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);

copy b again

b->copy_from(host_x);

Create solver factory

gko::size_type max_iters = 10000u;
RealValueType outer_reduction_factor{1e-12};
RealValueType inner_reduction_factor{1e-2};
auto solver_gen =
ir::build()
.with_solver(cg::build().with_criteria(
.with_reduction_factor(inner_reduction_factor)))
.with_criteria(
gko::stop::Iteration::build().with_max_iters(max_iters),
.with_reduction_factor(outer_reduction_factor))
.on(exec);
Definition residual_norm.hpp:113

Create solver

auto solver = solver_gen->generate(A);
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver->add_logger(logger);
static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)
Definition convergence.hpp:73

Solve system

exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);

Get residual

auto res = gko::as<real_vec>(logger->get_residual_norm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);

Print solver statistics

std::cout << "IR iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "IR execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}

Results

This is the expected output:

Initial residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
194.679
Final residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
4.23821e-11
IR iteration count: 24
IR execution time [ms]: 0.794962

Comments about programming and debugging

The plain program

#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <ginkgo/ginkgo.hpp>
int main(int argc, char* argv[])
{
using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;
std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
gko::size_type size = A->get_size()[0];
auto host_x = gko::matrix::Dense<ValueType>::create(exec->get_master(),
gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
b->copy_from(host_x);
gko::size_type max_iters = 10000u;
RealValueType outer_reduction_factor{1e-12};
RealValueType inner_reduction_factor{1e-2};
auto solver_gen =
ir::build()
.with_solver(cg::build().with_criteria(
.with_reduction_factor(inner_reduction_factor)))
.with_criteria(
gko::stop::Iteration::build().with_max_iters(max_iters),
.with_reduction_factor(outer_reduction_factor))
.on(exec);
auto solver = solver_gen->generate(A);
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver->add_logger(logger);
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
auto res = gko::as<real_vec>(logger->get_residual_norm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
std::cout << "IR iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "IR execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}
Definition csr.hpp:123
Definition cg.hpp:49
Definition ir.hpp:84
static const version_info & get()
Definition version.hpp:139
constexpr T one()
Definition math.hpp:630
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Definition mtx_io.hpp:295
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260