The adaptiveprecision-blockjacobi program#
|
Reference API
|
The adaptiveprecision-blockjacobi program
The preconditioned solver example..
This example depends on preconditioned-solver.
| Table of contents | |
|---|---|
This example shows how to use the adaptive precision block-Jacobi preconditioner.
In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. The preconditioned CG solver is enhanced with a block-Jacobi preconditioner that optimizes the storage format for the distinct inverted diagonal blocks to the numerical requirements. The example features the iteration count and runtime of the CG solver.
The commented program
const auto executor_string = argc >= 2 ? argv[1] : "reference";
Figure out where to run the code
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"cuda",
[] {
return gko::CudaExecutor::create(0,
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
return gko::DpcppExecutor::create(0,
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
executor where Ginkgo will perform the computation
const auto exec = exec_map.at(executor_string)(); // throws if not valid
Read data
auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
Create RHS and initial guess as 1
gko::size_type size = A->get_size()[0];
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)
Definition dim.hpp:26
Calculate initial residual by overwriting b
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
copy b again
b->copy_from(host_x);
Create solver factory
const RealValueType reduction_factor = 1e-7;
auto solver_gen =
cg::build()
.with_criteria(gko::stop::Iteration::build().with_max_iters(10000u),
.with_reduction_factor(reduction_factor))
Definition residual_norm.hpp:113
Add preconditioner, these 2 lines are the only difference from the simple solver example
.with_preconditioner(
bj::build().with_max_block_size(16u).with_storage_optimization(
.on(exec);
static constexpr precision_reduction autodetect() noexcept
Definition types.hpp:313
Create solver
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver_gen->add_logger(logger);
auto solver = solver_gen->generate(A);
static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)
Definition convergence.hpp:73
Solve system
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
Get residual
auto res = gko::as<real_vec>(logger->get_residual_norm());
auto impl_res = gko::as<real_vec>(logger->get_implicit_sq_resnorm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
std::cout << "Implicit residual norm squared (r^2):\n";
write(std::cout, impl_res);
Print solver statistics
std::cout << "CG iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "CG execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}
Results
This is the expected output:
Initial residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
194.679
Final residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
5.69384e-06
Implicit residual norm squared (r^2):
%%MatrixMarket matrix array real general
1 1
1.27043e-15
CG iteration count: 5
CG execution time [ms]: 0.080041
Comments about programming and debugging
The plain program
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <ginkgo/ginkgo.hpp>
int main(int argc, char* argv[])
{
using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;
using mtx = gko::matrix::Csr<ValueType, IndexType>;
using cg = gko::solver::Cg<ValueType>;
std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"cuda",
[] {
return gko::CudaExecutor::create(0,
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
return gko::DpcppExecutor::create(0,
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
gko::size_type size = A->get_size()[0];
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
b->copy_from(host_x);
const RealValueType reduction_factor = 1e-7;
auto solver_gen =
cg::build()
.with_criteria(gko::stop::Iteration::build().with_max_iters(10000u),
.with_reduction_factor(reduction_factor))
.with_preconditioner(
bj::build().with_max_block_size(16u).with_storage_optimization(
.on(exec);
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver_gen->add_logger(logger);
auto solver = solver_gen->generate(A);
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
auto res = gko::as<real_vec>(logger->get_residual_norm());
auto impl_res = gko::as<real_vec>(logger->get_implicit_sq_resnorm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
std::cout << "Implicit residual norm squared (r^2):\n";
write(std::cout, impl_res);
std::cout << "CG iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "CG execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}
Definition csr.hpp:123
Definition jacobi.hpp:189
Definition cg.hpp:49
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Definition mtx_io.hpp:295
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260
Generated by