The par-ilu-convergence program

The par-ilu-convergence program#

Reference API: The par-ilu-convergence program
Reference API
The par-ilu-convergence program

The ParILU convergence example..

This example depends on simple-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

Introduction

About the example

This example can be used to inspect the convergence behavior of parallel incomplete factorizations.

The commented program

auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.sort_row_major();
mtx_data.sort_row_major();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
Definition exception.hpp:57
virtual const char * what() const noexcept override
Definition exception.hpp:74
Definition csr.hpp:123
void write(mat_data &data) const override
constexpr auto squared_norm(const T &x) -> decltype(real(conj(x) *x))
Definition math.hpp:913
Definition matrix_data.hpp:126
std::vector< nonzero_type > nonzeros
Definition matrix_data.hpp:453
void sort_row_major()
Definition matrix_data.hpp:458

print usage message

if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}

generate executor based on first argument

auto exec = try_generate([&] { return executors.at(argv[1])(); });

set matrix and preconditioner name with default values

std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);

load matrix file into Csr format

auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
return gko::read<gko::matrix::Csr<ValueType, IndexType>>(mtx_stream,
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto one = gko::initialize<gko::matrix::Dense<ValueType>>({1.0}, exec);
auto minus_one =
gko::initialize<gko::matrix::Dense<ValueType>>({-1.0}, exec);
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
gko::as<gko::Composition<ValueType>>(factory->generate(mtx));
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
Definition par_ic.hpp:68
Definition par_ict.hpp:68
Definition par_ilu.hpp:69
Definition par_ilut.hpp:71
constexpr T one()
Definition math.hpp:630
std::size_t size_type
Definition types.hpp:89
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224

Results

This is the expected output:

Usage: executable <reference|omp|cuda|hip|dpcpp> [<matrix-file>] [<parilu|parilut|paric|parict] [<max-iterations>] [<num-repetitions>] [fill-in-limit]

When specifying an executor:

Reading data/A.mtx
1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;
2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;

Comments about programming and debugging

The plain program

#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <ginkgo/ginkgo.hpp>
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
executors{
{"reference", [] { return gko::ReferenceExecutor::create(); }},
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp", [] {
}}};
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.sort_row_major();
mtx_data.sort_row_major();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}
auto exec = try_generate([&] { return executors.at(argv[1])(); });
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
return gko::read<gko::matrix::Csr<ValueType, IndexType>>(mtx_stream,
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto one = gko::initialize<gko::matrix::Dense<ValueType>>({1.0}, exec);
auto minus_one =
gko::initialize<gko::matrix::Dense<ValueType>>({-1.0}, exec);
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
gko::as<gko::Composition<ValueType>>(factory->generate(mtx));
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396