622 * #include <iostream>
627 * #include <ginkgo/ginkgo.hpp>
629 *
constexpr double default_alpha = 10.0 / 3.0;
630 *
constexpr double default_beta = -2.0 / 3.0;
631 *
constexpr double default_gamma = -1.0 / 6.0;
639 *
template <
typename ValueType,
typename IndexType>
640 *
void generate_stencil_matrix(IndexType dp, IndexType* row_ptrs,
641 * IndexType* col_idxs, ValueType* values,
645 *
const size_t dp_2 = dp * dp;
647 *
for (IndexType k = 0; k < dp; ++k) {
648 *
for (IndexType i = 0; i < dp; ++i) {
649 *
const size_t index = i + k * dp;
650 *
for (IndexType j = -1; j <= 1; ++j) {
651 *
for (IndexType l = -1; l <= 1; ++l) {
652 *
const IndexType offset = l + 1 + 3 * (j + 1);
653 *
if ((k + j) >= 0 && (k + j) < dp && (i + l) >= 0 &&
655 * values[pos] = coefs[offset];
656 * col_idxs[pos] = index + l + dp * j;
661 * row_ptrs[index + 1] = pos;
667 *
template <
typename Closure,
typename ClosureT,
typename ValueType,
668 *
typename IndexType>
669 *
void generate_rhs(IndexType dp, Closure f, ClosureT u, ValueType* rhs,
672 *
const size_t dp_2 = dp * dp;
673 *
const ValueType h = 1.0 / (dp + 1.0);
674 *
for (IndexType i = 0; i < dp; ++i) {
675 *
const auto yi = ValueType(i + 1) * h;
676 *
for (IndexType j = 0; j < dp; ++j) {
677 *
const auto xi = ValueType(j + 1) * h;
678 *
const auto index = i * dp + j;
679 *
rhs[index] = -f(xi, yi) * h * h;
683 *
for (
size_t i = 0; i < dp; ++i) {
684 *
const auto xi = ValueType(i + 1) * h;
685 *
const auto index_top = i;
686 *
const auto index_bot = i + dp * (dp - 1);
688 *
rhs[index_top] -= u(xi - h, 0.0) * coefs[0];
689 *
rhs[index_top] -= u(xi, 0.0) * coefs[1];
690 *
rhs[index_top] -= u(xi + h, 0.0) * coefs[2];
692 *
rhs[index_bot] -= u(xi - h, 1.0) * coefs[6];
693 *
rhs[index_bot] -= u(xi, 1.0) * coefs[7];
694 *
rhs[index_bot] -= u(xi + h, 1.0) * coefs[8];
696 *
for (
size_t i = 0; i < dp; ++i) {
697 *
const auto yi = ValueType(i + 1) * h;
698 *
const auto index_left = i * dp;
699 *
const auto index_right = i * dp + (dp - 1);
701 *
rhs[index_left] -= u(0.0, yi - h) * coefs[0];
702 *
rhs[index_left] -= u(0.0, yi) * coefs[3];
703 *
rhs[index_left] -= u(0.0, yi + h) * coefs[6];
705 *
rhs[index_right] -= u(1.0, yi - h) * coefs[2];
706 *
rhs[index_right] -= u(1.0, yi) * coefs[5];
707 *
rhs[index_right] -= u(1.0, yi + h) * coefs[8];
710 *
rhs[0] += u(0.0, 0.0) * coefs[0];
711 *
rhs[(dp - 1)] += u(1.0, 0.0) * coefs[2];
712 *
rhs[(dp - 1) * dp] += u(0.0, 1.0) * coefs[6];
713 *
rhs[dp * dp - 1] += u(1.0, 1.0) * coefs[8];
717 *
template <
typename ValueType,
typename IndexType>
718 *
void print_solution(IndexType dp,
const ValueType* u)
720 *
for (IndexType i = 0; i < dp; ++i) {
721 *
for (IndexType j = 0; j < dp; ++j) {
722 * std::cout << u[i * dp + j] <<
' ';
726 * std::cout << std::endl;
730 *
template <
typename Closure,
typename ValueType,
typename IndexType>
734 *
const ValueType h = 1.0 / (dp + 1);
736 *
for (IndexType j = 0; j < dp; ++j) {
737 *
const auto xi = ValueType(j + 1) * h;
738 *
for (IndexType i = 0; i < dp; ++i) {
740 *
const auto yi = ValueType(i + 1) * h;
742 *
abs(u[i * dp + j] - correct_u(xi, yi)) /
abs(correct_u(xi, yi));
749 *
template <
typename ValueType,
typename IndexType>
750 *
void solve_system(
const std::string& executor_string,
751 *
unsigned int discretization_points, IndexType* row_ptrs,
752 * IndexType* col_idxs, ValueType* values, ValueType* rhs,
761 *
const auto& dp = discretization_points;
764 * std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
781 * {
"reference", [] {
return gko::ReferenceExecutor::create(); }}};
783 *
const auto exec = exec_map.at(executor_string)();
784 *
const auto app_exec = exec->get_master();
787 *
auto matrix = mtx::create(
789 * val_array::view(app_exec, (3 * dp - 2) * (3 * dp - 2), values),
790 * idx_array::view(app_exec, (3 * dp - 2) * (3 * dp - 2), col_idxs),
791 * idx_array::view(app_exec, dp_2 + 1, row_ptrs));
794 * val_array::view(app_exec, dp_2, rhs), 1);
797 * val_array::view(app_exec, dp_2, u), 1);
801 * .with_criteria(gko::stop::Iteration::build().with_max_iters(dp_2),
803 * .with_reduction_factor(reduction_factor))
804 * .with_preconditioner(bj::build())
812 *
int main(
int argc,
char* argv[])
814 *
using ValueType = double;
815 *
using IndexType = int;
819 *
if (argc == 2 && std::string(argv[1]) ==
"--help") {
821 * <<
"Usage: " << argv[0]
822 * <<
" [executor] [DISCRETIZATION_POINTS] [alpha] [beta] [gamma]"
827 *
const auto executor_string = argc >= 2 ? argv[1] :
"reference";
828 *
const IndexType discretization_points =
829 * argc >= 3 ? std::atoi(argv[2]) : 100;
830 *
const ValueType alpha_c = argc >= 4 ? std::atof(argv[3]) : default_alpha;
831 *
const ValueType beta_c = argc >= 5 ? std::atof(argv[4]) : default_beta;
832 *
const ValueType gamma_c = argc >= 6 ? std::atof(argv[5]) : default_gamma;
834 * std::array<ValueType, 9> coefs{
835 * gamma_c, beta_c, gamma_c,
836 * beta_c, alpha_c, beta_c,
837 * gamma_c, beta_c, gamma_c};
839 *
const auto dp = discretization_points;
840 *
const size_t dp_2 = dp * dp;
842 *
auto correct_u = [](ValueType x, ValueType y) {
843 *
return x * x * x + y * y * y;
845 *
auto f = [](ValueType x, ValueType y) {
846 *
return ValueType(6) * x + ValueType(6) * y;
849 * std::vector<IndexType> row_ptrs(dp_2 + 1);
850 * std::vector<IndexType> col_idxs((3 * dp - 2) * (3 * dp - 2));
851 * std::vector<ValueType> values((3 * dp - 2) * (3 * dp - 2));
852 * std::vector<ValueType>
rhs(dp_2);
853 * std::vector<ValueType> u(dp_2, 0.0);
855 * generate_stencil_matrix(dp, row_ptrs.data(), col_idxs.data(), values.data(),
857 * generate_rhs(dp, f, correct_u,
rhs.data(), coefs.data());
861 *
auto start_time = std::chrono::steady_clock::now();
862 * solve_system(executor_string, dp, row_ptrs.data(), col_idxs.data(),
863 * values.data(),
rhs.data(), u.data(), reduction_factor);
864 *
auto stop_time = std::chrono::steady_clock::now();
865 *
auto runtime_duration =
866 *
static_cast<double>(
867 * std::chrono::duration_cast<std::chrono::nanoseconds>(stop_time -
872 * std::cout <<
"The average relative error is "
873 * << calculate_error(dp, u.data(), correct_u) /
876 * std::cout <<
"The runtime is " << std::to_string(runtime_duration) <<
" ms"
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)
Definition jacobi.hpp:189
Definition residual_norm.hpp:113
static const version_info & get()
Definition version.hpp:139
constexpr std::enable_if_t<!is_complex_s< T >::value, T > abs(const T &x)
Definition math.hpp:931
std::size_t size_type
Definition types.hpp:89
std::remove_reference< OwningPointer >::type && give(OwningPointer &&p)
Definition utils_helper.hpp:247
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260