/home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/nine-pt-stencil-solver.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/nine-pt-stencil-solver.hpp Source File#

Reference API: /home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/nine-pt-stencil-solver.hpp Source File
Reference API
nine-pt-stencil-solver.hpp
1
619 *
620 * #include <array>
621 * #include <chrono>
622 * #include <iostream>
623 * #include <map>
624 * #include <string>
625 * #include <vector>
626 *
627 * #include <ginkgo/ginkgo.hpp>
628 *
629 * constexpr double default_alpha = 10.0 / 3.0;
630 * constexpr double default_beta = -2.0 / 3.0;
631 * constexpr double default_gamma = -1.0 / 6.0;
632 *
633 * /* Possible alternative default values are
634 * * default_alpha = 8.0;
635 * * default_beta = -1.0;
636 * * default_gamma = -1.0;
637 * */
638 *
639 * template <typename ValueType, typename IndexType>
640 * void generate_stencil_matrix(IndexType dp, IndexType* row_ptrs,
641 * IndexType* col_idxs, ValueType* values,
642 * ValueType* coefs)
643 * {
644 * IndexType pos = 0;
645 * const size_t dp_2 = dp * dp;
646 * row_ptrs[0] = pos;
647 * for (IndexType k = 0; k < dp; ++k) {
648 * for (IndexType i = 0; i < dp; ++i) {
649 * const size_t index = i + k * dp;
650 * for (IndexType j = -1; j <= 1; ++j) {
651 * for (IndexType l = -1; l <= 1; ++l) {
652 * const IndexType offset = l + 1 + 3 * (j + 1);
653 * if ((k + j) >= 0 && (k + j) < dp && (i + l) >= 0 &&
654 * (i + l) < dp) {
655 * values[pos] = coefs[offset];
656 * col_idxs[pos] = index + l + dp * j;
657 * ++pos;
658 * }
659 * }
660 * }
661 * row_ptrs[index + 1] = pos;
662 * }
663 * }
664 * }
665 *
666 *
667 * template <typename Closure, typename ClosureT, typename ValueType,
668 * typename IndexType>
669 * void generate_rhs(IndexType dp, Closure f, ClosureT u, ValueType* rhs,
670 * ValueType* coefs)
671 * {
672 * const size_t dp_2 = dp * dp;
673 * const ValueType h = 1.0 / (dp + 1.0);
674 * for (IndexType i = 0; i < dp; ++i) {
675 * const auto yi = ValueType(i + 1) * h;
676 * for (IndexType j = 0; j < dp; ++j) {
677 * const auto xi = ValueType(j + 1) * h;
678 * const auto index = i * dp + j;
679 * rhs[index] = -f(xi, yi) * h * h;
680 * }
681 * }
682 *
683 * for (size_t i = 0; i < dp; ++i) {
684 * const auto xi = ValueType(i + 1) * h;
685 * const auto index_top = i;
686 * const auto index_bot = i + dp * (dp - 1);
687 *
688 * rhs[index_top] -= u(xi - h, 0.0) * coefs[0];
689 * rhs[index_top] -= u(xi, 0.0) * coefs[1];
690 * rhs[index_top] -= u(xi + h, 0.0) * coefs[2];
691 *
692 * rhs[index_bot] -= u(xi - h, 1.0) * coefs[6];
693 * rhs[index_bot] -= u(xi, 1.0) * coefs[7];
694 * rhs[index_bot] -= u(xi + h, 1.0) * coefs[8];
695 * }
696 * for (size_t i = 0; i < dp; ++i) {
697 * const auto yi = ValueType(i + 1) * h;
698 * const auto index_left = i * dp;
699 * const auto index_right = i * dp + (dp - 1);
700 *
701 * rhs[index_left] -= u(0.0, yi - h) * coefs[0];
702 * rhs[index_left] -= u(0.0, yi) * coefs[3];
703 * rhs[index_left] -= u(0.0, yi + h) * coefs[6];
704 *
705 * rhs[index_right] -= u(1.0, yi - h) * coefs[2];
706 * rhs[index_right] -= u(1.0, yi) * coefs[5];
707 * rhs[index_right] -= u(1.0, yi + h) * coefs[8];
708 * }
709 *
710 * rhs[0] += u(0.0, 0.0) * coefs[0];
711 * rhs[(dp - 1)] += u(1.0, 0.0) * coefs[2];
712 * rhs[(dp - 1) * dp] += u(0.0, 1.0) * coefs[6];
713 * rhs[dp * dp - 1] += u(1.0, 1.0) * coefs[8];
714 * }
715 *
716 *
717 * template <typename ValueType, typename IndexType>
718 * void print_solution(IndexType dp, const ValueType* u)
719 * {
720 * for (IndexType i = 0; i < dp; ++i) {
721 * for (IndexType j = 0; j < dp; ++j) {
722 * std::cout << u[i * dp + j] << ' ';
723 * }
724 * std::cout << '\n';
725 * }
726 * std::cout << std::endl;
727 * }
728 *
729 *
730 * template <typename Closure, typename ValueType, typename IndexType>
731 * gko::remove_complex<ValueType> calculate_error(IndexType dp, const ValueType* u,
732 * Closure correct_u)
733 * {
734 * const ValueType h = 1.0 / (dp + 1);
735 * gko::remove_complex<ValueType> error = 0.0;
736 * for (IndexType j = 0; j < dp; ++j) {
737 * const auto xi = ValueType(j + 1) * h;
738 * for (IndexType i = 0; i < dp; ++i) {
739 * using std::abs;
740 * const auto yi = ValueType(i + 1) * h;
741 * error +=
742 * abs(u[i * dp + j] - correct_u(xi, yi)) / abs(correct_u(xi, yi));
743 * }
744 * }
745 * return error;
746 * }
747 *
748 *
749 * template <typename ValueType, typename IndexType>
750 * void solve_system(const std::string& executor_string,
751 * unsigned int discretization_points, IndexType* row_ptrs,
752 * IndexType* col_idxs, ValueType* values, ValueType* rhs,
753 * ValueType* u, gko::remove_complex<ValueType> reduction_factor)
754 * {
757 * using cg = gko::solver::Cg<ValueType>;
759 * using val_array = gko::array<ValueType>;
760 * using idx_array = gko::array<IndexType>;
761 * const auto& dp = discretization_points;
762 * const gko::size_type dp_2 = dp * dp;
763 *
764 * std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
765 * exec_map{
766 * {"omp", [] { return gko::OmpExecutor::create(); }},
767 * {"cuda",
768 * [] {
769 * return gko::CudaExecutor::create(0,
771 * }},
772 * {"hip",
773 * [] {
775 * }},
776 * {"dpcpp",
777 * [] {
780 * }},
781 * {"reference", [] { return gko::ReferenceExecutor::create(); }}};
782 *
783 * const auto exec = exec_map.at(executor_string)(); // throws if not valid
784 * const auto app_exec = exec->get_master();
785 *
786 *
787 * auto matrix = mtx::create(
788 * exec, gko::dim<2>(dp_2),
789 * val_array::view(app_exec, (3 * dp - 2) * (3 * dp - 2), values),
790 * idx_array::view(app_exec, (3 * dp - 2) * (3 * dp - 2), col_idxs),
791 * idx_array::view(app_exec, dp_2 + 1, row_ptrs));
792 *
793 * auto b = vec::create(exec, gko::dim<2>(dp_2, 1),
794 * val_array::view(app_exec, dp_2, rhs), 1);
795 *
796 * auto x = vec::create(app_exec, gko::dim<2>(dp_2, 1),
797 * val_array::view(app_exec, dp_2, u), 1);
798 *
799 * auto solver_gen =
800 * cg::build()
801 * .with_criteria(gko::stop::Iteration::build().with_max_iters(dp_2),
803 * .with_reduction_factor(reduction_factor))
804 * .with_preconditioner(bj::build())
805 * .on(exec);
806 * auto solver = solver_gen->generate(gko::give(matrix));
807 *
808 * solver->apply(b, x);
809 * }
810 *
811 *
812 * int main(int argc, char* argv[])
813 * {
814 * using ValueType = double;
815 * using IndexType = int;
816 *
817 * std::cout << gko::version_info::get() << std::endl;
818 *
819 * if (argc == 2 && std::string(argv[1]) == "--help") {
820 * std::cerr
821 * << "Usage: " << argv[0]
822 * << " [executor] [DISCRETIZATION_POINTS] [alpha] [beta] [gamma]"
823 * << std::endl;
824 * std::exit(-1);
825 * }
826 *
827 * const auto executor_string = argc >= 2 ? argv[1] : "reference";
828 * const IndexType discretization_points =
829 * argc >= 3 ? std::atoi(argv[2]) : 100;
830 * const ValueType alpha_c = argc >= 4 ? std::atof(argv[3]) : default_alpha;
831 * const ValueType beta_c = argc >= 5 ? std::atof(argv[4]) : default_beta;
832 * const ValueType gamma_c = argc >= 6 ? std::atof(argv[5]) : default_gamma;
833 *
834 * std::array<ValueType, 9> coefs{
835 * gamma_c, beta_c, gamma_c,
836 * beta_c, alpha_c, beta_c,
837 * gamma_c, beta_c, gamma_c};
838 *
839 * const auto dp = discretization_points;
840 * const size_t dp_2 = dp * dp;
841 *
842 * auto correct_u = [](ValueType x, ValueType y) {
843 * return x * x * x + y * y * y;
844 * };
845 * auto f = [](ValueType x, ValueType y) {
846 * return ValueType(6) * x + ValueType(6) * y;
847 * };
848 *
849 * std::vector<IndexType> row_ptrs(dp_2 + 1);
850 * std::vector<IndexType> col_idxs((3 * dp - 2) * (3 * dp - 2));
851 * std::vector<ValueType> values((3 * dp - 2) * (3 * dp - 2));
852 * std::vector<ValueType> rhs(dp_2);
853 * std::vector<ValueType> u(dp_2, 0.0);
854 *
855 * generate_stencil_matrix(dp, row_ptrs.data(), col_idxs.data(), values.data(),
856 * coefs.data());
857 * generate_rhs(dp, f, correct_u, rhs.data(), coefs.data());
858 *
859 * const gko::remove_complex<ValueType> reduction_factor = 1e-7;
860 *
861 * auto start_time = std::chrono::steady_clock::now();
862 * solve_system(executor_string, dp, row_ptrs.data(), col_idxs.data(),
863 * values.data(), rhs.data(), u.data(), reduction_factor);
864 * auto stop_time = std::chrono::steady_clock::now();
865 * auto runtime_duration =
866 * static_cast<double>(
867 * std::chrono::duration_cast<std::chrono::nanoseconds>(stop_time -
868 * start_time)
869 * .count()) *
870 * 1e-6;
871 *
872 * std::cout << "The average relative error is "
873 * << calculate_error(dp, u.data(), correct_u) /
874 * static_cast<gko::remove_complex<ValueType>>(dp_2)
875 * << std::endl;
876 * std::cout << "The runtime is " << std::to_string(runtime_duration) << " ms"
877 * << std::endl;
878 * }
879 * @endcode
880*/
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
Definition array.hpp:166
Definition csr.hpp:123
static std::unique_ptr< Dense > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type stride=0)
Definition jacobi.hpp:189
Definition cg.hpp:49
Definition residual_norm.hpp:113
static const version_info & get()
Definition version.hpp:139
constexpr std::enable_if_t<!is_complex_s< T >::value, T > abs(const T &x)
Definition math.hpp:931
std::size_t size_type
Definition types.hpp:89
std::remove_reference< OwningPointer >::type && give(OwningPointer &&p)
Definition utils_helper.hpp:247
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260
Definition dim.hpp:26