/home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/batched-solver.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/batched-solver.hpp Source File

Reference API: /home/docs/checkouts/readthedocs.org/user_builds/ginkgo-test/checkouts/latest/build/doc/doxygen/examples/batched-solver.hpp Source File
Reference API
batched-solver.hpp
1
703 * ApplSysData appl_generate_system(const int nrows, const size_type nsystems,
704 * std::shared_ptr<gko::Executor> exec);
705 *
706 * void appl_clean_up(ApplSysData& appl_data, std::shared_ptr<gko::Executor> exec);
707 *
708 *
709 * int main(int argc, char* argv[])
710 * {
711 * std::cout << gko::version_info::get() << std::endl;
712 *
713 * if (argc == 2 && (std::string(argv[1]) == "--help")) {
714 * std::cerr << "Usage: " << argv[0]
715 * << " [executor] [num_systems] [num_rows] [print_residuals] "
716 * "[num_reps]"
717 * << std::endl;
718 * std::exit(-1);
719 * }
720 *
721 * const auto executor_string = argc >= 2 ? argv[1] : "reference";
722 * std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
723 * exec_map{
724 * {"omp", [] { return gko::OmpExecutor::create(); }},
725 * {"cuda",
726 * [] {
727 * return gko::CudaExecutor::create(0,
728 * gko::OmpExecutor::create());
729 * }},
730 * {"hip",
731 * [] {
732 * return gko::HipExecutor::create(0, gko::OmpExecutor::create());
733 * }},
734 * {"dpcpp",
735 * [] {
736 * return gko::DpcppExecutor::create(0,
737 * gko::OmpExecutor::create());
738 * }},
739 * {"reference", [] { return gko::ReferenceExecutor::create(); }}};
740 *
741 * const auto exec = exec_map.at(executor_string)(); // throws if not valid
742 *
743 * const size_type num_systems = argc >= 3 ? std::atoi(argv[2]) : 2;
744 * const int num_rows = argc >= 4 ? std::atoi(argv[3]) : 32; // per system
745 * const bool print_residuals =
746 * argc >= 5 ? (std::string(argv[4]) == "true") : false;
747 * const int num_reps = argc >= 6 ? std::atoi(argv[5]) : 20;
748 * auto appl_sys = appl_generate_system(num_rows, num_systems, exec);
749 * auto batch_mat_size =
750 * gko::batch_dim<2>(num_systems, gko::dim<2>(num_rows, num_rows));
751 * auto batch_vec_size =
752 * gko::batch_dim<2>(num_systems, gko::dim<2>(num_rows, 1));
753 * auto vals_view = gko::array<value_type>::const_view(
754 * exec, num_systems * appl_sys.nnz, appl_sys.all_values);
755 * auto rowptrs_view = gko::array<index_type>::const_view(exec, num_rows + 1,
756 * appl_sys.row_ptrs);
757 * auto colidxs_view = gko::array<index_type>::const_view(exec, appl_sys.nnz,
758 * appl_sys.col_idxs);
759 * auto A = gko::share(mtx_type::create_const(
760 * exec, batch_mat_size, std::move(vals_view), std::move(colidxs_view),
761 * std::move(rowptrs_view)));
762 * auto b_view = gko::array<value_type>::const_view(
763 * exec, num_systems * num_rows, appl_sys.all_rhs);
764 * auto b = vec_type::create_const(exec, batch_vec_size, std::move(b_view));
765 * auto x = vec_type::create(exec);
766 * auto host_x = vec_type::create(exec->get_master(), batch_vec_size);
767 * for (size_type isys = 0; isys < num_systems; isys++) {
768 * for (int irow = 0; irow < num_rows; irow++) {
769 * host_x->at(isys, irow, 0) = gko::zero<value_type>();
770 * }
771 * }
772 * x->copy_from(host_x.get());
773 *
774 * const real_type reduction_factor{1e-10};
775 * auto solver =
776 * bicgstab::build()
777 * .with_max_iterations(500)
778 * .with_tolerance(reduction_factor)
779 * .with_tolerance_type(gko::batch::stop::tolerance_type::relative)
780 * .on(exec)
781 * ->generate(A);
782 *
783 * std::shared_ptr<const gko::batch::log::BatchConvergence<value_type>>
784 * logger = gko::batch::log::BatchConvergence<value_type>::create();
785 *
786 * solver->add_logger(logger);
787 * auto x_clone = gko::clone(x);
788 *
789 * for (int i = 0; i < 3; ++i) {
790 * x_clone->copy_from(x.get());
791 * solver->apply(b, x_clone);
792 * }
793 *
794 * double apply_time = 0.0;
795 * for (int i = 0; i < num_reps; ++i) {
796 * x_clone->copy_from(x.get());
797 * exec->synchronize();
798 * std::chrono::steady_clock::time_point t1 =
799 * std::chrono::steady_clock::now();
800 * solver->apply(b, x_clone);
801 * exec->synchronize();
802 * std::chrono::steady_clock::time_point t2 =
803 * std::chrono::steady_clock::now();
804 * auto time_span =
805 * std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
806 * apply_time += time_span.count();
807 * }
808 * x->copy_from(x_clone.get());
809 * solver->remove_logger(logger.get());
810 *
811 * auto norm_dim = gko::batch_dim<2>(num_systems, gko::dim<2>(1, 1));
812 * auto host_b_norm = real_vec_type::create(exec->get_master(), norm_dim);
813 * host_b_norm->fill(0.0);
814 *
815 * b->compute_norm2(host_b_norm);
816 * auto one = vec_type::create(exec, norm_dim);
817 * one->fill(1.0);
818 * auto neg_one = vec_type::create(exec, norm_dim);
819 * neg_one->fill(-1.0);
820 * auto res = vec_type::create(exec, batch_vec_size);
821 * res->copy_from(b);
822 * A->apply(one, x, neg_one, res);
823 * auto host_res_norm = real_vec_type::create(exec->get_master(), norm_dim);
824 * host_res_norm->fill(0.0);
825 * res->compute_norm2(host_res_norm);
826 * auto host_log_resid = gko::make_temporary_clone(
827 * exec->get_master(), &logger->get_residual_norm());
828 * auto host_log_iters = gko::make_temporary_clone(
829 * exec->get_master(), &logger->get_num_iterations());
830 *
831 * if (print_residuals) {
832 * std::cout << "Residual norm sqrt(r^T r):\n";
833 * auto unb_res = detail::unbatch(host_res_norm.get());
834 * auto unb_bnorm = detail::unbatch(host_b_norm.get());
835 * for (size_type i = 0; i < num_systems; ++i) {
836 * std::cout << " System no. " << i
837 * << ": residual norm = " << unb_res[i]->at(0, 0)
838 * << ", implicit residual norm = "
839 * << host_log_resid->get_const_data()[i]
840 * << ", iterations = "
841 * << host_log_iters->get_const_data()[i] << std::endl;
842 * const real_type relresnorm =
843 * unb_res[i]->at(0, 0) / unb_bnorm[i]->at(0, 0);
844 * if (!(relresnorm <= reduction_factor)) {
845 * std::cout << "System " << i << " converged only to "
846 * << relresnorm << " relative residual." << std::endl;
847 * }
848 * }
849 * }
850 * std::cout << "Solver type: "
851 * << "batch::bicgstab"
852 * << "\nMatrix size: " << A->get_common_size()
853 * << "\nNum batch entries: " << A->get_num_batch_items()
854 * << "\nEntire solve took: " << apply_time / num_reps << " seconds."
855 * << std::endl;
856 *
857 * appl_clean_up(appl_sys, exec);
858 * return 0;
859 * }
860 *
861 *
862 * ApplSysData appl_generate_system(const int nrows, const size_type nsystems,
863 * std::shared_ptr<gko::Executor> exec)
864 * {
865 * const int nnz = nrows * 3 - 2;
866 * std::default_random_engine rgen(15);
867 * std::normal_distribution<real_type> distb(0.5, 0.1);
868 * std::vector<real_type> spacings(nsystems * nrows);
869 * std::generate(spacings.begin(), spacings.end(),
870 * [&]() { return distb(rgen); });
871 *
872 * std::vector<value_type> allvalues(nnz * nsystems);
873 * for (size_type isys = 0; isys < nsystems; isys++) {
874 * allvalues.at(isys * nnz) = 2.0 / spacings.at(isys * nrows);
875 * allvalues.at(isys * nnz + 1) = -1.0;
876 * for (int irow = 0; irow < nrows - 2; irow++) {
877 * allvalues.at(isys * nnz + 2 + irow * 3) = -1.0;
878 * allvalues.at(isys * nnz + 2 + irow * 3 + 1) =
879 * 2.0 / spacings.at(isys * nrows + irow + 1);
880 * allvalues.at(isys * nnz + 2 + irow * 3 + 2) = -1.0;
881 * }
882 * allvalues.at(isys * nnz + 2 + (nrows - 2) * 3) = -1.0;
883 * allvalues.at(isys * nnz + 2 + (nrows - 2) * 3 + 1) =
884 * 2.0 / spacings.at((isys + 1) * nrows - 1);
885 * assert(isys * nnz + 2 + (nrows - 2) * 3 + 2 == (isys + 1) * nnz);
886 * }
887 *
888 * std::vector<index_type> rowptrs(nrows + 1);
889 * rowptrs.at(0) = 0;
890 * rowptrs.at(1) = 2;
891 * for (int i = 2; i < nrows; i++) {
892 * rowptrs.at(i) = rowptrs.at(i - 1) + 3;
893 * }
894 * rowptrs.at(nrows) = rowptrs.at(nrows - 1) + 2;
895 * assert(rowptrs.at(nrows) == nnz);
896 *
897 * std::vector<index_type> colidxs(nnz);
898 * colidxs.at(0) = 0;
899 * colidxs.at(1) = 1;
900 * const int nnz_per_row = 3;
901 * for (int irow = 1; irow < nrows - 1; irow++) {
902 * colidxs.at(2 + (irow - 1) * nnz_per_row) = irow - 1;
903 * colidxs.at(2 + (irow - 1) * nnz_per_row + 1) = irow;
904 * colidxs.at(2 + (irow - 1) * nnz_per_row + 2) = irow + 1;
905 * }
906 * colidxs.at(2 + (nrows - 2) * nnz_per_row) = nrows - 2;
907 * colidxs.at(2 + (nrows - 2) * nnz_per_row + 1) = nrows - 1;
908 * assert(2 + (nrows - 2) * nnz_per_row + 1 == nnz - 1);
909 *
910 * std::vector<value_type> allb(nrows * nsystems);
911 * for (size_type isys = 0; isys < nsystems; isys++) {
912 * const value_type bval = distb(rgen);
913 * std::fill(allb.begin() + isys * nrows,
914 * allb.begin() + (isys + 1) * nrows, bval);
915 * }
916 *
917 * index_type* const row_ptrs = exec->alloc<index_type>(nrows + 1);
918 * exec->copy_from(exec->get_master().get(), static_cast<size_type>(nrows + 1),
919 * rowptrs.data(), row_ptrs);
920 * index_type* const col_idxs = exec->alloc<index_type>(nnz);
921 * exec->copy_from(exec->get_master().get(), static_cast<size_type>(nnz),
922 * colidxs.data(), col_idxs);
923 * value_type* const all_values = exec->alloc<value_type>(nsystems * nnz);
924 * exec->copy_from(exec->get_master().get(), nsystems * nnz, allvalues.data(),
925 * all_values);
926 * value_type* const all_b = exec->alloc<value_type>(nsystems * nrows);
927 * exec->copy_from(exec->get_master().get(), nsystems * nrows, allb.data(),
928 * all_b);
929 * return {nsystems, nrows, nnz, row_ptrs, col_idxs, all_values, all_b};
930 * }
931 *
932 * void appl_clean_up(ApplSysData& appl_data, std::shared_ptr<gko::Executor> exec)
933 * {
934 * exec->free(const_cast<index_type*>(appl_data.row_ptrs));
935 * exec->free(const_cast<index_type*>(appl_data.col_idxs));
936 * exec->free(const_cast<value_type*>(appl_data.all_values));
937 * exec->free(const_cast<value_type*>(appl_data.all_rhs));
938 * }
939 * @endcode
940*/
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
static detail::const_array_view< ValueType > const_view(std::shared_ptr< const Executor > exec, size_type size, const value_type *data)
Definition array.hpp:384
static std::unique_ptr< MultiVector > create(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &size=batch_dim< 2 >{})
static std::unique_ptr< const MultiVector > create_const(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &sizes, gko::detail::const_array_view< ValueType > &&values)
static std::unique_ptr< BatchConvergence > create(const mask_type &enabled_events=gko::log::Logger::batch_solver_completed_mask)
Definition batch_logger.hpp:92
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &sizes, gko::detail::const_array_view< value_type > &&values, gko::detail::const_array_view< index_type > &&col_idxs, gko::detail::const_array_view< index_type > &&row_ptrs)
static const version_info & get()
Definition version.hpp:139
constexpr T one()
Definition math.hpp:630
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Definition temporary_clone.hpp:208
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
Definition batched-solver.cpp:42
Definition batch_dim.hpp:27
Definition dim.hpp:26