703 *
ApplSysData appl_generate_system(
const int nrows,
const size_type nsystems,
704 * std::shared_ptr<gko::Executor> exec);
706 *
void appl_clean_up(
ApplSysData& appl_data, std::shared_ptr<gko::Executor> exec);
709 *
int main(
int argc,
char* argv[])
713 *
if (argc == 2 && (std::string(argv[1]) ==
"--help")) {
714 * std::cerr <<
"Usage: " << argv[0]
715 * <<
" [executor] [num_systems] [num_rows] [print_residuals] "
721 *
const auto executor_string = argc >= 2 ? argv[1] :
"reference";
722 * std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
739 * {
"reference", [] {
return gko::ReferenceExecutor::create(); }}};
741 *
const auto exec = exec_map.at(executor_string)();
743 *
const size_type num_systems = argc >= 3 ? std::atoi(argv[2]) : 2;
744 *
const int num_rows = argc >= 4 ? std::atoi(argv[3]) : 32;
745 *
const bool print_residuals =
746 * argc >= 5 ? (std::string(argv[4]) ==
"true") : false;
747 *
const int num_reps = argc >= 6 ? std::atoi(argv[5]) : 20;
748 *
auto appl_sys = appl_generate_system(num_rows, num_systems, exec);
749 *
auto batch_mat_size =
751 *
auto batch_vec_size =
754 * exec, num_systems * appl_sys.nnz, appl_sys.all_values);
756 * appl_sys.row_ptrs);
758 * appl_sys.col_idxs);
760 * exec, batch_mat_size, std::move(vals_view), std::move(colidxs_view),
761 * std::move(rowptrs_view)));
763 * exec, num_systems * num_rows, appl_sys.all_rhs);
767 *
for (size_type isys = 0; isys < num_systems; isys++) {
768 *
for (
int irow = 0; irow < num_rows; irow++) {
769 * host_x->at(isys, irow, 0) = gko::zero<value_type>();
772 * x->copy_from(host_x.get());
774 *
const real_type reduction_factor{1e-10};
777 * .with_max_iterations(500)
778 * .with_tolerance(reduction_factor)
779 * .with_tolerance_type(gko::batch::stop::tolerance_type::relative)
783 * std::shared_ptr<const gko::batch::log::BatchConvergence<value_type>>
786 *
solver->add_logger(logger);
789 *
for (
int i = 0; i < 3; ++i) {
790 * x_clone->copy_from(x.get());
791 *
solver->apply(b, x_clone);
794 *
double apply_time = 0.0;
795 *
for (
int i = 0; i < num_reps; ++i) {
796 * x_clone->copy_from(x.get());
797 * exec->synchronize();
798 * std::chrono::steady_clock::time_point t1 =
799 * std::chrono::steady_clock::now();
800 *
solver->apply(b, x_clone);
801 * exec->synchronize();
802 * std::chrono::steady_clock::time_point t2 =
803 * std::chrono::steady_clock::now();
805 * std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
806 * apply_time += time_span.count();
808 * x->copy_from(x_clone.get());
809 *
solver->remove_logger(logger.get());
813 * host_b_norm->fill(0.0);
815 * b->compute_norm2(host_b_norm);
819 * neg_one->fill(-1.0);
822 * A->apply(one, x, neg_one, res);
824 * host_res_norm->fill(0.0);
825 * res->compute_norm2(host_res_norm);
827 * exec->get_master(), &logger->get_residual_norm());
829 * exec->get_master(), &logger->get_num_iterations());
831 *
if (print_residuals) {
832 * std::cout <<
"Residual norm sqrt(r^T r):\n";
833 *
auto unb_res = detail::unbatch(host_res_norm.get());
834 *
auto unb_bnorm = detail::unbatch(host_b_norm.get());
835 *
for (size_type i = 0; i < num_systems; ++i) {
836 * std::cout <<
" System no. " << i
837 * <<
": residual norm = " << unb_res[i]->at(0, 0)
838 * <<
", implicit residual norm = "
839 * << host_log_resid->get_const_data()[i]
840 * <<
", iterations = "
841 * << host_log_iters->get_const_data()[i] << std::endl;
842 *
const real_type relresnorm =
843 * unb_res[i]->at(0, 0) / unb_bnorm[i]->at(0, 0);
844 *
if (!(relresnorm <= reduction_factor)) {
845 * std::cout <<
"System " << i <<
" converged only to "
846 * << relresnorm <<
" relative residual." << std::endl;
850 * std::cout <<
"Solver type: "
851 * <<
"batch::bicgstab"
852 * <<
"\nMatrix size: " << A->get_common_size()
853 * <<
"\nNum batch entries: " << A->get_num_batch_items()
854 * <<
"\nEntire solve took: " << apply_time / num_reps <<
" seconds."
857 * appl_clean_up(appl_sys, exec);
862 *
ApplSysData appl_generate_system(
const int nrows,
const size_type nsystems,
863 * std::shared_ptr<gko::Executor> exec)
865 *
const int nnz = nrows * 3 - 2;
866 * std::default_random_engine rgen(15);
867 * std::normal_distribution<real_type> distb(0.5, 0.1);
868 * std::vector<real_type> spacings(nsystems * nrows);
869 * std::generate(spacings.begin(), spacings.end(),
870 * [&]() { return distb(rgen); });
872 * std::vector<value_type> allvalues(nnz * nsystems);
873 *
for (size_type isys = 0; isys < nsystems; isys++) {
874 * allvalues.at(isys * nnz) = 2.0 / spacings.at(isys * nrows);
875 * allvalues.at(isys * nnz + 1) = -1.0;
876 *
for (
int irow = 0; irow < nrows - 2; irow++) {
877 * allvalues.at(isys * nnz + 2 + irow * 3) = -1.0;
878 * allvalues.at(isys * nnz + 2 + irow * 3 + 1) =
879 * 2.0 / spacings.at(isys * nrows + irow + 1);
880 * allvalues.at(isys * nnz + 2 + irow * 3 + 2) = -1.0;
882 * allvalues.at(isys * nnz + 2 + (nrows - 2) * 3) = -1.0;
883 * allvalues.at(isys * nnz + 2 + (nrows - 2) * 3 + 1) =
884 * 2.0 / spacings.at((isys + 1) * nrows - 1);
885 * assert(isys * nnz + 2 + (nrows - 2) * 3 + 2 == (isys + 1) * nnz);
888 * std::vector<index_type> rowptrs(nrows + 1);
891 *
for (
int i = 2; i < nrows; i++) {
892 * rowptrs.at(i) = rowptrs.at(i - 1) + 3;
894 * rowptrs.at(nrows) = rowptrs.at(nrows - 1) + 2;
895 * assert(rowptrs.at(nrows) == nnz);
897 * std::vector<index_type> colidxs(nnz);
900 *
const int nnz_per_row = 3;
901 *
for (
int irow = 1; irow < nrows - 1; irow++) {
902 * colidxs.at(2 + (irow - 1) * nnz_per_row) = irow - 1;
903 * colidxs.at(2 + (irow - 1) * nnz_per_row + 1) = irow;
904 * colidxs.at(2 + (irow - 1) * nnz_per_row + 2) = irow + 1;
906 * colidxs.at(2 + (nrows - 2) * nnz_per_row) = nrows - 2;
907 * colidxs.at(2 + (nrows - 2) * nnz_per_row + 1) = nrows - 1;
908 * assert(2 + (nrows - 2) * nnz_per_row + 1 == nnz - 1);
910 * std::vector<value_type> allb(nrows * nsystems);
911 *
for (size_type isys = 0; isys < nsystems; isys++) {
912 *
const value_type bval = distb(rgen);
913 * std::fill(allb.begin() + isys * nrows,
914 * allb.begin() + (isys + 1) * nrows, bval);
917 * index_type*
const row_ptrs = exec->alloc<index_type>(nrows + 1);
918 * exec->copy_from(exec->get_master().get(),
static_cast<size_type
>(nrows + 1),
919 * rowptrs.data(), row_ptrs);
920 * index_type*
const col_idxs = exec->alloc<index_type>(nnz);
921 * exec->copy_from(exec->get_master().get(),
static_cast<size_type
>(nnz),
922 * colidxs.data(), col_idxs);
923 * value_type*
const all_values = exec->alloc<value_type>(nsystems * nnz);
924 * exec->copy_from(exec->get_master().get(), nsystems * nnz, allvalues.data(),
926 * value_type*
const all_b = exec->alloc<value_type>(nsystems * nrows);
927 * exec->copy_from(exec->get_master().get(), nsystems * nrows, allb.data(),
929 *
return {nsystems, nrows, nnz, row_ptrs, col_idxs, all_values, all_b};
932 *
void appl_clean_up(
ApplSysData& appl_data, std::shared_ptr<gko::Executor> exec)
934 * exec->free(
const_cast<index_type*
>(appl_data.row_ptrs));
935 * exec->free(
const_cast<index_type*
>(appl_data.col_idxs));
936 * exec->free(
const_cast<value_type*
>(appl_data.all_values));
937 * exec->free(
const_cast<value_type*
>(appl_data.all_rhs));
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Definition executor.hpp:1396
static detail::const_array_view< ValueType > const_view(std::shared_ptr< const Executor > exec, size_type size, const value_type *data)
Definition array.hpp:384
static std::unique_ptr< MultiVector > create(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &size=batch_dim< 2 >{})
static std::unique_ptr< const MultiVector > create_const(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &sizes, gko::detail::const_array_view< ValueType > &&values)
static std::unique_ptr< BatchConvergence > create(const mask_type &enabled_events=gko::log::Logger::batch_solver_completed_mask)
Definition batch_logger.hpp:92
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const batch_dim< 2 > &sizes, gko::detail::const_array_view< value_type > &&values, gko::detail::const_array_view< index_type > &&col_idxs, gko::detail::const_array_view< index_type > &&row_ptrs)
static const version_info & get()
Definition version.hpp:139
constexpr T one()
Definition math.hpp:630
detail::cloned_type< Pointer > clone(const Pointer &p)
Definition utils_helper.hpp:173
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Definition temporary_clone.hpp:208
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Definition utils_helper.hpp:224
Definition batched-solver.cpp:42
Definition batch_dim.hpp:27