ginkgo/core/matrix/csr.hpp Source File

ginkgo/core/matrix/csr.hpp Source File#

Reference API: ginkgo/core/matrix/csr.hpp Source File
Reference API
csr.hpp
1// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7
8
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
15
16
17namespace gko {
18namespace matrix {
19
20
21template <typename ValueType>
22class Dense;
23
24template <typename ValueType>
25class Diagonal;
26
27template <typename ValueType, typename IndexType>
28class Coo;
29
30template <typename ValueType, typename IndexType>
31class Ell;
32
33template <typename ValueType, typename IndexType>
34class Hybrid;
35
36template <typename ValueType, typename IndexType>
37class Sellp;
38
39template <typename ValueType, typename IndexType>
40class SparsityCsr;
41
42template <typename ValueType, typename IndexType>
43class Csr;
44
45template <typename ValueType, typename IndexType>
46class Fbcsr;
47
48template <typename ValueType, typename IndexType>
50
51template <typename IndexType>
52class Permutation;
53
54
55namespace detail {
56
57
58template <typename ValueType = default_precision, typename IndexType = int32>
59void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
60
61
62} // namespace detail
63
64
102template <typename ValueType = default_precision, typename IndexType = int32>
103class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
104 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
105#if GINKGO_ENABLE_HALF
106 public ConvertibleTo<
107 Csr<next_precision<next_precision<ValueType>>, IndexType>>,
108#endif
109 public ConvertibleTo<Dense<ValueType>>,
110 public ConvertibleTo<Coo<ValueType, IndexType>>,
111 public ConvertibleTo<Ell<ValueType, IndexType>>,
112 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
113 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
114 public ConvertibleTo<Sellp<ValueType, IndexType>>,
115 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
116 public DiagonalExtractable<ValueType>,
117 public ReadableFromMatrixData<ValueType, IndexType>,
118 public WritableToMatrixData<ValueType, IndexType>,
119 public Transposable,
120 public Permutable<IndexType>,
122 remove_complex<Csr<ValueType, IndexType>>>,
123 public ScaledIdentityAddable {
124 friend class EnablePolymorphicObject<Csr, LinOp>;
125 friend class Coo<ValueType, IndexType>;
126 friend class Dense<ValueType>;
127 friend class Diagonal<ValueType>;
128 friend class Ell<ValueType, IndexType>;
129 friend class Hybrid<ValueType, IndexType>;
130 friend class Sellp<ValueType, IndexType>;
131 friend class SparsityCsr<ValueType, IndexType>;
132 friend class Fbcsr<ValueType, IndexType>;
133 friend class CsrBuilder<ValueType, IndexType>;
134 friend class Csr<to_complex<ValueType>, IndexType>;
135
136public:
137 using EnableLinOp<Csr>::convert_to;
138 using EnableLinOp<Csr>::move_to;
139 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
140 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
141 using ConvertibleTo<Dense<ValueType>>::convert_to;
142 using ConvertibleTo<Dense<ValueType>>::move_to;
143 using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
145 using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
155 using ReadableFromMatrixData<ValueType, IndexType>::read;
156
157 using value_type = ValueType;
158 using index_type = IndexType;
162 using absolute_type = remove_complex<Csr>;
163
164 class automatical;
165
173 friend class automatical;
174
175 public:
181 strategy_type(std::string name) : name_(name) {}
182
183 virtual ~strategy_type() = default;
184
190 std::string get_name() { return name_; }
191
198 virtual void process(const array<index_type>& mtx_row_ptrs,
199 array<index_type>* mtx_srow) = 0;
200
208 virtual int64_t clac_size(const int64_t nnz) = 0;
209
214 virtual std::shared_ptr<strategy_type> copy() = 0;
215
216 protected:
217 void set_name(std::string name) { name_ = name; }
218
219 private:
220 std::string name_;
221 };
222
229 class classical : public strategy_type {
230 public:
234 classical() : strategy_type("classical"), max_length_per_row_(0) {}
235
236 void process(const array<index_type>& mtx_row_ptrs,
237 array<index_type>* mtx_srow) override
238 {
239 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
240 array<index_type> row_ptrs_host(host_mtx_exec);
241 const bool is_mtx_on_host{host_mtx_exec ==
242 mtx_row_ptrs.get_executor()};
243 const index_type* row_ptrs{};
244 if (is_mtx_on_host) {
245 row_ptrs = mtx_row_ptrs.get_const_data();
246 } else {
247 row_ptrs_host = mtx_row_ptrs;
248 row_ptrs = row_ptrs_host.get_const_data();
249 }
250 auto num_rows = mtx_row_ptrs.get_size() - 1;
251 max_length_per_row_ = 0;
252 for (size_type i = 0; i < num_rows; i++) {
253 max_length_per_row_ = std::max(max_length_per_row_,
254 row_ptrs[i + 1] - row_ptrs[i]);
255 }
256 }
257
258 int64_t clac_size(const int64_t nnz) override { return 0; }
259
260 index_type get_max_length_per_row() const noexcept
261 {
262 return max_length_per_row_;
263 }
264
265 std::shared_ptr<strategy_type> copy() override
266 {
267 return std::make_shared<classical>();
268 }
269
270 private:
271 index_type max_length_per_row_;
272 };
273
279 class merge_path : public strategy_type {
280 public:
284 merge_path() : strategy_type("merge_path") {}
285
286 void process(const array<index_type>& mtx_row_ptrs,
287 array<index_type>* mtx_srow) override
288 {}
289
290 int64_t clac_size(const int64_t nnz) override { return 0; }
291
292 std::shared_ptr<strategy_type> copy() override
293 {
294 return std::make_shared<merge_path>();
295 }
296 };
297
304 class cusparse : public strategy_type {
305 public:
309 cusparse() : strategy_type("cusparse") {}
310
311 void process(const array<index_type>& mtx_row_ptrs,
312 array<index_type>* mtx_srow) override
313 {}
314
315 int64_t clac_size(const int64_t nnz) override { return 0; }
316
317 std::shared_ptr<strategy_type> copy() override
318 {
319 return std::make_shared<cusparse>();
320 }
321 };
322
328 class sparselib : public strategy_type {
329 public:
333 sparselib() : strategy_type("sparselib") {}
334
335 void process(const array<index_type>& mtx_row_ptrs,
336 array<index_type>* mtx_srow) override
337 {}
338
339 int64_t clac_size(const int64_t nnz) override { return 0; }
340
341 std::shared_ptr<strategy_type> copy() override
342 {
343 return std::make_shared<sparselib>();
344 }
345 };
346
351 public:
358 [[deprecated]] load_balance()
359 : load_balance(std::move(
361 {}
362
368 load_balance(std::shared_ptr<const CudaExecutor> exec)
369 : load_balance(exec->get_num_warps(), exec->get_warp_size())
370 {}
371
377 load_balance(std::shared_ptr<const HipExecutor> exec)
378 : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
379 {}
380
388 load_balance(std::shared_ptr<const DpcppExecutor> exec)
389 : load_balance(exec->get_num_subgroups(), 32, false, "intel")
390 {}
391
403 load_balance(int64_t nwarps, int warp_size = 32,
404 bool cuda_strategy = true,
405 std::string strategy_name = "none")
406 : strategy_type("load_balance"),
407 nwarps_(nwarps),
408 warp_size_(warp_size),
409 cuda_strategy_(cuda_strategy),
410 strategy_name_(strategy_name)
411 {}
412
413 void process(const array<index_type>& mtx_row_ptrs,
414 array<index_type>* mtx_srow) override
415 {
416 auto nwarps = mtx_srow->get_size();
417
418 if (nwarps > 0) {
419 auto host_srow_exec = mtx_srow->get_executor()->get_master();
420 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
421 const bool is_srow_on_host{host_srow_exec ==
422 mtx_srow->get_executor()};
423 const bool is_mtx_on_host{host_mtx_exec ==
424 mtx_row_ptrs.get_executor()};
425 array<index_type> row_ptrs_host(host_mtx_exec);
426 array<index_type> srow_host(host_srow_exec);
427 const index_type* row_ptrs{};
428 index_type* srow{};
429 if (is_srow_on_host) {
430 srow = mtx_srow->get_data();
431 } else {
432 srow_host = *mtx_srow;
433 srow = srow_host.get_data();
434 }
435 if (is_mtx_on_host) {
436 row_ptrs = mtx_row_ptrs.get_const_data();
437 } else {
438 row_ptrs_host = mtx_row_ptrs;
439 row_ptrs = row_ptrs_host.get_const_data();
440 }
441 for (size_type i = 0; i < nwarps; i++) {
442 srow[i] = 0;
443 }
444 const auto num_rows = mtx_row_ptrs.get_size() - 1;
445 const auto num_elems = row_ptrs[num_rows];
446 const auto bucket_divider =
447 num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
448 for (size_type i = 0; i < num_rows; i++) {
449 auto bucket =
450 ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
451 bucket_divider);
452 if (bucket < nwarps) {
453 srow[bucket]++;
454 }
455 }
456 // find starting row for thread i
457 for (size_type i = 1; i < nwarps; i++) {
458 srow[i] += srow[i - 1];
459 }
460 if (!is_srow_on_host) {
461 *mtx_srow = srow_host;
462 }
463 }
464 }
465
466 int64_t clac_size(const int64_t nnz) override
467 {
468 if (warp_size_ > 0) {
469 int multiple = 8;
470 if (nnz >= static_cast<int64_t>(2e8)) {
471 multiple = 2048;
472 } else if (nnz >= static_cast<int64_t>(2e7)) {
473 multiple = 512;
474 } else if (nnz >= static_cast<int64_t>(2e6)) {
475 multiple = 128;
476 } else if (nnz >= static_cast<int64_t>(2e5)) {
477 multiple = 32;
478 }
479 if (strategy_name_ == "intel") {
480 multiple = 8;
481 if (nnz >= static_cast<int64_t>(2e8)) {
482 multiple = 256;
483 } else if (nnz >= static_cast<int64_t>(2e7)) {
484 multiple = 32;
485 }
486 }
487#if GINKGO_HIP_PLATFORM_HCC
488 if (!cuda_strategy_) {
489 multiple = 8;
490 if (nnz >= static_cast<int64_t>(1e7)) {
491 multiple = 64;
492 } else if (nnz >= static_cast<int64_t>(1e6)) {
493 multiple = 16;
494 }
495 }
496#endif // GINKGO_HIP_PLATFORM_HCC
497
498 auto nwarps = nwarps_ * multiple;
499 return min(ceildiv(nnz, warp_size_), nwarps);
500 } else {
501 return 0;
502 }
503 }
504
505 std::shared_ptr<strategy_type> copy() override
506 {
507 return std::make_shared<load_balance>(
508 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
509 }
510
511 private:
512 int64_t nwarps_;
513 int warp_size_;
514 bool cuda_strategy_;
515 std::string strategy_name_;
516 };
517
518 class automatical : public strategy_type {
519 public:
520 /* Use imbalance strategy when the maximum number of nonzero per row is
521 * more than 1024 on NVIDIA hardware */
522 const index_type nvidia_row_len_limit = 1024;
523 /* Use imbalance strategy when the matrix has more than 1e6 stored
524 * elements on NVIDIA hardware */
525 const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
526 /* Use imbalance strategy when the maximum number of nonzero per row is
527 * more than 768 on AMD hardware */
528 const index_type amd_row_len_limit = 768;
529 /* Use imbalance strategy when the matrix has more than 1e8 stored
530 * elements on AMD hardware */
531 const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
532 /* Use imbalance strategy when the maximum number of nonzero per row is
533 * more than 25600 on Intel hardware */
534 const index_type intel_row_len_limit = 25600;
535 /* Use imbalance strategy when the matrix has more than 3e8 stored
536 * elements on Intel hardware */
537 const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
538
539 public:
546 [[deprecated]] automatical()
547 : automatical(std::move(
549 {}
550
556 automatical(std::shared_ptr<const CudaExecutor> exec)
557 : automatical(exec->get_num_warps(), exec->get_warp_size())
558 {}
559
565 automatical(std::shared_ptr<const HipExecutor> exec)
566 : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
567 {}
568
576 automatical(std::shared_ptr<const DpcppExecutor> exec)
577 : automatical(exec->get_num_subgroups(), 32, false, "intel")
578 {}
579
591 automatical(int64_t nwarps, int warp_size = 32,
592 bool cuda_strategy = true,
593 std::string strategy_name = "none")
594 : strategy_type("automatical"),
595 nwarps_(nwarps),
596 warp_size_(warp_size),
597 cuda_strategy_(cuda_strategy),
598 strategy_name_(strategy_name),
599 max_length_per_row_(0)
600 {}
601
602 void process(const array<index_type>& mtx_row_ptrs,
603 array<index_type>* mtx_srow) override
604 {
605 // if the number of stored elements is larger than <nnz_limit> or
606 // the maximum number of stored elements per row is larger than
607 // <row_len_limit>, use load_balance otherwise use classical
608 index_type nnz_limit = nvidia_nnz_limit;
609 index_type row_len_limit = nvidia_row_len_limit;
610 if (strategy_name_ == "intel") {
611 nnz_limit = intel_nnz_limit;
612 row_len_limit = intel_row_len_limit;
613 }
614#if GINKGO_HIP_PLATFORM_HCC
615 if (!cuda_strategy_) {
616 nnz_limit = amd_nnz_limit;
617 row_len_limit = amd_row_len_limit;
618 }
619#endif // GINKGO_HIP_PLATFORM_HCC
620 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
621 const bool is_mtx_on_host{host_mtx_exec ==
622 mtx_row_ptrs.get_executor()};
623 array<index_type> row_ptrs_host(host_mtx_exec);
624 const index_type* row_ptrs{};
625 if (is_mtx_on_host) {
626 row_ptrs = mtx_row_ptrs.get_const_data();
627 } else {
628 row_ptrs_host = mtx_row_ptrs;
629 row_ptrs = row_ptrs_host.get_const_data();
630 }
631 const auto num_rows = mtx_row_ptrs.get_size() - 1;
632 if (row_ptrs[num_rows] > nnz_limit) {
633 load_balance actual_strategy(nwarps_, warp_size_,
634 cuda_strategy_, strategy_name_);
635 if (is_mtx_on_host) {
636 actual_strategy.process(mtx_row_ptrs, mtx_srow);
637 } else {
638 actual_strategy.process(row_ptrs_host, mtx_srow);
639 }
640 this->set_name(actual_strategy.get_name());
641 } else {
642 index_type maxnum = 0;
643 for (size_type i = 0; i < num_rows; i++) {
644 maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
645 }
646 if (maxnum > row_len_limit) {
647 load_balance actual_strategy(
648 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
649 if (is_mtx_on_host) {
650 actual_strategy.process(mtx_row_ptrs, mtx_srow);
651 } else {
652 actual_strategy.process(row_ptrs_host, mtx_srow);
653 }
654 this->set_name(actual_strategy.get_name());
655 } else {
656 classical actual_strategy;
657 if (is_mtx_on_host) {
658 actual_strategy.process(mtx_row_ptrs, mtx_srow);
659 max_length_per_row_ =
660 actual_strategy.get_max_length_per_row();
661 } else {
662 actual_strategy.process(row_ptrs_host, mtx_srow);
663 max_length_per_row_ =
664 actual_strategy.get_max_length_per_row();
665 }
666 this->set_name(actual_strategy.get_name());
667 }
668 }
669 }
670
671 int64_t clac_size(const int64_t nnz) override
672 {
673 return std::make_shared<load_balance>(
674 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
675 ->clac_size(nnz);
676 }
677
678 index_type get_max_length_per_row() const noexcept
679 {
680 return max_length_per_row_;
681 }
682
683 std::shared_ptr<strategy_type> copy() override
684 {
685 return std::make_shared<automatical>(
686 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
687 }
688
689 private:
690 int64_t nwarps_;
691 int warp_size_;
692 bool cuda_strategy_;
693 std::string strategy_name_;
694 index_type max_length_per_row_;
695 };
696
697 friend class Csr<previous_precision<ValueType>, IndexType>;
698
699 void convert_to(
700 Csr<next_precision<ValueType>, IndexType>* result) const override;
701
702 void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
703
704#if GINKGO_ENABLE_HALF
705 friend class Csr<previous_precision<previous_precision<ValueType>>,
706 IndexType>;
707 using ConvertibleTo<
708 Csr<next_precision<next_precision<ValueType>>, IndexType>>::convert_to;
709 using ConvertibleTo<
711
712 void convert_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
713 result) const override;
714
715 void move_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
716 result) override;
717#endif
718
719 void convert_to(Dense<ValueType>* other) const override;
720
721 void move_to(Dense<ValueType>* other) override;
722
723 void convert_to(Coo<ValueType, IndexType>* result) const override;
724
725 void move_to(Coo<ValueType, IndexType>* result) override;
726
727 void convert_to(Ell<ValueType, IndexType>* result) const override;
728
729 void move_to(Ell<ValueType, IndexType>* result) override;
730
731 void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
732
733 void move_to(Fbcsr<ValueType, IndexType>* result) override;
734
735 void convert_to(Hybrid<ValueType, IndexType>* result) const override;
736
737 void move_to(Hybrid<ValueType, IndexType>* result) override;
738
739 void convert_to(Sellp<ValueType, IndexType>* result) const override;
740
741 void move_to(Sellp<ValueType, IndexType>* result) override;
742
743 void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
744
745 void move_to(SparsityCsr<ValueType, IndexType>* result) override;
746
747 void read(const mat_data& data) override;
748
749 void read(const device_mat_data& data) override;
750
751 void read(device_mat_data&& data) override;
752
753 void write(mat_data& data) const override;
754
755 std::unique_ptr<LinOp> transpose() const override;
756
757 std::unique_ptr<LinOp> conj_transpose() const override;
758
766
769 std::unique_ptr<Permutation<index_type>> value_permutation);
770
779 ptr_param<Csr> output) const;
780
781 std::unique_ptr<Permutation<IndexType>> value_permutation;
782 };
783
795 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> transpose_reuse()
796 const;
797
812 std::unique_ptr<Csr> permute(
813 ptr_param<const Permutation<index_type>> permutation,
815
829 std::unique_ptr<Csr> permute(
830 ptr_param<const Permutation<index_type>> row_permutation,
831 ptr_param<const Permutation<index_type>> column_permutation,
832 bool invert = false) const;
833
854 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
855 ptr_param<const Permutation<index_type>> permutation,
857
876 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
877 ptr_param<const Permutation<index_type>> row_permutation,
878 ptr_param<const Permutation<index_type>> column_permutation,
879 bool invert = false) const;
880
890 std::unique_ptr<Csr> scale_permute(
893
906 std::unique_ptr<Csr> scale_permute(
908 row_permutation,
910 column_permutation,
911 bool invert = false) const;
912
913 std::unique_ptr<LinOp> permute(
914 const array<IndexType>* permutation_indices) const override;
915
916 std::unique_ptr<LinOp> inverse_permute(
917 const array<IndexType>* inverse_permutation_indices) const override;
918
919 std::unique_ptr<LinOp> row_permute(
920 const array<IndexType>* permutation_indices) const override;
921
922 std::unique_ptr<LinOp> column_permute(
923 const array<IndexType>* permutation_indices) const override;
924
925 std::unique_ptr<LinOp> inverse_row_permute(
926 const array<IndexType>* inverse_permutation_indices) const override;
927
928 std::unique_ptr<LinOp> inverse_column_permute(
929 const array<IndexType>* inverse_permutation_indices) const override;
930
931 std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
932
933 std::unique_ptr<absolute_type> compute_absolute() const override;
934
936
941
942 /*
943 * Tests if all row entry pairs (value, col_idx) are sorted by column index
944 *
945 * @returns True if all row entry pairs (value, col_idx) are sorted by
946 * column index
947 */
948 bool is_sorted_by_column_index() const;
949
955 value_type* get_values() noexcept { return values_.get_data(); }
956
964 const value_type* get_const_values() const noexcept
965 {
966 return values_.get_const_data();
967 }
968
973 std::unique_ptr<Dense<ValueType>> create_value_view();
974
979 std::unique_ptr<const Dense<ValueType>> create_const_value_view() const;
980
986 index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
987
995 const index_type* get_const_col_idxs() const noexcept
996 {
997 return col_idxs_.get_const_data();
998 }
999
1005 index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
1006
1014 const index_type* get_const_row_ptrs() const noexcept
1015 {
1016 return row_ptrs_.get_const_data();
1017 }
1018
1024 index_type* get_srow() noexcept { return srow_.get_data(); }
1025
1033 const index_type* get_const_srow() const noexcept
1034 {
1035 return srow_.get_const_data();
1036 }
1037
1044 {
1045 return srow_.get_size();
1046 }
1047
1054 {
1055 return values_.get_size();
1056 }
1057
1062 std::shared_ptr<strategy_type> get_strategy() const noexcept
1063 {
1064 return strategy_;
1065 }
1066
1072 void set_strategy(std::shared_ptr<strategy_type> strategy)
1073 {
1074 strategy_ = std::move(strategy->copy());
1075 this->make_srow();
1076 }
1077
1085 {
1086 auto exec = this->get_executor();
1087 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1088 this->scale_impl(make_temporary_clone(exec, alpha).get());
1089 }
1090
1098 {
1099 auto exec = this->get_executor();
1100 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1101 this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1102 }
1103
1112 static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1113 std::shared_ptr<strategy_type> strategy);
1114
1126 static std::unique_ptr<Csr> create(
1127 std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1128 size_type num_nonzeros = {},
1129 std::shared_ptr<strategy_type> strategy = nullptr);
1130
1150 static std::unique_ptr<Csr> create(
1151 std::shared_ptr<const Executor> exec, const dim<2>& size,
1152 array<value_type> values, array<index_type> col_idxs,
1153 array<index_type> row_ptrs,
1154 std::shared_ptr<strategy_type> strategy = nullptr);
1155
1160 template <typename InputValueType, typename InputColumnIndexType,
1161 typename InputRowPtrType>
1162 GKO_DEPRECATED(
1163 "explicitly construct the gko::array argument instead of passing "
1164 "initializer lists")
1165 static std::unique_ptr<Csr> create(
1166 std::shared_ptr<const Executor> exec, const dim<2>& size,
1167 std::initializer_list<InputValueType> values,
1168 std::initializer_list<InputColumnIndexType> col_idxs,
1169 std::initializer_list<InputRowPtrType> row_ptrs)
1170 {
1171 return create(exec, size, array<value_type>{exec, std::move(values)},
1172 array<index_type>{exec, std::move(col_idxs)},
1173 array<index_type>{exec, std::move(row_ptrs)});
1174 }
1175
1191 static std::unique_ptr<const Csr> create_const(
1192 std::shared_ptr<const Executor> exec, const dim<2>& size,
1193 gko::detail::const_array_view<ValueType>&& values,
1194 gko::detail::const_array_view<IndexType>&& col_idxs,
1195 gko::detail::const_array_view<IndexType>&& row_ptrs,
1196 std::shared_ptr<strategy_type> strategy = nullptr);
1197
1210 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1211 const index_set<IndexType>& row_index_set,
1212 const index_set<IndexType>& column_index_set) const;
1213
1225 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1226 const span& row_span, const span& column_span) const;
1227
1232
1239
1243 Csr(const Csr&);
1244
1251
1252protected:
1253 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1254 size_type num_nonzeros = {},
1255 std::shared_ptr<strategy_type> strategy = nullptr);
1256
1257 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1258 array<value_type> values, array<index_type> col_idxs,
1259 array<index_type> row_ptrs,
1260 std::shared_ptr<strategy_type> strategy = nullptr);
1261
1262 void apply_impl(const LinOp* b, LinOp* x) const override;
1263
1264 void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1265 LinOp* x) const override;
1266
1267 // TODO: This provides some more sane settings. Please fix this!
1268 static std::shared_ptr<strategy_type> make_default_strategy(
1269 std::shared_ptr<const Executor> exec)
1270 {
1271 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1272 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1273 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1274 std::shared_ptr<strategy_type> new_strategy;
1275 if (cuda_exec) {
1276 new_strategy = std::make_shared<automatical>(cuda_exec);
1277 } else if (hip_exec) {
1278 new_strategy = std::make_shared<automatical>(hip_exec);
1279 } else if (dpcpp_exec) {
1280 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1281 } else {
1282 new_strategy = std::make_shared<classical>();
1283 }
1284 return new_strategy;
1285 }
1286
1287 // TODO clean this up as soon as we improve strategy_type
1288 template <typename CsrType>
1289 void convert_strategy_helper(CsrType* result) const
1290 {
1291 auto strat = this->get_strategy().get();
1292 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1293 if (dynamic_cast<classical*>(strat)) {
1294 new_strat = std::make_shared<typename CsrType::classical>();
1295 } else if (dynamic_cast<merge_path*>(strat)) {
1296 new_strat = std::make_shared<typename CsrType::merge_path>();
1297 } else if (dynamic_cast<cusparse*>(strat)) {
1298 new_strat = std::make_shared<typename CsrType::cusparse>();
1299 } else if (dynamic_cast<sparselib*>(strat)) {
1300 new_strat = std::make_shared<typename CsrType::sparselib>();
1301 } else {
1302 auto rexec = result->get_executor();
1303 auto cuda_exec =
1304 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1305 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1306 auto dpcpp_exec =
1307 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1308 auto lb = dynamic_cast<load_balance*>(strat);
1309 if (cuda_exec) {
1310 if (lb) {
1311 new_strat =
1312 std::make_shared<typename CsrType::load_balance>(
1313 cuda_exec);
1314 } else {
1315 new_strat = std::make_shared<typename CsrType::automatical>(
1316 cuda_exec);
1317 }
1318 } else if (hip_exec) {
1319 if (lb) {
1320 new_strat =
1321 std::make_shared<typename CsrType::load_balance>(
1322 hip_exec);
1323 } else {
1324 new_strat = std::make_shared<typename CsrType::automatical>(
1325 hip_exec);
1326 }
1327 } else if (dpcpp_exec) {
1328 if (lb) {
1329 new_strat =
1330 std::make_shared<typename CsrType::load_balance>(
1331 dpcpp_exec);
1332 } else {
1333 new_strat = std::make_shared<typename CsrType::automatical>(
1334 dpcpp_exec);
1335 }
1336 } else {
1337 // Try to preserve this executor's configuration
1338 auto this_cuda_exec =
1339 std::dynamic_pointer_cast<const CudaExecutor>(
1340 this->get_executor());
1341 auto this_hip_exec =
1342 std::dynamic_pointer_cast<const HipExecutor>(
1343 this->get_executor());
1344 auto this_dpcpp_exec =
1345 std::dynamic_pointer_cast<const DpcppExecutor>(
1346 this->get_executor());
1347 if (this_cuda_exec) {
1348 if (lb) {
1349 new_strat =
1350 std::make_shared<typename CsrType::load_balance>(
1351 this_cuda_exec);
1352 } else {
1353 new_strat =
1354 std::make_shared<typename CsrType::automatical>(
1355 this_cuda_exec);
1356 }
1357 } else if (this_hip_exec) {
1358 if (lb) {
1359 new_strat =
1360 std::make_shared<typename CsrType::load_balance>(
1361 this_hip_exec);
1362 } else {
1363 new_strat =
1364 std::make_shared<typename CsrType::automatical>(
1365 this_hip_exec);
1366 }
1367 } else if (this_dpcpp_exec) {
1368 if (lb) {
1369 new_strat =
1370 std::make_shared<typename CsrType::load_balance>(
1371 this_dpcpp_exec);
1372 } else {
1373 new_strat =
1374 std::make_shared<typename CsrType::automatical>(
1375 this_dpcpp_exec);
1376 }
1377 } else {
1378 // FIXME: this changes strategies.
1379 // We had a load balance or automatical strategy from a non
1380 // HIP or Cuda executor and are moving to a non HIP or Cuda
1381 // executor.
1382 new_strat = std::make_shared<typename CsrType::classical>();
1383 }
1384 }
1385 }
1386 result->set_strategy(new_strat);
1387 }
1388
1393 {
1394 srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1395 strategy_->process(row_ptrs_, &srow_);
1396 }
1397
1404 virtual void scale_impl(const LinOp* alpha);
1405
1412 virtual void inv_scale_impl(const LinOp* alpha);
1413
1414private:
1415 std::shared_ptr<strategy_type> strategy_;
1416 array<value_type> values_;
1417 array<index_type> col_idxs_;
1418 array<index_type> row_ptrs_;
1419 array<index_type> srow_;
1420
1421 void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1422};
1423
1424
1425namespace detail {
1426
1427
1434template <typename ValueType, typename IndexType>
1435void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1436{
1437 using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1438 using automatical = typename Csr<ValueType, IndexType>::automatical;
1439 auto strategy = result->get_strategy();
1440 auto executor = result->get_executor();
1441 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1442 if (auto exec =
1443 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1444 result->set_strategy(std::make_shared<load_balance>(exec));
1445 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1446 executor)) {
1447 result->set_strategy(std::make_shared<load_balance>(exec));
1448 }
1449 } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1450 if (auto exec =
1451 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1452 result->set_strategy(std::make_shared<automatical>(exec));
1453 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1454 executor)) {
1455 result->set_strategy(std::make_shared<automatical>(exec));
1456 }
1457 }
1458}
1459
1460
1461} // namespace detail
1462} // namespace matrix
1463} // namespace gko
1464
1465
1466#endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
Definition polymorphic_object.hpp:479
Definition executor.hpp:1542
Definition lin_op.hpp:742
Definition lin_op.hpp:793
Definition lin_op.hpp:878
Definition polymorphic_object.hpp:668
Definition executor.hpp:615
Definition lin_op.hpp:117
Definition executor.hpp:1387
Definition lin_op.hpp:484
std::shared_ptr< const Executor > get_executor() const noexcept
Definition polymorphic_object.hpp:243
Definition lin_op.hpp:605
Definition lin_op.hpp:817
Definition lin_op.hpp:433
Definition lin_op.hpp:660
Definition array.hpp:166
void resize_and_reset(size_type size)
Definition array.hpp:622
value_type * get_data() noexcept
Definition array.hpp:673
std::shared_ptr< const Executor > get_executor() const noexcept
Definition array.hpp:689
const value_type * get_const_data() const noexcept
Definition array.hpp:682
size_type get_size() const noexcept
Definition array.hpp:656
Definition device_matrix_data.hpp:36
Definition index_set.hpp:56
Definition coo.hpp:61
Definition csr.hpp:49
Definition csr.hpp:518
std::shared_ptr< strategy_type > copy() override
Definition csr.hpp:683
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Definition csr.hpp:591
automatical()
Definition csr.hpp:546
int64_t clac_size(const int64_t nnz) override
Definition csr.hpp:671
automatical(std::shared_ptr< const CudaExecutor > exec)
Definition csr.hpp:556
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Definition csr.hpp:602
automatical(std::shared_ptr< const DpcppExecutor > exec)
Definition csr.hpp:576
automatical(std::shared_ptr< const HipExecutor > exec)
Definition csr.hpp:565
Definition csr.hpp:229
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Definition csr.hpp:236
std::shared_ptr< strategy_type > copy() override
Definition csr.hpp:265
classical()
Definition csr.hpp:234
int64_t clac_size(const int64_t nnz) override
Definition csr.hpp:258
Definition csr.hpp:304
int64_t clac_size(const int64_t nnz) override
Definition csr.hpp:315
std::shared_ptr< strategy_type > copy() override
Definition csr.hpp:317
cusparse()
Definition csr.hpp:309
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Definition csr.hpp:311
Definition csr.hpp:350
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Definition csr.hpp:413
std::shared_ptr< strategy_type > copy() override
Definition csr.hpp:505
load_balance(std::shared_ptr< const HipExecutor > exec)
Definition csr.hpp:377
load_balance()
Definition csr.hpp:358
int64_t clac_size(const int64_t nnz) override
Definition csr.hpp:466
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Definition csr.hpp:403
load_balance(std::shared_ptr< const CudaExecutor > exec)
Definition csr.hpp:368
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Definition csr.hpp:388
Definition csr.hpp:279
int64_t clac_size(const int64_t nnz) override
Definition csr.hpp:290
std::shared_ptr< strategy_type > copy() override
Definition csr.hpp:292
merge_path()
Definition csr.hpp:284
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Definition csr.hpp:286
Definition csr.hpp:328
int64_t clac_size(const int64_t nnz) override
Definition csr.hpp:339
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Definition csr.hpp:335
sparselib()
Definition csr.hpp:333
std::shared_ptr< strategy_type > copy() override
Definition csr.hpp:341
Definition csr.hpp:172
virtual int64_t clac_size(const int64_t nnz)=0
std::string get_name()
Definition csr.hpp:190
virtual std::shared_ptr< strategy_type > copy()=0
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
strategy_type(std::string name)
Definition csr.hpp:181
Definition csr.hpp:123
std::unique_ptr< LinOp > column_permute(const array< IndexType > *permutation_indices) const override
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Csr & operator=(const Csr &)
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
void write(mat_data &data) const override
std::unique_ptr< absolute_type > compute_absolute() const override
const index_type * get_const_row_ptrs() const noexcept
Definition csr.hpp:1014
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
void read(device_mat_data &&data) override
const index_type * get_const_srow() const noexcept
Definition csr.hpp:1033
void set_strategy(std::shared_ptr< strategy_type > strategy)
Definition csr.hpp:1072
void inv_scale(ptr_param< const LinOp > alpha)
Definition csr.hpp:1097
virtual void scale_impl(const LinOp *alpha)
void read(const device_mat_data &data) override
index_type * get_srow() noexcept
Definition csr.hpp:1024
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
size_type get_num_srow_elements() const noexcept
Definition csr.hpp:1043
std::unique_ptr< LinOp > inverse_permute(const array< IndexType > *inverse_permutation_indices) const override
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
virtual void inv_scale_impl(const LinOp *alpha)
std::unique_ptr< LinOp > row_permute(const array< IndexType > *permutation_indices) const override
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
void make_srow()
Definition csr.hpp:1392
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
index_type * get_row_ptrs() noexcept
Definition csr.hpp:1005
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Csr(const Csr &)
Csr & operator=(Csr &&)
std::unique_ptr< LinOp > transpose() const override
const value_type * get_const_values() const noexcept
Definition csr.hpp:964
std::unique_ptr< LinOp > inverse_column_permute(const array< IndexType > *inverse_permutation_indices) const override
std::unique_ptr< LinOp > inverse_row_permute(const array< IndexType > *inverse_permutation_indices) const override
void compute_absolute_inplace() override
size_type get_num_stored_elements() const noexcept
Definition csr.hpp:1053
std::shared_ptr< strategy_type > get_strategy() const noexcept
Definition csr.hpp:1062
std::unique_ptr< LinOp > permute(const array< IndexType > *permutation_indices) const override
const index_type * get_const_col_idxs() const noexcept
Definition csr.hpp:995
void read(const mat_data &data) override
void sort_by_column_index()
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
std::unique_ptr< Dense< ValueType > > create_value_view()
void scale(ptr_param< const LinOp > alpha)
Definition csr.hpp:1084
value_type * get_values() noexcept
Definition csr.hpp:955
index_type * get_col_idxs() noexcept
Definition csr.hpp:986
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
std::unique_ptr< LinOp > conj_transpose() const override
Definition dense.hpp:116
Definition diagonal.hpp:52
Definition ell.hpp:63
Fixed-block compressed sparse row storage matrix format.
Definition fbcsr.hpp:112
Definition hybrid.hpp:54
Definition permutation.hpp:111
Definition scaled_permutation.hpp:37
Definition sellp.hpp:54
Definition sparsity_csr.hpp:55
Definition utils_helper.hpp:41
permute_mode
Definition permutation.hpp:42
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::next_precision_impl< T >::type next_precision
Definition math.hpp:438
std::size_t size_type
Definition types.hpp:89
constexpr int64 ceildiv(int64 num, int64 den)
Definition math.hpp:590
constexpr T min(const T &x, const T &y)
Definition math.hpp:719
typename detail::to_complex_s< T >::type to_complex
Definition math.hpp:279
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Definition temporary_clone.hpp:208
typename detail::remove_complex_s< T >::type remove_complex
Definition math.hpp:260
STL namespace.
Definition dim.hpp:26
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
permuting_reuse_info(std::unique_ptr< Permutation< index_type > > value_permutation)
Definition matrix_data.hpp:126
Definition range.hpp:46