28#ifndef _MATRIX_SCALAPACK_IMPL_HPP_
29#define _MATRIX_SCALAPACK_IMPL_HPP_
86 : grid(comm), desc(9) {
166 size_t&
g_col)
const {
167 const int lld = get_lld();
170 prep_local_to_global();
178 const int lld = get_lld();
179 const int nprow = grid.nprow;
180 const int npcol = grid.npcol;
181 const int myprow = grid.myprow;
182 const int mypcol = grid.mypcol;
188 if (
p_row !=
size_t(myprow) ||
p_col !=
size_t(mypcol))
return false;
199 prep_global_to_local();
200 const int myprow = proc_row[
g_row];
201 const int mypcol = proc_col[
g_col];
202 comm_rank = grid.mpirank(myprow, mypcol);
208 return local_row_size_;
213 return local_col_size_;
218 return lindex % (
static_cast<size_t>(get_lld()));
223 return lindex / (
static_cast<size_t>(get_lld()));
228 return BLOCK_SIZE * ((
l_row / BLOCK_SIZE) * grid.nprow + grid.myprow) +
229 (
l_row % BLOCK_SIZE);
234 return BLOCK_SIZE * ((
l_col / BLOCK_SIZE) * grid.npcol + grid.mypcol) +
235 (
l_col % BLOCK_SIZE);
241 if (has_local_to_global)
return;
243 const int nprow = grid.nprow;
244 const int npcol = grid.npcol;
245 const int myprow = grid.myprow;
246 const int mypcol = grid.mypcol;
252#pragma omp parallel default(shared)
256 global_row[
l_row] = BLOCK_SIZE * ((
l_row / BLOCK_SIZE) * nprow + myprow) +
257 (
l_row % BLOCK_SIZE);
261 global_col[
l_col] = BLOCK_SIZE * ((
l_col / BLOCK_SIZE) * npcol + mypcol) +
262 (
l_col % BLOCK_SIZE);
266 has_local_to_global =
true;
272 if (has_global_to_local)
return;
274 const int nprow = grid.nprow;
275 const int npcol = grid.npcol;
286 lld_list.resize(nprow);
288#pragma omp parallel default(shared)
305 for (
size_t myprow = 0; myprow <
size_t(nprow); ++myprow) {
313 lld_list[myprow] = (
locr < 1) ? 1 :
locr;
317 has_global_to_local =
true;
324 int xMB = BLOCK_SIZE;
325 int xNB = BLOCK_SIZE;
334 local_row_size_ =
static_cast<size_t>(
locr);
335 local_col_size_ =
static_cast<size_t>(
locc);
336 has_local_to_global =
false;
337 has_global_to_local =
false;
352 out <<
"Matrix: prow= " << grid.myprow <<
" pcol= " << grid.mypcol
353 <<
" local_size= " << local_size() <<
" lld(locr)= " << get_lld()
354 <<
" locc= " << local_size() / get_lld() <<
"\n";
360 for (
size_t i = 0;
i < V.size(); ++
i) V[
i] +=
rhs[
i];
367 for (
size_t i = 0;
i < V.size(); ++
i) V[
i] -=
rhs[
i];
373 for (
size_t i = 0;
i < V.size(); ++
i) V[
i] *=
rhs;
379 for (
size_t i = 0;
i < V.size(); ++
i) V[
i] /=
rhs;
384template <
typename UnaryOperation>
386 std::transform(V.begin(), V.end(), V.begin(),
op);
396 std::vector<int>
dest_rank(local_size());
397 std::vector<size_t> local_position(local_size());
399 for (
size_t i = 0;
i < local_size();
i++) {
406 local_position[
i] =
pos;
418 const size_t n = n_row() * n_col();
419 const size_t nr = n_row();
420 std::vector<C>
vec(
n);
422 for (
size_t i = 0;
i < local_size(); ++
i) {
426 return mpi_wrapper::allreduce_vec(
vec, get_comm());
436 return mpi_wrapper::allreduce_sum(
value, get_comm());
449 prep_local_to_global();
451 fout <<
"local_size= " << local_size() <<
"\n";
454 fout <<
"[global_row]\n";
456 fout <<
" " << global_row[
i];
457 if (
i % BLOCK_SIZE == BLOCK_SIZE - 1)
fout <<
"\n";
460 fout <<
"[global_col]\n";
462 fout <<
" " << global_col[
i];
463 if (
i % BLOCK_SIZE == BLOCK_SIZE - 1)
fout <<
"\n";
473 const std::vector<size_t>& local_position,
476 const int mpisize =
M.get_comm_size();
478 const size_t local_size =
M.local_size();
480 const C*
mat =
M.head();
485 assert(local_position.size() == local_size);
492 for (
int rank = 0; rank <
proc_size; ++rank) {
499 for (
size_t i = 0;
i < local_size; ++
i) {
505 for (
int rank = 0; rank < mpisize; ++rank) {
520 for (
int rank = 0; rank <
proc_size; ++rank) {
523 for (
size_t i = 0;
i < local_size; ++
i) {
555void replace_matrix_data(
const std::vector<C>& V,
const std::vector<int>&
dest_rank,
556 const std::vector<size_t>& local_position,
559 const int mpisize =
M_new.get_comm_size();
561 const size_t local_size = V.size();
563 const C*
mat = &(V[0]);
568 assert(local_position.size() == local_size);
575 for (
int rank = 0; rank <
proc_size; ++rank) {
582 for (
size_t i = 0;
i < local_size; ++
i) {
588 for (
int rank = 0; rank < mpisize; ++rank) {
603 for (
int rank = 0; rank <
proc_size; ++rank) {
606 for (
size_t i = 0;
i < local_size; ++
i) {
639 const std::vector<size_t>& local_position,
642 const int mpisize =
M.get_comm_size();
643 const int mpirank =
M.get_comm_rank();
644 const size_t local_size =
M.local_size();
648 assert(local_position.size() == local_size);
651 for (
size_t i = 0;
i < local_size; ++
i) {
656 const C*
mat =
M.head();
660 int dest = (mpirank +
step) % mpisize;
661 int source = (mpirank + mpisize -
step) % mpisize;
662 if (
dest == mpirank) {
663 for (
size_t i = 0;
i < local_size;
i++) {
680 for (
size_t i = 0;
i < local_size;
i++) {
708 for (
size_t i = 0;
i <
n; ++
i) {
720 for (
size_t i = 0;
i <
n; ++
i) {
729 const size_t n =
A.local_size();
733 for (
size_t i = 0;
i <
n; ++
i) {
Non-distributed matrix using LAPACK.
Definition matrix_lapack.hpp:44
Distributed matrix using ScaLAPACK.
Definition matrix_scalapack.hpp:47
Matrix()
Definition matrix_scalapack_impl.hpp:70
const MPI_Comm & get_comm() const
Definition matrix_scalapack_impl.hpp:121
size_t local_size() const
Definition matrix_scalapack_impl.hpp:113
std::string filename(const std::string &prefix, int proc_size)
Definition common.hpp:32
Define the type of a complex number.
std::complex< double > complex
Definition complex.hpp:38
mptensor::complex complex
Definition matrix_lapack.cc:36
void descinit_(int desca[], int *M, int *N, int *MB, int *NB, int *irsrc, int *icsrc, int *ictxt, int *lld, int *info)
int numroc_(int *M, int *MB, int *prow, int *irsrc, int *nprow)
Wrapper functions of MPI communications.
Definition complex.hpp:34