mptensor  v0.3.0
Parallel Library for Tensor Network Methods
matrix_scalapack_impl.hpp
Go to the documentation of this file.
1 /*
2  mptensor - Parallel Library for Tensor Network Methods
3 
4  Copyright 2016 Satoshi Morita
5 
6  mptensor is free software: you can redistribute it and/or modify it
7  under the terms of the GNU Lesser General Public License as
8  published by the Free Software Foundation, either version 3 of the
9  License, or (at your option) any later version.
10 
11  mptensor is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public
17  License along with mptensor. If not, see
18  <https://www.gnu.org/licenses/>.
19 */
20 
28 #ifndef _MATRIX_SCALAPACK_IMPL_HPP_
29 #define _MATRIX_SCALAPACK_IMPL_HPP_
30 
31 #include <algorithm>
32 #include <cassert>
33 #include <cfloat>
34 #include <complex>
35 #include <iostream>
36 #include <fstream>
37 #include <vector>
38 #include <string>
39 
40 #include <mpi.h>
41 
42 #include "../complex.hpp"
43 #include "../mpi_wrapper.hpp"
44 #include "blacsgrid.hpp"
45 
46 /* ---------- PBLAS, SCALAPACK ---------- */
47 extern "C" {
48 int numroc_(int* M, int* MB, int* prow, int* irsrc, int* nprow);
49 void descinit_(int desca[], int* M, int* N, int* MB, int* NB, int* irsrc,
50  int* icsrc, int* ictxt, int* lld, int* info);
51 }
52 
53 namespace mptensor {
54 
56 namespace scalapack {
57 
59 
63 /* ---------- static member variables ---------- */
64 template <typename C>
65 const size_t Matrix<C>::BLOCK_SIZE = 16;
66 
67 /* ---------- constructors ---------- */
68 
69 template <typename C>
70 Matrix<C>::Matrix() : grid(MPI_COMM_WORLD), desc(9) {
71  init(0, 0);
72 };
73 
74 template <typename C>
75 Matrix<C>::Matrix(const MPI_Comm& comm) : grid(comm), desc(9) {
76  init(0, 0);
77 };
78 
79 template <typename C>
80 Matrix<C>::Matrix(size_t n_row, size_t n_col) : grid(MPI_COMM_WORLD), desc(9) {
81  init(n_row, n_col);
82 };
83 
84 template <typename C>
85 Matrix<C>::Matrix(const MPI_Comm& comm, size_t n_row, size_t n_col)
86  : grid(comm), desc(9) {
87  init(n_row, n_col);
88 };
89 
90 /* ---------- member functions ---------- */
91 
92 template <typename C>
93 inline const C& Matrix<C>::operator[](size_t i) const {
94  return V[i];
95 }
96 
97 template <typename C>
98 inline C& Matrix<C>::operator[](size_t i) {
99  return V[i];
100 }
101 
102 template <typename C>
103 inline const C* Matrix<C>::head() const {
104  return &(V[0]);
105 }
106 
107 template <typename C>
108 inline C* Matrix<C>::head() {
109  return &(V[0]);
110 }
111 
112 template <typename C>
113 inline size_t Matrix<C>::local_size() const {
114  return V.size();
115 }
116 
117 // template <typename C> inline
118 // const BlacsGrid& Matrix<C>::get_grid() const { return grid; }
119 
120 template <typename C>
121 inline const MPI_Comm& Matrix<C>::get_comm() const {
122  return grid.comm;
123 }
124 
125 template <typename C>
126 inline int Matrix<C>::get_comm_size() const {
127  return grid.mpisize;
128 }
129 
130 template <typename C>
131 inline int Matrix<C>::get_comm_rank() const {
132  return grid.myrank;
133 }
134 
135 template <typename C>
136 inline const int* Matrix<C>::descriptor() const {
137  return &(desc[0]);
138 }
139 
140 // template <typename C> inline
141 // int Matrix<C>::blacs_context() const { return desc[1]; }
142 
143 template <typename C>
144 inline int Matrix<C>::n_row() const {
145  return desc[2];
146 }
147 
148 template <typename C>
149 inline int Matrix<C>::n_col() const {
150  return desc[3];
151 }
152 
153 // template <typename C> inline
154 // int Matrix<C>::nb_row() const { return desc[4]; }
155 
156 // template <typename C> inline
157 // int Matrix<C>::nb_col() const { return desc[5]; }
158 
159 template <typename C>
160 inline int Matrix<C>::get_lld() const {
161  return desc[8];
162 }
163 
164 template <typename C>
165 inline void Matrix<C>::global_index(size_t i, size_t& g_row,
166  size_t& g_col) const {
167  const int lld = get_lld();
168  const size_t l_row = i % lld;
169  const size_t l_col = i / lld;
170  prep_local_to_global();
171  g_row = global_row[l_row];
172  g_col = global_col[l_col];
173 };
174 
175 template <typename C>
176 inline bool Matrix<C>::local_index(size_t g_row, size_t g_col,
177  size_t& i) const {
178  const int lld = get_lld();
179  const int nprow = grid.nprow;
180  const int npcol = grid.npcol;
181  const int myprow = grid.myprow;
182  const int mypcol = grid.mypcol;
183 
184  size_t b_row = g_row / BLOCK_SIZE;
185  size_t p_row = b_row % nprow;
186  size_t b_col = g_col / BLOCK_SIZE;
187  size_t p_col = b_col % npcol;
188  if (p_row != size_t(myprow) || p_col != size_t(mypcol)) return false;
189 
190  size_t l_row = (b_row / nprow) * BLOCK_SIZE + g_row % BLOCK_SIZE;
191  size_t l_col = (b_col / npcol) * BLOCK_SIZE + g_col % BLOCK_SIZE;
192  i = l_row + l_col * lld; // column major
193  return true;
194 };
195 
196 template <typename C>
197 inline void Matrix<C>::local_position(size_t g_row, size_t g_col,
198  int& comm_rank, size_t& lindex) const {
199  prep_global_to_local();
200  const int myprow = proc_row[g_row];
201  const int mypcol = proc_col[g_col];
202  comm_rank = grid.mpirank(myprow, mypcol);
203  lindex = local_row[g_row] + local_col[g_col] * lld_list[myprow];
204 }
205 
206 template <typename C>
207 inline size_t Matrix<C>::local_row_size() const {
208  return local_row_size_;
209 }
210 
211 template <typename C>
212 inline size_t Matrix<C>::local_col_size() const {
213  return local_col_size_;
214 }
215 
216 template <typename C>
217 inline size_t Matrix<C>::local_row_index(size_t lindex) const {
218  return lindex % (static_cast<size_t>(get_lld()));
219 }
220 
221 template <typename C>
222 inline size_t Matrix<C>::local_col_index(size_t lindex) const {
223  return lindex / (static_cast<size_t>(get_lld()));
224 }
225 
226 template <typename C>
227 inline size_t Matrix<C>::global_row_index(size_t l_row) const {
228  return BLOCK_SIZE * ((l_row / BLOCK_SIZE) * grid.nprow + grid.myprow) +
229  (l_row % BLOCK_SIZE);
230 }
231 
232 template <typename C>
233 inline size_t Matrix<C>::global_col_index(size_t l_col) const {
234  return BLOCK_SIZE * ((l_col / BLOCK_SIZE) * grid.npcol + grid.mypcol) +
235  (l_col % BLOCK_SIZE);
236 }
237 
239 template <typename C>
240 inline void Matrix<C>::prep_local_to_global() const {
241  if (has_local_to_global) return;
242 
243  const int nprow = grid.nprow;
244  const int npcol = grid.npcol;
245  const int myprow = grid.myprow;
246  const int mypcol = grid.mypcol;
247  const size_t size_row = get_lld();
248  const size_t size_col = local_size() / size_row;
249  global_row.resize(size_row);
250  global_col.resize(size_col);
251 
252 #pragma omp parallel default(shared)
253  {
254 #pragma omp for
255  for (size_t l_row = 0; l_row < size_row; ++l_row) {
256  global_row[l_row] = BLOCK_SIZE * ((l_row / BLOCK_SIZE) * nprow + myprow) +
257  (l_row % BLOCK_SIZE);
258  }
259 #pragma omp for
260  for (size_t l_col = 0; l_col < size_col; ++l_col) {
261  global_col[l_col] = BLOCK_SIZE * ((l_col / BLOCK_SIZE) * npcol + mypcol) +
262  (l_col % BLOCK_SIZE);
263  }
264  }
265 
266  has_local_to_global = true;
267 }
268 
270 template <typename C>
271 inline void Matrix<C>::prep_global_to_local() const {
272  if (has_global_to_local) return;
273 
274  const int nprow = grid.nprow;
275  const int npcol = grid.npcol;
276  const size_t size_row = n_row();
277  const size_t size_col = n_col();
278  const size_t nblocks = size_row / BLOCK_SIZE;
279  const size_t extra_blocks = nblocks % nprow;
280  const size_t locr_offset = (nblocks / nprow) * BLOCK_SIZE;
281 
282  local_row.resize(size_row);
283  local_col.resize(size_col);
284  proc_row.resize(size_row);
285  proc_col.resize(size_col);
286  lld_list.resize(nprow);
287 
288 #pragma omp parallel default(shared)
289  {
290 #pragma omp for
291  for (size_t g_row = 0; g_row < size_row; ++g_row) {
292  size_t b_row = g_row / BLOCK_SIZE;
293  proc_row[g_row] = b_row % nprow;
294  local_row[g_row] = (b_row / nprow) * BLOCK_SIZE + g_row % BLOCK_SIZE;
295  }
296 
297 #pragma omp for
298  for (size_t g_col = 0; g_col < size_col; ++g_col) {
299  size_t b_col = g_col / BLOCK_SIZE;
300  proc_col[g_col] = b_col % npcol;
301  local_col[g_col] = (b_col / npcol) * BLOCK_SIZE + g_col % BLOCK_SIZE;
302  }
303 
304 #pragma omp for
305  for (size_t myprow = 0; myprow < size_t(nprow); ++myprow) {
306  // inline expansion of numroc_()
307  size_t locr = locr_offset;
308  if (myprow < extra_blocks)
309  locr += BLOCK_SIZE;
310  else if (myprow == extra_blocks)
311  locr += size_row % BLOCK_SIZE;
312 
313  lld_list[myprow] = (locr < 1) ? 1 : locr;
314  }
315  }
316 
317  has_global_to_local = true;
318 };
319 
320 template <typename C>
321 void Matrix<C>::init(size_t n_row, size_t n_col) {
322  int xM = n_row;
323  int xN = n_col;
324  int xMB = BLOCK_SIZE;
325  int xNB = BLOCK_SIZE;
326  int irsrc = 0, icsrc = 0;
327  int locr = numroc_(&xM, &xMB, &(grid.myprow), &irsrc, &(grid.nprow));
328  int locc = numroc_(&xN, &xNB, &(grid.mypcol), &icsrc, &(grid.npcol));
329  int lld = (locr < 1) ? 1 : locr;
330  int info;
331  descinit_(&(desc[0]), &xM, &xN, &xMB, &xNB, &irsrc, &icsrc, &(grid.ictxt),
332  &lld, &info);
333  V.resize(locr * locc);
334  local_row_size_ = static_cast<size_t>(locr);
335  local_col_size_ = static_cast<size_t>(locc);
336  has_local_to_global = false;
337  has_global_to_local = false;
338 };
339 
340 // template <typename C>
341 // void Matrix<C>::set(C (*element)(size_t gi, size_t gj)) {
342 // const size_t& length = V.size();
343 // for (size_t i=0;i<length;++i) {
344 // size_t gi, gj;
345 // global_index(i, gi, gj);
346 // V[i] = element(gi, gj);
347 // }
348 // };
349 
350 template <typename C>
351 inline void Matrix<C>::print_info(std::ostream& out) const {
352  out << "Matrix: prow= " << grid.myprow << " pcol= " << grid.mypcol
353  << " local_size= " << local_size() << " lld(locr)= " << get_lld()
354  << " locc= " << local_size() / get_lld() << "\n";
355 };
356 
357 template <typename C>
359  assert(V.size() == rhs.local_size());
360  for (size_t i = 0; i < V.size(); ++i) V[i] += rhs[i];
361  return *this;
362 };
363 
364 template <typename C>
366  assert(V.size() == rhs.local_size());
367  for (size_t i = 0; i < V.size(); ++i) V[i] -= rhs[i];
368  return *this;
369 };
370 
371 template <typename C>
373  for (size_t i = 0; i < V.size(); ++i) V[i] *= rhs;
374  return *this;
375 };
376 
377 template <typename C>
379  for (size_t i = 0; i < V.size(); ++i) V[i] /= rhs;
380  return *this;
381 };
382 
383 template <typename C>
384 template <typename UnaryOperation>
385 Matrix<C>& Matrix<C>::map(UnaryOperation op) {
386  std::transform(V.begin(), V.end(), V.begin(), op);
387  return *this;
388 };
389 
390 template <typename C>
392  /* new matrix */
393  Matrix<C> M_new(get_comm(), n_col(), n_row());
394 
395  /* create lists of local position and destination rank */
396  std::vector<int> dest_rank(local_size());
397  std::vector<unsigned long int> local_position(local_size());
398 
399  for (size_t i = 0; i < local_size(); i++) {
400  int dest;
401  size_t pos, g_col, g_row;
402 
403  global_index(i, g_row, g_col);
404  M_new.local_position(g_col, g_row, dest, pos);
405 
406  local_position[i] = pos;
407  dest_rank[i] = dest;
408  }
409 
410  /* exchange data */
411  replace_matrix_data((*this), dest_rank, local_position, M_new);
412 
413  return M_new;
414 }
415 
416 template <typename C>
417 inline std::vector<C> Matrix<C>::flatten() {
418  const size_t n = n_row() * n_col();
419  const size_t nr = n_row();
420  std::vector<C> vec(n);
421  size_t g_row, g_col;
422  for (size_t i = 0; i < local_size(); ++i) {
423  global_index(i, g_row, g_col);
424  vec[g_row + g_col * nr] = V[i];
425  }
426  return mpi_wrapper::allreduce_vec(vec, get_comm());
427 };
428 
429 template <typename C>
430 inline void Matrix<C>::barrier() const {
431  MPI_Barrier(get_comm());
432 }
433 
434 template <typename C>
435 inline C Matrix<C>::allreduce_sum(C value) const {
436  return mpi_wrapper::allreduce_sum(value, get_comm());
437 }
438 
439 template <typename C>
440 template <typename D>
441 inline void Matrix<C>::bcast(D* buffer, int count, int root) const {
442  mpi_wrapper::bcast(buffer, count, root, get_comm());
443 }
444 
445 template <typename C>
446 void Matrix<C>::save_index(const std::string &filename) const {
447  const size_t size_row = get_lld();
448  const size_t size_col = local_size() / size_row;
449  prep_local_to_global();
450  std::ofstream fout(filename);
451  fout << "local_size= " << local_size() << "\n";
452  fout << "local_n_row= " << size_row << "\n";
453  fout << "local_n_col= " << size_col << "\n";
454  fout << "[global_row]\n";
455  for (size_t i = 0; i < size_row; ++i) {
456  fout << " " << global_row[i];
457  if (i % BLOCK_SIZE == BLOCK_SIZE - 1) fout << "\n";
458  }
459  fout << "\n";
460  fout << "[global_col]\n";
461  for (size_t i = 0; i < size_col; ++i) {
462  fout << " " << global_col[i];
463  if (i % BLOCK_SIZE == BLOCK_SIZE - 1) fout << "\n";
464  }
465  fout << "\n";
466  fout.close();
467 }
468 
469 /* ---------- non-member functions ---------- */
470 
471 template <typename C>
472 void replace_matrix_data(const Matrix<C>& M, const std::vector<int>& dest_rank,
473  const std::vector<size_t>& local_position,
474  Matrix<C>& M_new) {
475  const MPI_Comm comm = M.get_comm();
476  const int mpisize = M.get_comm_size();
477  // const int mpirank = M.get_comm_rank();
478  const size_t local_size = M.local_size();
479 
480  const C* mat = M.head();
481  C* mat_new = M_new.head();
482 
483  // assert(send_size_list.size() == mpisize);
484  assert(dest_rank.size() == local_size);
485  assert(local_position.size() == local_size);
486 
487  const int proc_size = mpisize + 1;
488  int* send_counts = new int[proc_size];
489  int* send_displs = new int[proc_size];
490  int* recv_counts = new int[proc_size];
491  int* recv_displs = new int[proc_size];
492  for (int rank = 0; rank < proc_size; ++rank) {
493  send_counts[rank] = 0;
494  send_displs[rank] = 0;
495  recv_counts[rank] = 0;
496  recv_displs[rank] = 0;
497  }
498 
499  for (size_t i = 0; i < local_size; ++i) {
500  send_counts[dest_rank[i]] += 1;
501  }
502 
503  MPI_Alltoall(send_counts, 1, MPI_INT, recv_counts, 1, MPI_INT, comm);
504 
505  for (int rank = 0; rank < mpisize; ++rank) {
506  send_displs[rank + 1] = send_counts[rank] + send_displs[rank];
507  recv_displs[rank + 1] = recv_counts[rank] + recv_displs[rank];
508  }
509 
510  const int send_size = local_size; // send_displs[mpisize];
511  const int recv_size = recv_displs[mpisize];
512  unsigned long int* send_pos = new unsigned long int[send_size];
513  unsigned long int* recv_pos = new unsigned long int[recv_size];
514  C* send_value = new C[send_size];
515  C* recv_value = new C[recv_size];
516 
517  /* Pack */
518  // std::vector<int> pack_idx = send_displs;
519  int* pack_idx = new int[proc_size];
520  for (int rank = 0; rank < proc_size; ++rank) {
521  pack_idx[rank] = send_displs[rank];
522  }
523  for (size_t i = 0; i < local_size; ++i) {
524  const int rank = dest_rank[i];
525  const int idx = pack_idx[rank];
526  send_value[idx] = mat[i];
527  send_pos[idx] = local_position[i];
528  pack_idx[rank] += 1;
529  }
530  delete[] pack_idx;
531 
532  /* Send and Recieve */
533  mpi_wrapper::alltoallv(send_pos, send_counts, send_displs, recv_pos,
534  recv_counts, recv_displs, comm);
535  mpi_wrapper::alltoallv(send_value, send_counts, send_displs, recv_value,
536  recv_counts, recv_displs, comm);
537 
538  /* Unpack */
539  for (int i = 0; i < recv_size; ++i) {
540  mat_new[recv_pos[i]] = recv_value[i];
541  }
542 
543  delete[] send_pos;
544  delete[] send_value;
545  delete[] recv_pos;
546  delete[] recv_value;
547 
548  delete[] send_counts;
549  delete[] send_displs;
550  delete[] recv_counts;
551  delete[] recv_displs;
552 }
553 
554 template <typename C>
555 void replace_matrix_data(const std::vector<C>& V, const std::vector<int>& dest_rank,
556  const std::vector<size_t>& local_position,
557  Matrix<C>& M_new) {
558  const MPI_Comm comm = M_new.get_comm();
559  const int mpisize = M_new.get_comm_size();
560  // const int mpirank = M.get_comm_rank();
561  const size_t local_size = V.size();
562 
563  const C* mat = &(V[0]);
564  C* mat_new = M_new.head();
565 
566  // assert(send_size_list.size() == mpisize);
567  assert(dest_rank.size() == local_size);
568  assert(local_position.size() == local_size);
569 
570  const int proc_size = mpisize + 1;
571  int* send_counts = new int[proc_size];
572  int* send_displs = new int[proc_size];
573  int* recv_counts = new int[proc_size];
574  int* recv_displs = new int[proc_size];
575  for (int rank = 0; rank < proc_size; ++rank) {
576  send_counts[rank] = 0;
577  send_displs[rank] = 0;
578  recv_counts[rank] = 0;
579  recv_displs[rank] = 0;
580  }
581 
582  for (size_t i = 0; i < local_size; ++i) {
583  send_counts[dest_rank[i]] += 1;
584  }
585 
586  MPI_Alltoall(send_counts, 1, MPI_INT, recv_counts, 1, MPI_INT, comm);
587 
588  for (int rank = 0; rank < mpisize; ++rank) {
589  send_displs[rank + 1] = send_counts[rank] + send_displs[rank];
590  recv_displs[rank + 1] = recv_counts[rank] + recv_displs[rank];
591  }
592 
593  const int send_size = local_size; // send_displs[mpisize];
594  const int recv_size = recv_displs[mpisize];
595  unsigned long int* send_pos = new unsigned long int[send_size];
596  unsigned long int* recv_pos = new unsigned long int[recv_size];
597  C* send_value = new C[send_size];
598  C* recv_value = new C[recv_size];
599 
600  /* Pack */
601  // std::vector<int> pack_idx = send_displs;
602  int* pack_idx = new int[proc_size];
603  for (int rank = 0; rank < proc_size; ++rank) {
604  pack_idx[rank] = send_displs[rank];
605  }
606  for (size_t i = 0; i < local_size; ++i) {
607  const int rank = dest_rank[i];
608  const int idx = pack_idx[rank];
609  send_value[idx] = mat[i];
610  send_pos[idx] = local_position[i];
611  pack_idx[rank] += 1;
612  }
613  delete[] pack_idx;
614 
615  /* Send and Recieve */
616  mpi_wrapper::alltoallv(send_pos, send_counts, send_displs, recv_pos,
617  recv_counts, recv_displs, comm);
618  mpi_wrapper::alltoallv(send_value, send_counts, send_displs, recv_value,
619  recv_counts, recv_displs, comm);
620 
621  /* Unpack */
622  for (int i = 0; i < recv_size; ++i) {
623  mat_new[recv_pos[i]] = recv_value[i];
624  }
625 
626  delete[] send_pos;
627  delete[] send_value;
628  delete[] recv_pos;
629  delete[] recv_value;
630 
631  delete[] send_counts;
632  delete[] send_displs;
633  delete[] recv_counts;
634  delete[] recv_displs;
635 }
636 
637 template <typename C>
638 void sum_matrix_data(const Matrix<C>& M, const std::vector<int>& dest_rank,
639  const std::vector<size_t>& local_position,
640  Matrix<C>& M_new) {
641  const MPI_Comm comm = M.get_comm();
642  const int mpisize = M.get_comm_size();
643  const int mpirank = M.get_comm_rank();
644  const size_t local_size = M.local_size();
645 
646  // assert(send_size_list.size() == mpisize);
647  assert(dest_rank.size() == local_size);
648  assert(local_position.size() == local_size);
649 
650  std::vector<int> send_size_list(mpisize, 0);
651  for (size_t i = 0; i < local_size; ++i) {
652  const int rank = dest_rank[i];
653  if (rank >= 0 && rank < mpisize) send_size_list[dest_rank[i]] += 1;
654  }
655 
656  const C* mat = M.head();
657  C* mat_new = M_new.head();
658 
659  for (int step = 0; step < mpisize; ++step) {
660  int dest = (mpirank + step) % mpisize;
661  int source = (mpirank + mpisize - step) % mpisize;
662  if (dest == mpirank) {
663  for (size_t i = 0; i < local_size; i++) {
664  if (dest_rank[i] == mpirank) {
665  mat_new[local_position[i]] += mat[i];
666  }
667  }
668  } else {
669  /* Get recv_size */
670  int send_size = send_size_list[dest];
671  int recv_size;
672  MPI_Status status;
673  int tag = step;
674  MPI_Sendrecv(&send_size, 1, MPI_INT, dest, tag, &recv_size, 1, MPI_INT,
675  source, tag, comm, &status);
676 
677  /* Pack */
678  std::vector<unsigned long int> send_pos(send_size);
679  std::vector<C> send_value(send_size);
680  size_t k = 0;
681  for (size_t i = 0; i < local_size; i++) {
682  if (dest_rank[i] == dest) {
683  send_pos[k] = local_position[i];
684  send_value[k] = mat[i];
685  k += 1;
686  }
687  }
688 
689  /* Send and Recieve */
690  std::vector<unsigned long int> recv_pos(recv_size);
691  std::vector<C> recv_value(recv_size);
692  MPI_Sendrecv(&(send_pos[0]), send_size, MPI_UNSIGNED_LONG, dest, tag,
693  &(recv_pos[0]), recv_size, MPI_UNSIGNED_LONG, source, tag,
694  comm, &status);
695  mpi_wrapper::send_recv_vector(send_value, dest, tag, recv_value, source,
696  tag, comm, status);
697 
698  /* Unpack */
699  for (int i = 0; i < recv_size; ++i) {
700  mat_new[recv_pos[i]] += recv_value[i];
701  }
702  }
703  }
704 }
705 
706 template <typename C>
707 double max_abs(const Matrix<C>& a) {
708  const size_t n = a.local_size();
709  double send = 0.0;
710  double recv;
711  for (size_t i = 0; i < n; ++i) {
712  send = std::max(send, std::abs(a[i]));
713  }
714  MPI_Allreduce(&send, &recv, 1, MPI_DOUBLE, MPI_MAX, a.get_comm());
715  return recv;
716 };
717 
718 template <typename C>
719 double min_abs(const Matrix<C>& a) {
720  const size_t n = a.local_size();
721  double send = DBL_MAX;
722  double recv;
723  for (size_t i = 0; i < n; ++i) {
724  send = std::min(send, std::abs(a[i]));
725  }
726  MPI_Allreduce(&send, &recv, 1, MPI_DOUBLE, MPI_MIN, a.get_comm());
727  return recv;
728 };
729 
730 } // namespace scalapack
731 } // namespace mptensor
732 
733 #endif // _MATRIX_SCALAPACK_IMPL_HPP_
BlacsGrid class.
Non-distributed matrix using LAPACK.
Definition: matrix_lapack.hpp:44
Distributed matrix using ScaLAPACK.
Definition: matrix_scalapack.hpp:47
int n_row() const
Definition: matrix_scalapack_impl.hpp:144
Matrix & operator*=(C rhs)
Definition: matrix_scalapack_impl.hpp:372
const int * descriptor() const
Definition: matrix_scalapack_impl.hpp:136
void bcast(D *buffer, int count, int root) const
Definition: matrix_scalapack_impl.hpp:441
void print_info(std::ostream &) const
Definition: matrix_scalapack_impl.hpp:351
void prep_local_to_global() const
Preprocess for fast conversion from local index to global one.
Definition: matrix_scalapack_impl.hpp:240
size_t local_row_index(size_t lindex) const
Definition: matrix_scalapack_impl.hpp:217
bool local_index(size_t g_row, size_t g_col, size_t &i) const
Definition: matrix_scalapack_impl.hpp:176
void global_index(size_t i, size_t &g_row, size_t &g_col) const
Definition: matrix_scalapack_impl.hpp:165
int n_col() const
Definition: matrix_scalapack_impl.hpp:149
size_t local_row_size() const
Definition: matrix_scalapack_impl.hpp:207
Matrix & operator-=(const Matrix &rhs)
Definition: matrix_scalapack_impl.hpp:365
const C * head() const
Definition: matrix_scalapack_impl.hpp:103
Matrix()
Definition: matrix_scalapack_impl.hpp:70
Matrix & map(UnaryOperation op)
void init(size_t n_row, size_t n_col)
Definition: matrix_scalapack_impl.hpp:321
C allreduce_sum(C value) const
Definition: matrix_scalapack_impl.hpp:435
std::vector< C > flatten()
Definition: matrix_scalapack_impl.hpp:417
const MPI_Comm & get_comm() const
Definition: matrix_scalapack_impl.hpp:121
Matrix & operator/=(C rhs)
Definition: matrix_scalapack_impl.hpp:378
void barrier() const
Definition: matrix_scalapack_impl.hpp:430
void local_position(size_t g_row, size_t g_col, int &comm_rank, size_t &lindex) const
Definition: matrix_scalapack_impl.hpp:197
const Matrix transpose()
Definition: matrix_scalapack_impl.hpp:391
void save_index(const std::string &filename) const
Definition: matrix_scalapack_impl.hpp:446
size_t local_col_index(size_t lindex) const
Definition: matrix_scalapack_impl.hpp:222
size_t local_size() const
Definition: matrix_scalapack_impl.hpp:113
size_t global_row_index(size_t lindex_row) const
Definition: matrix_scalapack_impl.hpp:227
int get_comm_size() const
Definition: matrix_scalapack_impl.hpp:126
int get_comm_rank() const
Definition: matrix_scalapack_impl.hpp:131
void prep_global_to_local() const
Preprocess for fast conversion from global index to local one.
Definition: matrix_scalapack_impl.hpp:271
Matrix & operator+=(const Matrix &rhs)
Definition: matrix_scalapack_impl.hpp:358
const C & operator[](size_t i) const
Definition: matrix_scalapack_impl.hpp:93
size_t local_col_size() const
Definition: matrix_scalapack_impl.hpp:212
size_t global_col_index(size_t lindex_col) const
Definition: matrix_scalapack_impl.hpp:233
std::string filename(const std::string &prefix, int proc_size)
Definition: common.hpp:32
void descinit_(int desca[], int *M, int *N, int *MB, int *NB, int *irsrc, int *icsrc, int *ictxt, int *lld, int *info)
int numroc_(int *M, int *MB, int *prow, int *irsrc, int *nprow)
double min(const Matrix< C > &a)
double max(const Matrix< C > &a)
std::vector< C > allreduce_vec(const std::vector< C > &vec, const MPI_Comm &comm)
Calculate a summation of each element of vector over MPI communicator.
void bcast(C *buffer, int count, int root, const MPI_Comm &comm)
Wrapper of MPI_Bcast.
void alltoallv(const C *sendbuf, const int *sendcounts, const int *sdispls, C *recvbuf, const int *recvcounts, const int *rdispls, const MPI_Comm &comm)
Wrapper of MPI_Alltoallv.
void send_recv_vector(const std::vector< C > &send_vec, int dest, int sendtag, std::vector< C > &recv_vec, int source, int recvtag, const MPI_Comm &comm, MPI_Status &status)
Wrapper of MPI_Sendrecv.
C allreduce_sum(C val, const MPI_Comm &comm)
Calculate a summation over MPI communicator.
double min_abs(const Matrix< C > &a)
Definition: matrix_scalapack_impl.hpp:719
void replace_matrix_data(const Matrix< C > &M, const std::vector< int > &dest_rank, const std::vector< size_t > &local_position, Matrix< C > &M_new)
Definition: matrix_scalapack_impl.hpp:472
void sum_matrix_data(const Matrix< C > &M, const std::vector< int > &dest_rank, const std::vector< size_t > &local_position, Matrix< C > &M_new)
Definition: matrix_scalapack_impl.hpp:638
double max_abs(const Matrix< C > &a)
Definition: matrix_scalapack_impl.hpp:707
Definition: complex.hpp:34