Hipace
HpMultiGrid.H
Go to the documentation of this file.
1 /* Copyright 2022
2  *
3  * This file is part of HiPACE++.
4  *
5  * Authors: Weiqun Zhang
6  * License: BSD-3-Clause-LBNL
7  */
8 #ifndef HIPACE_MULTIGRID_H_
9 #define HIPACE_MULTIGRID_H_
10 
12 #include <AMReX_FArrayBox.H>
13 #include <AMReX_Geometry.H>
14 #include <type_traits>
15 
17 namespace hpmg {
18 
33 class MultiGrid
34 {
35 public:
36 
43  explicit MultiGrid (amrex::Real dx, amrex::Real dy, amrex::Box a_domain);
44 
46  ~MultiGrid ();
47 
59  void solve1 (amrex::FArrayBox& sol, amrex::FArrayBox const& rhs, amrex::FArrayBox const& acoef,
60  amrex::Real const tol_rel, amrex::Real const tol_abs, int const nummaxiter,
61  int const verbose);
62 
75  void solve2 (amrex::FArrayBox& sol, amrex::FArrayBox const& rhs,
76  amrex::Real const acoef_real, amrex::Real const acoef_imag,
77  amrex::Real const tol_rel, amrex::Real const tol_abs,
78  int const nummaxiter, int const verbose);
79 
92  void solve2 (amrex::FArrayBox& sol, amrex::FArrayBox const& rhs,
93  amrex::Real const acoef_real, amrex::FArrayBox const& acoef_imag,
94  amrex::Real const tol_rel, amrex::Real const tol_abs,
95  int const nummaxiter, int const verbose);
96 
109  void solve2 (amrex::FArrayBox& sol, amrex::FArrayBox const& rhs,
110  amrex::FArrayBox const& acoef_real, amrex::Real const acoef_imag,
111  amrex::Real const tol_rel, amrex::Real const tol_abs,
112  int const nummaxiter, int const verbose);
113 
126  void solve2 (amrex::FArrayBox& sol, amrex::FArrayBox const& rhs,
127  amrex::FArrayBox const& acoef_real, amrex::FArrayBox const& acoef_imag,
128  amrex::Real const tol_rel, amrex::Real const tol_abs,
129  int const nummaxiter, int const verbose);
130 
134  void average_down_acoef ();
137  void vcycle ();
141  void bottomsolve ();
144  void solve_doit (amrex::FArrayBox& sol, amrex::FArrayBox const& rhs,
145  amrex::Real const tol_rel, amrex::Real const tol_abs,
146  int const nummaxiter, int const verbose);
150  static amrex::Box center_box (amrex::Box in_box, amrex::Box domain) {
151  amrex::Box out_box = amrex::makeSlab(
152  in_box + (domain.smallEnd() + domain.bigEnd() - in_box.smallEnd() - in_box.bigEnd())/2,
153  2, 0);
154  out_box.setType(domain.ixType());
155  AMREX_ALWAYS_ASSERT(out_box.contains(domain));
156  return out_box;
157  }
158 
159 private:
160 
161  static constexpr int m_num_system_types = 2;
162  int m_system_type = 0;
163 
167  amrex::Real m_dx, m_dy;
168 
179 
184 
186  static constexpr int nfabvs = 4;
195 
201 
206 
207 #if defined(AMREX_USE_CUDA)
209  bool m_cuda_graph_acf_created[m_num_system_types] = {false,false};
210  cudaGraph_t m_cuda_graph_acf[m_num_system_types] = {NULL,NULL};
211  cudaGraphExec_t m_cuda_graph_exe_acf[m_num_system_types] = {NULL,NULL};
212 
214  bool m_cuda_graph_vcycle_created[m_num_system_types] = {false,false};
215  cudaGraph_t m_cuda_graph_vcycle[m_num_system_types] = {NULL,NULL};
216  cudaGraphExec_t m_cuda_graph_exe_vcycle[m_num_system_types] = {NULL,NULL};
217 #endif
218 };
219 
220 #if defined(AMREX_USE_GPU) || !defined(AMREX_USE_OMP)
221 
222 using amrex::ParallelFor;
223 
224 #else
225 
226 // amrex::ParallelFor does not do OpenMP. Thus we have hpmg::ParallelFor.
227 
228 template <typename T, typename F>
229 void ParallelFor (T n, F&& f) noexcept
230 {
231  if (n < 1000) {
232  for (T i = 0; i < n; ++i) {
233  f(i);
234  }
235  } else {
236 #pragma omp parallel for simd
237  for (T i = 0; i < n; ++i) {
238  f(i);
239  }
240  }
241 }
242 
243 template <typename F>
244 void ParallelFor (amrex::Box const& box, F&& f) noexcept
245 {
246  if (box.numPts() < 1000) {
247  const auto lo = amrex::lbound(box);
248  const auto hi = amrex::ubound(box);
249  for (int k = lo.z; k <= hi.z; ++k) {
250  for (int j = lo.y; j <= hi.y; ++j) {
251 #pragma omp simd
252  for (int i = lo.x; i <= hi.x; ++i) {
253  f(i,j,k);
254  }}}
255  } else {
256  const auto lo = amrex::lbound(box);
257  const auto hi = amrex::ubound(box);
258 
259  constexpr int tilesize = 8;
260 #pragma omp parallel for collapse(3)
261  for (int k = lo.z; k <= hi.z; ++k) {
262  for (int jt = lo.y; jt <= hi.y; jt+=tilesize) {
263  for (int it = lo.x; it <= hi.x; it+=tilesize) {
264  for (int j = jt; j < jt+tilesize && j<=hi.y; ++j) {
265  for (int i = it; i < it+tilesize && i<=hi.x; ++i) {
266  f(i,j,k);
267  }}}}}
268  }
269 }
270 
271 template <typename F>
272 void ParallelFor (amrex::Box const& box, int ncomp, F&& f) noexcept
273 {
274  if (box.numPts()*ncomp < 1000) {
275  const auto lo = amrex::lbound(box);
276  const auto hi = amrex::ubound(box);
277  for (int n = 0; n < ncomp; ++n) {
278  for (int k = lo.z; k <= hi.z; ++k) {
279  for (int j = lo.y; j <= hi.y; ++j) {
280 #pragma omp simd
281  for (int i = lo.x; i <= hi.x; ++i) {
282  f(i,j,k,n);
283  }}}
284  }
285  } else {
286  const auto lo = amrex::lbound(box);
287  const auto hi = amrex::ubound(box);
288 
289  constexpr int tilesize = 8;
290 #pragma omp parallel for collapse(4)
291  for (int n = 0; n < ncomp; ++n) {
292  for (int k = lo.z; k <= hi.z; ++k) {
293  for (int jt = lo.y; jt <= hi.y; jt+=tilesize) {
294  for (int it = lo.x; it <= hi.x; it+=tilesize) {
295  for (int j = jt; j < jt+tilesize && j<=hi.y; ++j) {
296  for (int i = it; i < it+tilesize && i<=hi.x; ++i) {
297  f(i,j,k,n);
298  }}}}}
299  }
300  }
301 }
302 
303 #endif
304 
305 }
306 
307 #endif
#define AMREX_ALWAYS_ASSERT(EX)
AMREX_GPU_HOST_DEVICE const IntVect & smallEnd() const &noexcept
AMREX_GPU_HOST_DEVICE bool contains(const IntVect &p) const noexcept
AMREX_GPU_HOST_DEVICE IndexType ixType() const noexcept
AMREX_GPU_HOST_DEVICE Box & setType(const IndexType &t) noexcept
AMREX_GPU_HOST_DEVICE const IntVect & bigEnd() const &noexcept
Multigrid solver.
Definition: HpMultiGrid.H:34
amrex::Vector< amrex::Box > m_domain
Definition: HpMultiGrid.H:165
amrex::Real m_dx
Definition: HpMultiGrid.H:167
static amrex::Box center_box(amrex::Box in_box, amrex::Box domain)
Centers the input box in x and y around the domain so that only the ghost cells "overhang"....
Definition: HpMultiGrid.H:150
amrex::Vector< amrex::FArrayBox > m_cor
Definition: HpMultiGrid.H:192
amrex::Array4< amrex::Real > const * m_acf_a
Definition: HpMultiGrid.H:197
amrex::FArrayBox m_sol
Definition: HpMultiGrid.H:181
void average_down_acoef()
Average down the coefficient. Ideally, this function is not supposed to be a public function....
Definition: HpMultiGrid.cpp:1216
amrex::Array4< amrex::Real > const * m_res_a
Definition: HpMultiGrid.H:198
amrex::Array4< amrex::Real > const * m_rescor_a
Definition: HpMultiGrid.H:200
static constexpr int nfabvs
Definition: HpMultiGrid.H:186
int m_single_block_level_begin
Definition: HpMultiGrid.H:172
bool m_use_single_block_kernel
Definition: HpMultiGrid.H:178
amrex::FArrayBox m_rhs
Definition: HpMultiGrid.H:183
amrex::Gpu::DeviceVector< amrex::Array4< amrex::Real > > m_d_array4
Definition: HpMultiGrid.H:205
void solve_doit(amrex::FArrayBox &sol, amrex::FArrayBox const &rhs, amrex::Real const tol_rel, amrex::Real const tol_abs, int const nummaxiter, int const verbose)
Private function used by solve1 and solve2. It's made public due to a CUDA limitation.
Definition: HpMultiGrid.cpp:893
amrex::Array4< amrex::Real > const * m_cor_a
Definition: HpMultiGrid.H:199
~MultiGrid()
Dtor.
Definition: HpMultiGrid.cpp:1282
void solve2(amrex::FArrayBox &sol, amrex::FArrayBox const &rhs, amrex::Real const acoef_real, amrex::Real const acoef_imag, amrex::Real const tol_rel, amrex::Real const tol_abs, int const nummaxiter, int const verbose)
Solve the Type II equation given the initial guess, right hand side, and the coefficient.
Definition: HpMultiGrid.cpp:793
void vcycle()
Perform a V-cycle. Ideally, this function is not supposed to be a public function....
Definition: HpMultiGrid.cpp:998
int m_max_level
Definition: HpMultiGrid.H:170
amrex::Vector< amrex::FArrayBox > m_res
Definition: HpMultiGrid.H:190
int m_num_mg_levels
Definition: HpMultiGrid.H:174
amrex::Vector< amrex::FArrayBox > m_acf
Definition: HpMultiGrid.H:188
amrex::Gpu::PinnedVector< amrex::Array4< amrex::Real > > m_h_array4
Definition: HpMultiGrid.H:203
void bottomsolve()
Solve at the bottom of the V-cycle. Ideally, this function is not supposed to be a public function....
Definition: HpMultiGrid.cpp:1105
amrex::Vector< amrex::FArrayBox > m_rescor
Definition: HpMultiGrid.H:194
int m_system_type
Definition: HpMultiGrid.H:162
amrex::Real m_dy
Definition: HpMultiGrid.H:167
void solve1(amrex::FArrayBox &sol, amrex::FArrayBox const &rhs, amrex::FArrayBox const &acoef, amrex::Real const tol_rel, amrex::Real const tol_abs, int const nummaxiter, int const verbose)
Solve the Type I equation given the initial guess, right hand side, and the coefficient.
Definition: HpMultiGrid.cpp:770
static constexpr int m_num_system_types
Definition: HpMultiGrid.H:161
int m_num_single_block_levels
Definition: HpMultiGrid.H:176
MultiGrid(amrex::Real dx, amrex::Real dy, amrex::Box a_domain)
Ctor.
Definition: HpMultiGrid.cpp:667
int i
Definition: MakeOpenBoundary.py:152
j
Definition: MakeOpenBoundary.py:128
int verbose
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Box makeSlab(Box const &b, int direction, int slab_index) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 ubound(Array4< T > const &a) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 lbound(Array4< T > const &a) noexcept
std::enable_if_t< std::is_integral< T >::value > ParallelFor(TypeList< CTOs... >, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
Definition: Hipace.H:38
void ParallelFor(T n, F &&f) noexcept
Definition: HpMultiGrid.H:229