16#ifndef dealii__tensor_product_kernels_h
17#define dealii__tensor_product_kernels_h
23#include <deal.II/matrix_free/cuda_matrix_free.templates.h>
48#if KOKKOS_VERSION >= 40000
52 template <
int n_q_points_1d,
61 apply_1d(
const Kokkos::TeamPolicy<
62 MemorySpace::Default::kokkos_space::execution_space>::member_type
64 const Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
69 Number t[n_q_points_1d];
70 Kokkos::parallel_for(Kokkos::TeamThreadRange(team_member, n_q_points_1d),
78 for (
int k = 0; k < n_q_points_1d; ++k)
80 const unsigned int shape_idx =
81 dof_to_quad ? (q + k * n_q_points_1d) :
82 (k + q * n_q_points_1d);
83 const unsigned int source_idx = k;
84 t[q] += shape_data[shape_idx] * in(source_idx);
88 if constexpr (in_place)
89 team_member.team_barrier();
91 Kokkos::parallel_for(Kokkos::TeamThreadRange(team_member, n_q_points_1d),
93 const unsigned int destination_idx = q;
95 Kokkos::atomic_add(&out(destination_idx), t[q]);
97 out(destination_idx) = t[q];
106 template <
int n_q_points_1d,
113 typename ViewTypeOut>
115 apply_2d(
const Kokkos::TeamPolicy<
116 MemorySpace::Default::kokkos_space::execution_space>::member_type
118 const Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
123 using TeamType = Kokkos::TeamPolicy<
124 MemorySpace::Default::kokkos_space::execution_space>::member_type;
125 constexpr unsigned int n_q_points =
Utilities::pow(n_q_points_1d, 2);
127 Number t[n_q_points];
129 Kokkos::TeamThreadMDRange<Kokkos::Rank<2>, TeamType>(team_member,
132 Kokkos::parallel_for(thread_policy, [&](
const int i,
const int j) {
133 int q_point = i + j * n_q_points_1d;
138 const int base_shape = dof_to_quad ? j : j * n_q_points_1d;
139 const int stride_shape = dof_to_quad ? n_q_points_1d : 1;
140 const int base_in = (direction == 0) ? (n_q_points_1d * i) : i;
142 Number
sum = shape_data[base_shape] * in(base_in);
143 for (
int k = 1; k < n_q_points_1d; ++k)
145 sum += shape_data[base_shape + k * stride_shape] *
146 in(base_in + k * stride_in);
152 team_member.team_barrier();
154 Kokkos::parallel_for(thread_policy, [&](
const int i,
const int j) {
155 const int q_point = i + j * n_q_points_1d;
156 const unsigned int destination_idx =
157 (direction == 0) ? (j + n_q_points_1d * i) : (i + n_q_points_1d * j);
160 Kokkos::atomic_add(&out(destination_idx), t[q_point]);
162 out(destination_idx) = t[q_point];
171 template <
int n_q_points_1d,
178 typename ViewTypeOut>
180 apply_3d(
const Kokkos::TeamPolicy<
181 MemorySpace::Default::kokkos_space::execution_space>::member_type
183 const Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
188 using TeamType = Kokkos::TeamPolicy<
189 MemorySpace::Default::kokkos_space::execution_space>::member_type;
190 constexpr unsigned int n_q_points =
Utilities::pow(n_q_points_1d, 3);
192 Number t[n_q_points];
193 auto thread_policy = Kokkos::TeamThreadMDRange<Kokkos::Rank<3>, TeamType>(
194 team_member, n_q_points_1d, n_q_points_1d, n_q_points_1d);
195 Kokkos::parallel_for(
196 thread_policy, [&](
const int i,
const int j,
const int q) {
198 i + j * n_q_points_1d + q * n_q_points_1d * n_q_points_1d;
203 const int base_shape = dof_to_quad ? q : q * n_q_points_1d;
204 const int stride_shape = dof_to_quad ? n_q_points_1d : 1;
207 (n_q_points_1d * (i + n_q_points_1d * j)) :
208 (direction == 1 ? (i + n_q_points_1d * n_q_points_1d * j) :
209 (i + n_q_points_1d * j)));
211 Number
sum = shape_data[base_shape] * in(base_in);
212 for (
int k = 1; k < n_q_points_1d; ++k)
214 sum += shape_data[base_shape + k * stride_shape] *
215 in(base_in + k * stride_in);
221 team_member.team_barrier();
223 Kokkos::parallel_for(
224 thread_policy, [&](
const int i,
const int j,
const int q) {
226 i + j * n_q_points_1d + q * n_q_points_1d * n_q_points_1d;
227 const unsigned int destination_idx =
228 (direction == 0) ? (q + n_q_points_1d * (i + n_q_points_1d * j)) :
229 (direction == 1) ? (i + n_q_points_1d * (q + n_q_points_1d * j)) :
230 (i + n_q_points_1d * (j + n_q_points_1d * q));
233 Kokkos::atomic_add(&out(destination_idx), t[q_point]);
235 out(destination_idx) = t[q_point];
253 typename ViewTypeOut>
256 MemorySpace::Default::kokkos_space::execution_space>::member_type
258 const Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
263#if KOKKOS_VERSION >= 40000
264 if constexpr (dim == 1)
265 apply_1d<n_q_points_1d, Number, direction, dof_to_quad, add, in_place>(
266 team_member, shape_data, in, out);
267 if constexpr (dim == 2)
268 apply_2d<n_q_points_1d, Number, direction, dof_to_quad, add, in_place>(
269 team_member, shape_data, in, out);
270 if constexpr (dim == 3)
271 apply_3d<n_q_points_1d, Number, direction, dof_to_quad, add, in_place>(
272 team_member, shape_data, in, out);
274 constexpr unsigned int n_q_points =
Utilities::pow(n_q_points_1d, dim);
276 Number t[n_q_points];
277 Kokkos::parallel_for(
278 Kokkos::TeamThreadRange(team_member, n_q_points),
279 [&](
const int &q_point) {
280 const unsigned int i = (dim == 1) ? 0 : q_point % n_q_points_1d;
281 const unsigned int j =
282 (dim == 3) ? (q_point / n_q_points_1d) % n_q_points_1d : 0;
283 const unsigned int q =
284 (dim == 1) ? q_point :
285 (dim == 2) ? (q_point / n_q_points_1d) % n_q_points_1d :
286 q_point / (n_q_points_1d * n_q_points_1d);
290 const int stride_shape = dof_to_quad ? n_q_points_1d : 1;
292 const int base_shape = dof_to_quad ? q : (q * n_q_points_1d);
294 (direction == 0) ? (n_q_points_1d * (i + n_q_points_1d * j)) :
295 (direction == 1) ? (i + n_q_points_1d * (n_q_points_1d * j)) :
296 (i + n_q_points_1d * j);
298 shape_data[base_shape] * (in_place ? out(base) : in(base));
299 for (
int k = 1; k < n_q_points_1d; ++k)
302 shape_data[base_shape + k * stride_shape] *
303 (in_place ? out(base + k * stride) : in(base + k * stride));
309 team_member.team_barrier();
311 Kokkos::parallel_for(
312 Kokkos::TeamThreadRange(team_member, n_q_points),
313 [&](
const int &q_point) {
314 const unsigned int i = (dim == 1) ? 0 : q_point % n_q_points_1d;
315 const unsigned int j =
316 (dim == 3) ? (q_point / n_q_points_1d) % n_q_points_1d : 0;
317 const unsigned int q =
318 (dim == 1) ? q_point :
319 (dim == 2) ? (q_point / n_q_points_1d) % n_q_points_1d :
320 q_point / (n_q_points_1d * n_q_points_1d);
322 const unsigned int destination_idx =
323 (direction == 0) ? (q + n_q_points_1d * (i + n_q_points_1d * j)) :
324 (direction == 1) ? (i + n_q_points_1d * (q + n_q_points_1d * j)) :
325 (i + n_q_points_1d * (j + n_q_points_1d * q));
328 Kokkos::atomic_add(&out(destination_idx), t[q_point]);
330 out(destination_idx) = t[q_point];
357 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
366 MemorySpace::Default::kokkos_space::execution_space>::member_type;
371 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
shape_values,
372 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
374 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
380 template <
typename ViewType>
388 template <
typename ViewTypeIn,
typename ViewTypeOut>
396 template <
typename ViewType1,
typename ViewType2>
403 template <
typename ViewType>
411 template <
bool add,
typename ViewType1,
typename ViewType2>
419 template <
typename ViewType1,
typename ViewType2>
427 template <
int direction,
432 typename ViewTypeOut>
434 values(
const ViewTypeIn in, ViewTypeOut out)
const;
440 template <
int direction,
445 typename ViewTypeOut>
447 gradients(
const ViewTypeIn in, ViewTypeOut out)
const;
454 template <
int direction,
459 typename ViewTypeOut>
461 co_gradients(
const ViewTypeIn in, ViewTypeOut out)
const;
471 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
shape_values;
476 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
482 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
488 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
495 EvaluatorTensorProduct(
497 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
shape_values,
498 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
500 Kokkos::View<Number *, MemorySpace::Default::kokkos_space>
510 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
511 template <
int direction,
516 typename ViewTypeOut>
523 ViewTypeOut out)
const
531 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
532 template <
int direction,
537 typename ViewTypeOut>
544 ViewTypeOut out)
const
552 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
553 template <
int direction,
558 typename ViewTypeOut>
565 ViewTypeOut out)
const
573 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
574 template <
typename ViewType>
582 if constexpr (dim == 1)
584 else if constexpr (dim == 2)
590 else if constexpr (dim == 3)
599 Kokkos::abort(
"dim must not exceed 3!");
604 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
605 template <
typename ViewType>
613 if constexpr (dim == 1)
615 else if constexpr (dim == 2)
621 else if constexpr (dim == 3)
630 Kokkos::abort(
"dim must not exceed 3!");
635 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
636 template <
typename ViewTypeIn,
typename ViewTypeOut>
645 if constexpr (dim == 1)
648 u, Kokkos::subview(grad_u, Kokkos::ALL, 0));
650 else if constexpr (dim == 2)
653 u, Kokkos::subview(grad_u, Kokkos::ALL, 0));
655 u, Kokkos::subview(grad_u, Kokkos::ALL, 1));
660 Kokkos::subview(grad_u, Kokkos::ALL, 0));
662 Kokkos::subview(grad_u, Kokkos::ALL, 1),
663 Kokkos::subview(grad_u, Kokkos::ALL, 1));
665 else if constexpr (dim == 3)
668 u, Kokkos::subview(grad_u, Kokkos::ALL, 0));
670 u, Kokkos::subview(grad_u, Kokkos::ALL, 1));
672 u, Kokkos::subview(grad_u, Kokkos::ALL, 2));
677 Kokkos::subview(grad_u, Kokkos::ALL, 0));
679 Kokkos::subview(grad_u, Kokkos::ALL, 1),
680 Kokkos::subview(grad_u, Kokkos::ALL, 1));
682 Kokkos::subview(grad_u, Kokkos::ALL, 2));
687 Kokkos::subview(grad_u, Kokkos::ALL, 0));
689 Kokkos::subview(grad_u, Kokkos::ALL, 1));
691 Kokkos::subview(grad_u, Kokkos::ALL, 2),
692 Kokkos::subview(grad_u, Kokkos::ALL, 2));
695 Kokkos::abort(
"dim must not exceed 3!");
700 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
701 template <
typename ViewType1,
typename ViewType2>
711 if constexpr (dim == 1)
717 u, Kokkos::subview(grad_u, Kokkos::ALL, 0));
719 else if constexpr (dim == 2)
727 u, Kokkos::subview(grad_u, Kokkos::ALL, 0));
729 u, Kokkos::subview(grad_u, Kokkos::ALL, 1));
731 else if constexpr (dim == 3)
741 u, Kokkos::subview(grad_u, Kokkos::ALL, 0));
743 u, Kokkos::subview(grad_u, Kokkos::ALL, 1));
745 u, Kokkos::subview(grad_u, Kokkos::ALL, 2));
748 Kokkos::abort(
"dim must not exceed 3!");
753 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
754 template <
bool add,
typename ViewType1,
typename ViewType2>
763 if constexpr (dim == 1)
766 Kokkos::subview(grad_u, Kokkos::ALL, dim), u);
768 else if constexpr (dim == 2)
771 Kokkos::subview(grad_u, Kokkos::ALL, 0),
772 Kokkos::subview(grad_u, Kokkos::ALL, 0));
774 Kokkos::subview(grad_u,
784 Kokkos::subview(grad_u, Kokkos::ALL, 1), u);
786 else if constexpr (dim == 3)
789 Kokkos::subview(grad_u, Kokkos::ALL, 0),
790 Kokkos::subview(grad_u, Kokkos::ALL, 0));
792 Kokkos::subview(grad_u,
796 Kokkos::subview(grad_u,
803 Kokkos::subview(grad_u,
807 Kokkos::subview(grad_u, Kokkos::ALL, 1),
808 Kokkos::subview(grad_u, Kokkos::ALL, 1));
810 Kokkos::subview(grad_u,
823 Kokkos::subview(grad_u, Kokkos::ALL, 2), u);
826 Kokkos::abort(
"dim must not exceed 3!");
831 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
832 template <
typename ViewType1,
typename ViewType2>
842 if constexpr (dim == 1)
845 Kokkos::subview(grad_u, Kokkos::ALL, 0), u);
850 else if constexpr (dim == 2)
853 Kokkos::subview(grad_u, Kokkos::ALL, 1), u);
856 Kokkos::subview(grad_u, Kokkos::ALL, 0), u);
864 else if constexpr (dim == 3)
867 Kokkos::subview(grad_u, Kokkos::ALL, 2), u);
870 Kokkos::subview(grad_u, Kokkos::ALL, 1), u);
873 Kokkos::subview(grad_u, Kokkos::ALL, 0), u);
884 Kokkos::abort(
"dim must not exceed 3!");
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
void apply(const Kokkos::TeamPolicy< MemorySpace::Default::kokkos_space::execution_space >::member_type &team_member, const Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_data, const ViewTypeIn in, ViewTypeOut out)
T sum(const T &t, const MPI_Comm mpi_communicator)
constexpr T pow(const T base, const int iexp)
#define DEAL_II_HOST_DEVICE
void integrate_values(ViewType u)
void evaluate_values(ViewType u)
const TeamHandle & team_member
Kokkos::View< Number *, MemorySpace::Default::kokkos_space > co_shape_gradients
void evaluate_values_and_gradients(ViewType1 u, ViewType2 grad_u)
Kokkos::TeamPolicy< MemorySpace::Default::kokkos_space::execution_space >::member_type TeamHandle
void co_gradients(const ViewTypeIn in, ViewTypeOut out) const
void integrate_gradients(ViewType1 u, ViewType2 grad_u)
void evaluate_gradients(const ViewTypeIn u, ViewTypeOut grad_u)
EvaluatorTensorProduct(const TeamHandle &team_member, Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_values, Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_gradients, Kokkos::View< Number *, MemorySpace::Default::kokkos_space > co_shape_gradients)
void values(const ViewTypeIn in, ViewTypeOut out) const
Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_values
Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_gradients
void gradients(const ViewTypeIn in, ViewTypeOut out) const
void integrate_values_and_gradients(ViewType1 u, ViewType2 grad_u)