16#ifndef dealii_matrix_free_evaluation_kernels_h
17#define dealii_matrix_free_evaluation_kernels_h
36 template <MatrixFreeFunctions::ElementType element,
bool is_
long>
40 template <
bool is_
long>
58 template <
bool is_
long>
77 template <
bool is_
long>
129 const Number *values_dofs_actual,
135 Number *values_dofs_actual,
137 const bool add_into_values_array);
142 *univariate_shape_data)
165 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
173 evaluate(
const unsigned int n_components,
175 const Number *values_dofs_actual,
179 integrate(
const unsigned int n_components,
181 Number *values_dofs_actual,
183 const bool add_into_values_array);
195 const unsigned int n_components,
197 const Number *values_dofs_actual,
203 std::array<const MatrixFreeFunctions::UnivariateShapeData<Number2> *, 3>
204 univariate_shape_data;
208 univariate_shape_data.fill(&shape_data.front());
210 if (shape_data.size() == dim)
211 for (
int i = 1; i < dim; ++i)
212 univariate_shape_data[i] = &shape_data[i];
218 const unsigned int temp_size =
229 shape_data.front().fe_degree + 1),
231 shape_data.front().n_q_points_1d));
235 temp2 = temp1 + temp_size;
238 const std::size_t n_q_points = temp_size == 0 ?
241 const std::size_t dofs_per_comp =
245 const Number *values_dofs = values_dofs_actual;
248 const std::size_t n_dofs_per_comp =
250 Number *values_dofs_tmp =
251 temp1 + 2 * (
std::max(n_dofs_per_comp, n_q_points));
253 fe_degree != -1 ? fe_degree : shape_data.front().fe_degree;
254 for (
unsigned int c = 0; c < n_components; ++c)
255 for (
int i = 0, count_p = 0, count_q = 0;
256 i < (dim > 2 ? degree + 1 : 1);
259 for (
int j = 0; j < (dim > 1 ? degree + 1 - i : 1); ++j)
261 for (
int k = 0; k < degree + 1 - j - i;
262 ++k, ++count_p, ++count_q)
263 values_dofs_tmp[c * dofs_per_comp + count_q] =
264 values_dofs_actual[c * n_dofs_per_comp + count_p];
265 for (
int k = degree + 1 - j - i; k < degree + 1;
267 values_dofs_tmp[c * dofs_per_comp + count_q] = Number();
269 for (
int j = degree + 1 - i; j < degree + 1; ++j)
270 for (
int k = 0; k < degree + 1; ++k, ++count_q)
271 values_dofs_tmp[c * dofs_per_comp + count_q] = Number();
273 values_dofs = values_dofs_tmp;
282 for (
unsigned int c = 0; c < n_components; ++c)
285 eval0.template values<0, true, false>(values_dofs, values_quad);
287 eval0.template gradients<0, true, false>(values_dofs,
291 values_dofs += dofs_per_comp;
292 values_quad += n_q_points;
293 gradients_quad += n_q_points;
298 for (
unsigned int c = 0; c < n_components; ++c)
303 eval0.template gradients<0, true, false>(values_dofs, temp1);
304 eval1.template values<1, true, false, 2>(temp1,
309 eval0.template values<0, true, false>(values_dofs, temp1);
311 eval1.template gradients<1, true, false, 2>(temp1,
316 eval1.template values<1, true, false>(temp1, values_quad);
319 values_dofs += dofs_per_comp;
320 values_quad += n_q_points;
321 gradients_quad += 2 * n_q_points;
326 for (
unsigned int c = 0; c < n_components; ++c)
331 eval0.template gradients<0, true, false>(values_dofs, temp1);
332 eval1.template values<1, true, false>(temp1, temp2);
333 eval2.template values<2, true, false, 3>(temp2,
338 eval0.template values<0, true, false>(values_dofs, temp1);
341 eval1.template gradients<1, true, false>(temp1, temp2);
342 eval2.template values<2, true, false, 3>(temp2,
348 eval1.template values<1, true, false>(temp1, temp2);
350 eval2.template gradients<2, true, false, 3>(temp2,
356 eval2.template values<2, true, false>(temp2, values_quad);
359 values_dofs += dofs_per_comp;
360 values_quad += n_q_points;
361 gradients_quad += 3 * n_q_points;
374 values_quad -= n_components * n_q_points;
375 values_dofs -= n_components * dofs_per_comp;
376 for (std::size_t c = 0; c < n_components; ++c)
377 for (std::size_t q = 0; q < n_q_points; ++q)
378 values_quad[c * n_q_points + q] +=
379 values_dofs[(c + 1) * dofs_per_comp - 1];
392 const unsigned int n_components,
394 Number *values_dofs_actual,
396 const bool add_into_values_array)
398 std::array<const MatrixFreeFunctions::UnivariateShapeData<Number2> *, 3>
399 univariate_shape_data;
402 univariate_shape_data.fill(&shape_data.front());
404 if (shape_data.size() == dim)
405 for (
int i = 1; i < dim; ++i)
406 univariate_shape_data[i] = &shape_data[i];
412 const unsigned int temp_size =
423 shape_data.front().fe_degree + 1),
425 shape_data.front().n_q_points_1d));
429 temp2 = temp1 + temp_size;
432 const std::size_t n_q_points = temp_size == 0 ?
435 const unsigned int dofs_per_comp =
440 Number *values_dofs =
453 for (
unsigned int c = 0; c < n_components; ++c)
457 if (add_into_values_array ==
false)
458 eval0.template values<0, false, false>(values_quad,
461 eval0.template values<0, false, true>(values_quad,
467 add_into_values_array ==
true)
468 eval0.template gradients<0, false, true>(gradients_quad,
471 eval0.template gradients<0, false, false>(gradients_quad,
476 values_dofs += dofs_per_comp;
477 values_quad += n_q_points;
478 gradients_quad += n_q_points;
483 for (
unsigned int c = 0; c < n_components; ++c)
488 eval1.template values<1, false, false>(values_quad, temp1);
489 if (add_into_values_array ==
false)
490 eval0.template values<0, false, false>(temp1, values_dofs);
492 eval0.template values<0, false, true>(temp1, values_dofs);
496 eval1.template gradients<1, false, false, 2>(gradients_quad +
500 eval1.template values<1, false, true>(values_quad, temp1);
501 if (add_into_values_array ==
false)
502 eval0.template values<0, false, false>(temp1, values_dofs);
504 eval0.template values<0, false, true>(temp1, values_dofs);
505 eval1.template values<1, false, false, 2>(gradients_quad,
507 eval0.template gradients<0, false, true>(temp1, values_dofs);
511 values_dofs += dofs_per_comp;
512 values_quad += n_q_points;
513 gradients_quad += 2 * n_q_points;
518 for (
unsigned int c = 0; c < n_components; ++c)
523 eval2.template values<2, false, false>(values_quad, temp1);
524 eval1.template values<1, false, false>(temp1, temp2);
525 if (add_into_values_array ==
false)
526 eval0.template values<0, false, false>(temp2, values_dofs);
528 eval0.template values<0, false, true>(temp2, values_dofs);
532 eval2.template gradients<2, false, false, 3>(gradients_quad +
536 eval2.template values<2, false, true>(values_quad, temp1);
537 eval1.template values<1, false, false>(temp1, temp2);
538 eval2.template values<2, false, false, 3>(gradients_quad + 1,
540 eval1.template gradients<1, false, true>(temp1, temp2);
541 if (add_into_values_array ==
false)
542 eval0.template values<0, false, false>(temp2, values_dofs);
544 eval0.template values<0, false, true>(temp2, values_dofs);
545 eval2.template values<2, false, false, 3>(gradients_quad,
547 eval1.template values<1, false, false>(temp1, temp2);
548 eval0.template gradients<0, false, true>(temp2, values_dofs);
552 values_dofs += dofs_per_comp;
553 values_quad += n_q_points;
554 gradients_quad += 3 * n_q_points;
565 values_dofs -= n_components * dofs_per_comp - dofs_per_comp + 1;
566 values_quad -= n_components * n_q_points;
568 for (
unsigned int c = 0; c < n_components; ++c)
570 values_dofs[0] = values_quad[0];
571 for (
unsigned int q = 1; q < n_q_points; ++q)
572 values_dofs[0] += values_quad[q];
573 values_dofs += dofs_per_comp;
574 values_quad += n_q_points;
578 for (
unsigned int c = 0; c < n_components; ++c)
579 values_dofs[c * dofs_per_comp] = Number();
580 values_dofs += n_components * dofs_per_comp;
586 const std::size_t n_dofs_per_comp =
588 values_dofs -= dofs_per_comp * n_components;
590 fe_degree != -1 ? fe_degree : shape_data.front().fe_degree;
591 for (
unsigned int c = 0; c < n_components; ++c)
592 for (
int i = 0, count_p = 0, count_q = 0;
593 i < (dim > 2 ? degree + 1 : 1);
596 for (
int j = 0; j < (dim > 1 ? degree + 1 - i : 1); ++j)
598 for (
int k = 0; k < degree + 1 - j - i;
599 ++k, ++count_p, ++count_q)
600 values_dofs_actual[c * n_dofs_per_comp + count_p] =
601 values_dofs[c * dofs_per_comp + count_q];
604 count_q += i * (degree + 1);
611 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
620 const Number *values_dofs_actual,
625 const std::size_t n_dofs =
627 const std::size_t n_q_points = fe_eval.
get_shape_info().n_q_points;
636 const auto *
const shape_values = shape_data.front().shape_values.data();
638 const auto *in = values_dofs_actual;
640 for (
unsigned int c = 0; c < n_components; ++c)
647 shape_values, in, out, n_dofs, n_q_points, 1, 1);
656 const auto *
const shape_gradients =
657 shape_data.front().shape_gradients.data();
659 const auto *in = values_dofs_actual;
661 for (
unsigned int c = 0; c < n_components; ++c)
668 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
669 out += n_q_points * dim;
677 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
686 Number *values_dofs_actual,
688 const bool add_into_values_array)
693 const std::size_t n_dofs =
695 const std::size_t n_q_points = fe_eval.
get_shape_info().n_q_points;
704 const auto *
const shape_values = shape_data.front().shape_values.data();
706 auto *out = values_dofs_actual;
708 for (
unsigned int c = 0; c < n_components; ++c)
710 if (add_into_values_array ==
false)
716 shape_values, in, out, n_dofs, n_q_points, 1, 1);
723 shape_values, in, out, n_dofs, n_q_points, 1, 1);
732 const auto *
const shape_gradients =
733 shape_data.front().shape_gradients.data();
735 auto *out = values_dofs_actual;
737 for (
unsigned int c = 0; c < n_components; ++c)
739 if (add_into_values_array ==
false &&
746 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
753 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
755 in += n_q_points * dim;
779 static_assert(basis_size_1 == 0 || basis_size_1 <= basis_size_2,
780 "The second dimension must not be smaller than the first");
804 template <
typename Number,
typename Number2>
811 const Number *values_in,
813 const unsigned int basis_size_1_variable =
815 const unsigned int basis_size_2_variable =
819 basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable,
820 ExcMessage(
"The second dimension must not be smaller than the first"));
828 constexpr int next_dim = (dim == 1 || (dim == 2 && basis_size_1 > 0 &&
829 basis_size_1 == basis_size_2)) ?
836 (basis_size_1 == 0 ? 0 : basis_size_2),
839 eval_val(transformation_matrix,
842 basis_size_1_variable,
843 basis_size_2_variable);
844 const unsigned int np_1 =
845 basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable;
846 const unsigned int np_2 =
847 basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable;
849 ExcMessage(
"Cannot transform with 0-point basis"));
851 ExcMessage(
"Cannot transform with 0-point basis"));
858 for (
unsigned int c = n_components; c != 0; --c)
863 for (
unsigned int q = np_1; q != 0; --q)
870 transformation_matrix,
875 basis_size_1_variable,
876 basis_size_2_variable);
881 if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2)
883 eval_val.template values<0, true, false>(values_in, values_out);
884 eval_val.template values<1, true, false>(values_out, values_out);
887 eval_val.template values<dim - 1,
true,
false>(values_in,
890 eval_val.template values<dim - 1,
true,
false>(values_out,
925 template <
typename Number,
typename Number2>
932 const bool add_into_result,
935 const unsigned int basis_size_1_variable =
937 const unsigned int basis_size_2_variable =
941 basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable,
942 ExcMessage(
"The second dimension must not be smaller than the first"));
943 Assert(add_into_result ==
false || values_in != values_out,
945 "Input and output cannot alias with each other when "
946 "adding the result of the basis change to existing data"));
952 constexpr int next_dim =
954 ((basis_size_1 == 0 || basis_size_2 > basis_size_1) && dim > 1)) ?
960 (basis_size_1 == 0 ? 0 : basis_size_2),
963 eval_val(transformation_matrix,
964 transformation_matrix,
965 transformation_matrix,
966 basis_size_1_variable,
967 basis_size_2_variable);
968 const unsigned int np_1 =
969 basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable;
970 const unsigned int np_2 =
971 basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable;
973 ExcMessage(
"Cannot transform with 0-point basis"));
975 ExcMessage(
"Cannot transform with 0-point basis"));
977 for (
unsigned int c = 0; c < n_components; ++c)
979 if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2)
982 eval_val.template values<1, false, false>(values_in, values_in);
984 eval_val.template hessians<1, false, false>(values_in,
990 eval_val.template values<0, false, true>(values_in,
993 eval_val.template hessians<0, false, true>(values_in,
999 eval_val.template values<0, false, false>(values_in,
1002 eval_val.template hessians<0, false, false>(values_in,
1008 if (dim == 1 && add_into_result)
1011 eval_val.template values<0, false, true>(values_in,
1014 eval_val.template hessians<0, false, true>(values_in,
1020 eval_val.template values<0, false, false>(values_in,
1023 eval_val.template hessians<0, false, false>(values_in,
1029 eval_val.template values<dim - 1,
false,
false>(values_in,
1032 eval_val.template hessians<dim - 1,
false,
false>(
1033 values_in, values_in);
1037 for (
unsigned int q = 0; q < np_1; ++q)
1044 transformation_matrix,
1050 basis_size_1_variable,
1051 basis_size_2_variable);
1078 template <
typename Number,
typename Number2>
1083 const Number *values_in,
1084 Number *scratch_data,
1087 constexpr int next_dim = dim > 1 ? dim - 1 : dim;
1088 Number *my_scratch =
1089 basis_size_1 != basis_size_2 ? scratch_data : values_out;
1091 const unsigned int size_per_component =
Utilities::pow(basis_size_2, dim);
1092 Assert(coefficients.
size() == size_per_component ||
1093 coefficients.
size() == n_components * size_per_component,
1095 const unsigned int stride =
1096 coefficients.
size() == size_per_component ? 0 : 1;
1098 for (
unsigned int q = basis_size_1; q != 0; --q)
1105 transformation_matrix,
1118 eval_val(transformation_matrix);
1119 const unsigned int n_inner_blocks =
1120 (dim > 1 && basis_size_2 < 10) ? basis_size_2 : 1;
1121 const unsigned int n_blocks =
Utilities::pow(basis_size_2, dim - 1);
1122 for (
unsigned int ii = 0; ii < n_blocks; ii += n_inner_blocks)
1123 for (
unsigned int c = 0; c < n_components; ++c)
1125 for (
unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1126 eval_val.template values_one_line<dim - 1, true, false>(
1127 my_scratch + i, my_scratch + i);
1128 for (
unsigned int q = 0; q < basis_size_2; ++q)
1129 for (
unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1130 my_scratch[i + q * n_blocks + c * size_per_component] *=
1131 coefficients[i + q * n_blocks +
1132 c * stride * size_per_component];
1133 for (
unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1134 eval_val.template values_one_line<dim - 1, false, false>(
1135 my_scratch + i, my_scratch + i);
1137 for (
unsigned int q = 0; q < basis_size_1; ++q)
1144 transformation_matrix,
1160 template <
int n_po
ints_1d,
int dim,
typename Number,
typename Number2>
1164 const Number *values,
1168 (n_points_1d + 1) / 2 * n_points_1d);
1186 eval.template gradients<0, true, false>(values, gradients);
1190 eval.template gradients<2, true, false, dim>(values, gradients + 2);
1191 constexpr unsigned int loop_bound = (dim > 2 ? n_points_1d : 1);
1192 constexpr unsigned int n_points_2d = n_points_1d * n_points_1d;
1193 const Number *in = values + (loop_bound - 1) * n_points_2d;
1194 Number *out = gradients + (loop_bound - 1) * dim * n_points_2d;
1195 for (
unsigned int l = 0; l < loop_bound; ++l)
1197 eval_2d.template gradients<0, true, false, dim>(in, out);
1198 eval_2d.template gradients<1, true, false, dim>(in, out + 1);
1200 out -= dim * n_points_2d;
1214 template <
int n_po
ints_1d,
int dim,
typename Number,
typename Number2>
1219 const Number *gradients,
1220 const bool add_into_values_array)
1223 (n_points_1d + 1) / 2 * n_points_1d);
1242 if (add_into_values_array)
1243 eval.template gradients<0, false, true>(gradients, values);
1245 eval.template gradients<0, false, false>(gradients, values);
1249 constexpr unsigned int loop_bound = (dim > 2 ? n_points_1d : 1);
1250 constexpr unsigned int n_points_2d = n_points_1d * n_points_1d;
1252 const Number *in = gradients + (loop_bound - 1) * dim * n_points_2d;
1253 Number *out = values + (loop_bound - 1) * n_points_2d;
1254 for (
unsigned int l = 0; l < loop_bound; ++l)
1256 if (add_into_values_array)
1257 eval_2d.template gradients<0, false, true, dim>(in, out);
1259 eval_2d.template gradients<0, false, false, dim>(in, out);
1260 eval_2d.template gradients<1, false, true, dim>(in + 1, out);
1261 in -= dim * n_points_2d;
1266 eval.template gradients<2, false, true, dim>(gradients + 2, values);
1277 template <
int n_po
ints_1d,
int dim,
typename Number>
1307 const std::size_t n_points = fe_eval.
get_shape_info().n_q_points;
1308 for (
unsigned int comp = 0; comp < n_components; ++comp)
1311 eval.template hessians<0, true, false>(values, hessians);
1317 eval.template gradients<0, true, false>(values, scratch);
1318 eval.template gradients<1, true, false>(scratch,
1319 hessians + dim * n_points);
1321 eval.template hessians<1, true, false>(values, hessians + n_points);
1326 eval.template gradients<2, true, false>(scratch,
1327 hessians + 4 * n_points);
1329 eval.template gradients<1, true, false>(values, scratch);
1330 eval.template gradients<2, true, false>(scratch,
1331 hessians + 5 * n_points);
1333 eval.template hessians<2, true, false>(values,
1334 hessians + 2 * n_points);
1338 hessians += (dim * (dim + 1)) / 2 * n_points;
1350 template <
int n_q_po
ints_1d,
int dim,
typename Number>
1354 const bool add_into_values_array)
1377 const std::size_t n_points = fe_eval.
get_shape_info().n_q_points;
1379 for (
unsigned int comp = 0; comp < n_components; ++comp)
1382 if (add_into_values_array ==
true)
1383 eval.template hessians<0, false, true>(hessians, values);
1385 eval.template hessians<0, false, false>(hessians, values);
1389 eval.template hessians<1, false, true>(hessians + n_points, values);
1393 eval.template hessians<2, false, true>(hessians + 2 * n_points,
1396 eval.template gradients<2, false, false>(hessians + 5 * n_points,
1398 eval.template gradients<1, false, true>(scratch, values);
1401 eval.template gradients<2, false, false>(hessians + 4 * n_points,
1408 eval.template gradients<1,
false, (dim > 2)>(hessians +
1411 eval.template gradients<0, false, true>(scratch, values);
1415 hessians += (dim * (dim + 1)) / 2 * n_points;
1427 template <
int dim,
typename Number>
1430 const Number *values_dofs,
1433 const auto &univariate_shape_data = fe_eval.
get_shape_info().data;
1436 using Eval =
typename Impl::Eval;
1438 Impl::create_evaluator_tensor_product(&univariate_shape_data[0]);
1439 Eval eval1 = Impl::create_evaluator_tensor_product(
1441 univariate_shape_data.size() - 1)]);
1442 Eval eval2 = Impl::create_evaluator_tensor_product(
1444 univariate_shape_data.size() - 1)]);
1446 const unsigned int n_points = fe_eval.
get_shape_info().n_q_points;
1450 univariate_shape_data.front().fe_degree + 1),
1452 univariate_shape_data.front().n_q_points_1d));
1455 for (
unsigned int comp = 0; comp < n_components;
1457 hessians += n_points * dim * (dim + 1) / 2,
1463 eval0.template hessians<0, true, false>(values_dofs, hessians);
1467 eval0.template hessians<0, true, false>(values_dofs, tmp1);
1468 eval1.template values<1, true, false>(tmp1, hessians);
1470 eval0.template gradients<0, true, false>(values_dofs, tmp1);
1471 eval1.template gradients<1, true, false>(tmp1,
1472 hessians + 2 * n_points);
1474 eval0.template values<0, true, false>(values_dofs, tmp1);
1475 eval1.template hessians<1, true, false>(tmp1, hessians + n_points);
1479 eval0.template hessians<0, true, false>(values_dofs, tmp1);
1480 eval1.template values<1, true, false>(tmp1, tmp2);
1481 eval2.template values<2, true, false>(tmp2, hessians);
1483 eval0.template gradients<0, true, false>(values_dofs, tmp1);
1484 eval1.template gradients<1, true, false>(tmp1, tmp2);
1485 eval2.template values<2, true, false>(tmp2,
1486 hessians + 3 * n_points);
1488 eval1.template values<1, true, false>(tmp1, tmp2);
1489 eval2.template gradients<2, true, false>(tmp2,
1490 hessians + 4 * n_points);
1492 eval0.template values<0, true, false>(values_dofs, tmp1);
1493 eval1.template hessians<1, true, false>(tmp1, tmp2);
1494 eval2.template values<2, true, false>(tmp2, hessians + n_points);
1496 eval1.template gradients<1, true, false>(tmp1, tmp2);
1497 eval2.template gradients<2, true, false>(tmp2,
1498 hessians + 5 * n_points);
1500 eval1.template values<1, true, false>(tmp1, tmp2);
1501 eval2.template hessians<2, true, false>(tmp2,
1502 hessians + 2 * n_points);
1508 "Only 1d, 2d and 3d implemented for Hessian"));
1521 template <
int dim,
typename Number>
1525 Number *values_dofs,
1526 const bool add_into_values_array)
1528 const auto &univariate_shape_data = fe_eval.
get_shape_info().data;
1531 using Eval =
typename Impl::Eval;
1533 Impl::create_evaluator_tensor_product(&univariate_shape_data[0]);
1534 Eval eval1 = Impl::create_evaluator_tensor_product(
1536 univariate_shape_data.size() - 1)]);
1537 Eval eval2 = Impl::create_evaluator_tensor_product(
1539 univariate_shape_data.size() - 1)]);
1541 const unsigned int n_points = fe_eval.
get_shape_info().n_q_points;
1545 univariate_shape_data.front().fe_degree + 1),
1547 univariate_shape_data.front().n_q_points_1d));
1550 for (
unsigned int comp = 0; comp < n_components;
1552 hessians += n_points * dim * (dim + 1) / 2,
1558 if (add_into_values_array)
1559 eval0.template hessians<0, false, true>(hessians, values_dofs);
1561 eval0.template hessians<0, false, false>(hessians, values_dofs);
1565 eval1.template values<1, false, false>(hessians, tmp1);
1566 if (add_into_values_array)
1567 eval0.template hessians<0, false, true>(tmp1, values_dofs);
1569 eval0.template hessians<0, false, false>(tmp1, values_dofs);
1572 eval1.template gradients<1, false, false>(hessians + 2 * n_points,
1574 eval0.template gradients<0, false, true>(tmp1, values_dofs);
1576 eval1.template hessians<1, false, false>(hessians + n_points, tmp1);
1577 eval0.template values<0, false, true>(tmp1, values_dofs);
1581 eval2.template values<2, false, false>(hessians, tmp1);
1582 eval1.template values<1, false, false>(tmp1, tmp2);
1584 if (add_into_values_array)
1585 eval0.template hessians<0, false, true>(tmp2, values_dofs);
1587 eval0.template hessians<0, false, false>(tmp2, values_dofs);
1590 eval2.template values<2, false, false>(hessians + 3 * n_points,
1592 eval1.template gradients<1, false, false>(tmp1, tmp2);
1594 eval2.template gradients<2, false, false>(hessians + 4 * n_points,
1596 eval1.template values<1, false, true>(tmp1, tmp2);
1597 eval1.template values<0, false, true>(tmp2, values_dofs);
1600 eval2.template values<2, false, false>(hessians + n_points, tmp1);
1601 eval1.template hessians<1, false, false>(tmp1, tmp2);
1604 eval2.template gradients<2, false, false>(hessians + 5 * n_points,
1606 eval1.template gradients<1, false, true>(tmp1, tmp2);
1609 eval2.template hessians<2, false, false>(hessians + 2 * n_points,
1611 eval1.template values<1, false, true>(tmp1, tmp2);
1612 eval0.template values<0, false, true>(tmp2, values_dofs);
1618 "Only 1d, 2d and 3d implemented for Hessian"));
1636 template <
int dim,
int fe_degree,
typename Number>
1651 const Number *values_dofs,
1654 constexpr std::size_t n_points =
Utilities::pow(fe_degree + 1, dim);
1656 for (
unsigned int c = 0; c < n_components; ++c)
1659 for (
unsigned int i = 0; i < n_points; ++i)
1661 values_dofs[n_points * c + i];
1666 values_dofs + c * n_points,
1674 Number *values_dofs,
1676 const bool add_into_values_array)
1678 constexpr std::size_t n_points =
Utilities::pow(fe_degree + 1, dim);
1680 for (
unsigned int c = 0; c < n_components; ++c)
1684 if (add_into_values_array)
1685 for (
unsigned int i = 0; i < n_points; ++i)
1686 values_dofs[n_points * c + i] +=
1689 for (
unsigned int i = 0; i < n_points; ++i)
1690 values_dofs[n_points * c + i] =
1697 values_dofs + c * n_points,
1699 add_into_values_array ||
1717 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
1723 const Number *values_dofs,
1728 Assert(n_q_points_1d > fe_degree,
1729 ExcMessage(
"You lose information when going to a collocation "
1730 "space of lower degree, so the evaluation results "
1731 "would be wrong. Thus, this class does not permit "
1732 "the chosen operation."));
1733 constexpr std::size_t n_dofs =
Utilities::pow(fe_degree + 1, dim);
1734 constexpr std::size_t n_q_points =
Utilities::pow(n_q_points_1d, dim);
1736 for (
unsigned int c = 0; c < n_components; ++c)
1742 (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1),
1743 n_q_points_1d>::do_forward(1,
1744 shape_data.shape_values_eo,
1745 values_dofs + c * n_dofs,
1760 Number *values_dofs,
1762 const bool add_into_values_array)
1766 Assert(n_q_points_1d > fe_degree,
1767 ExcMessage(
"You lose information when going to a collocation "
1768 "space of lower degree, so the evaluation results "
1769 "would be wrong. Thus, this class does not permit "
1770 "the chosen operation."));
1771 constexpr std::size_t n_q_points =
Utilities::pow(n_q_points_1d, dim);
1773 for (
unsigned int c = 0; c < n_components; ++c)
1789 (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1),
1790 n_q_points_1d>::do_backward(1,
1791 shape_data.shape_values_eo,
1792 add_into_values_array,
1807 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
1817 template <
bool integrate>
1821 Number *values_dofs_actual,
1823 const bool add_into_values_array =
false);
1828 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
1829 template <
bool integrate>
1836 evaluate_or_integrate(
1838 Number *values_dofs,
1842 Assert(dim == 2 || dim == 3,
1843 ExcMessage(
"Only dim = 2,3 implemented for Raviart-Thomas "
1844 "evaluation/integration"));
1858 const unsigned int dofs_per_component =
1860 const unsigned int n_points =
Utilities::pow(n_q_points_1d, dim);
1875 if constexpr (dim > 2)
1876 eval.template tangential<2, 0>(shape_data[1], values, values);
1877 eval.template tangential<1, 0>(shape_data[1], values, values);
1878 eval.template normal<0>(shape_data[0], values, values_dofs, add);
1881 gradients += n_points * dim;
1882 values_dofs += dofs_per_component;
1889 if constexpr (dim > 2)
1890 eval.template tangential<2, 1>(shape_data[1], values, values);
1891 eval.template tangential<0, 1>(shape_data[1], values, values);
1892 eval.template normal<1>(shape_data[0], values, values_dofs, add);
1894 if constexpr (dim > 2)
1897 gradients += n_points * dim;
1898 values_dofs += dofs_per_component;
1905 eval.template tangential<1, 2>(shape_data[1], values, values);
1906 eval.template tangential<0, 2>(shape_data[1], values, values);
1907 eval.template normal<0>(shape_data[0], values, values_dofs, add);
1914 eval.template normal<0>(shape_data[0], values_dofs, values);
1915 eval.template tangential<1, 0>(shape_data[1], values, values);
1916 if constexpr (dim > 2)
1917 eval.template tangential<2, 0>(shape_data[1], values, values);
1924 gradients += n_points * dim;
1925 values_dofs += dofs_per_component;
1927 eval.template normal<1>(shape_data[0], values_dofs, values);
1928 eval.template tangential<0, 1>(shape_data[1], values, values);
1929 if constexpr (dim > 2)
1930 eval.template tangential<2, 1>(shape_data[1], values, values);
1936 if constexpr (dim > 2)
1939 gradients += n_points * dim;
1940 values_dofs += dofs_per_component;
1942 eval.template normal<2>(shape_data[0], values_dofs, values);
1943 eval.template tangential<0, 2>(shape_data[1], values, values);
1944 eval.template tangential<1, 2>(shape_data[1], values, values);
1970 template <
int dim,
typename Number,
bool do_
integrate>
1973 template <
int fe_degree,
int n_q_po
ints_1d,
typename OtherNumber>
1975 run(
const unsigned int n_components,
1977 OtherNumber *values_dofs,
1979 const bool sum_into_values_array_in =
false)
1983 static_assert(std::is_same_v<Number, std::remove_const_t<OtherNumber>>,
1984 "Type of Number and of OtherNumber do not match.");
1991 element_type == ElementType::tensor_general) ||
1992 element_type == ElementType::tensor_raviart_thomas,
1996 bool sum_into_values_array = sum_into_values_array_in;
2002 if constexpr (do_integrate)
2015 sum_into_values_array);
2016 sum_into_values_array =
true;
2021 if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
2022 element_type == ElementType::tensor_symmetric_collocation)
2030 sum_into_values_array);
2034 else if (fe_degree >= 0 &&
2036 element_type <= ElementType::tensor_symmetric)
2047 sum_into_values_array);
2049 else if (fe_degree >= 0 &&
2050 element_type <= ElementType::tensor_symmetric_no_collocation)
2061 sum_into_values_array);
2063 else if (element_type == ElementType::tensor_none)
2071 sum_into_values_array);
2073 else if (element_type == ElementType::tensor_symmetric_plus_dg0)
2080 Number>>(n_components,
2084 sum_into_values_array);
2086 else if (element_type == ElementType::truncated_tensor)
2097 sum_into_values_array);
2099 else if (element_type == ElementType::tensor_raviart_thomas)
2101 if constexpr (fe_degree > 0 && n_q_points_1d > 0 && dim > 1)
2110 const_cast<Number *
>(values_dofs),
2112 sum_into_values_array);
2118 "in 2d/3d and with templated degree"));
2132 sum_into_values_array);
2150 template <
typename T>
2153 const unsigned int n_components,
2155 const Number *values_dofs,
2157 const bool sum_into_values_array,
2158 std::bool_constant<false>)
2160 (void)sum_into_values_array;
2162 T::evaluate(n_components, evaluation_flag, values_dofs, fe_eval);
2165 template <
typename T>
2168 const unsigned int n_components,
2170 Number *values_dofs,
2172 const bool sum_into_values_array,
2173 std::bool_constant<true>)
2175 T::integrate(n_components,
2179 sum_into_values_array);
2182 template <
typename T,
typename OtherNumber>
2185 const unsigned int n_components,
2187 OtherNumber *values_dofs,
2189 const bool sum_into_values_array)
2195 sum_into_values_array,
2196 std::bool_constant<do_integrate>());
2206 template <
int dim,
typename Number>
2212 template <
int fe_degree,
int = 0>
2214 run(
const unsigned int n_components,
2216 const Number *in_array,
2219 const unsigned int given_degree =
2220 (fe_degree > -1) ? fe_degree :
2223 const unsigned int dofs_per_component =
2243 for (
unsigned int d = 0; d < n_components; ++d)
2245 const Number *in = in_array + d * dofs_per_component;
2246 Number *out = out_array + d * dofs_per_component;
2249 evaluator.template hessians<0, true, false>(in, out);
2251 evaluator.template hessians<1, true, false>(out, out);
2253 evaluator.template hessians<2, true, false>(out, out);
2255 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2257 const Number inverse_JxW_q = Number(1.) / fe_eval.
JxW(q);
2258 for (
unsigned int d = 0; d < n_components; ++d)
2259 out_array[q + d * dofs_per_component] *= inverse_JxW_q;
2261 for (
unsigned int d = 0; d < n_components; ++d)
2263 Number *out = out_array + d * dofs_per_component;
2265 evaluator.template hessians<2, false, false>(out, out);
2267 evaluator.template hessians<1, false, false>(out, out);
2268 evaluator.template hessians<0, false, false>(out, out);
2282 template <
int dim,
typename Number>
2288 template <
int fe_degree,
int = 0>
2290 run(
const unsigned int n_desired_components,
2293 const bool dyadic_coefficients,
2294 const Number *in_array,
2297 const unsigned int given_degree =
2298 (fe_degree > -1) ? fe_degree :
2301 const unsigned int dofs_per_component =
2305 inverse_coefficients.
size() % dofs_per_component == 0,
2307 "Expected diagonal to be a multiple of scalar dof per cells"));
2309 if (!dyadic_coefficients)
2311 if (inverse_coefficients.
size() != dofs_per_component)
2313 inverse_coefficients.
size());
2319 inverse_coefficients.
size());
2339 const Number *in = in_array;
2340 Number *out = out_array;
2342 const Number *inv_coefficient = inverse_coefficients.
data();
2344 const unsigned int shift_coefficient =
2345 inverse_coefficients.
size() > dofs_per_component ? dofs_per_component :
2348 const auto n_comp_outer = dyadic_coefficients ? 1 : n_desired_components;
2349 const auto n_comp_inner = dyadic_coefficients ? n_desired_components : 1;
2351 for (
unsigned int d = 0; d < n_comp_outer; ++d)
2353 for (
unsigned int di = 0; di < n_comp_inner; ++di)
2355 const Number *in_ = in + di * dofs_per_component;
2356 Number *out_ = out + di * dofs_per_component;
2357 evaluator.template hessians<0, true, false>(in_, out_);
2359 evaluator.template hessians<1, true, false>(out_, out_);
2361 evaluator.template hessians<2, true, false>(out_, out_);
2363 if (dyadic_coefficients)
2365 const auto n_coeff_components =
2366 n_desired_components * n_desired_components;
2367 if (n_desired_components == dim)
2369 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2370 vmult<dim>(&inv_coefficient[q * n_coeff_components],
2373 dofs_per_component);
2377 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2378 vmult<-1>(&inv_coefficient[q * n_coeff_components],
2382 n_desired_components);
2386 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2387 out[q] *= inv_coefficient[q];
2389 for (
unsigned int di = 0; di < n_comp_inner; ++di)
2391 Number *out_ = out + di * dofs_per_component;
2393 evaluator.template hessians<2, false, false>(out_, out_);
2395 evaluator.template hessians<1, false, false>(out_, out_);
2396 evaluator.template hessians<0, false, false>(out_, out_);
2399 in += dofs_per_component;
2400 out += dofs_per_component;
2401 inv_coefficient += shift_coefficient;
2408 template <
int n_components>
2410 vmult(
const Number *inverse_coefficients,
2413 const unsigned int dofs_per_component,
2414 const unsigned int n_given_components = 0)
2416 const unsigned int n_desired_components =
2417 (n_components > -1) ? n_components : n_given_components;
2419 std::array<Number, dim + 2> tmp = {};
2420 Assert(n_desired_components <= dim + 2,
2422 "Number of components larger than dim+2 not supported."));
2424 for (
unsigned int d = 0; d < n_desired_components; ++d)
2425 tmp[d] = src[d * dofs_per_component];
2427 for (
unsigned int d1 = 0; d1 < n_desired_components; ++d1)
2429 const Number *inv_coeff_row =
2430 &inverse_coefficients[d1 * n_desired_components];
2431 Number sum = inv_coeff_row[0] * tmp[0];
2432 for (
unsigned int d2 = 1; d2 < n_desired_components; ++d2)
2433 sum += inv_coeff_row[d2] * tmp[d2];
2434 dst[d1 * dofs_per_component] = sum;
2447 template <
int dim,
typename Number>
2450 template <
int fe_degree,
int n_q_po
ints_1d>
2452 run(
const unsigned int n_desired_components,
2454 const Number *in_array,
2457 static const bool do_inplace =
2458 fe_degree > -1 && (fe_degree + 1 == n_q_points_1d);
2464 const auto &inverse_shape =
2469 const std::size_t dofs_per_component =
2472 const std::size_t n_q_points = do_inplace ?
2490 for (
unsigned int d = 0; d < n_desired_components; ++d)
2492 const Number *in = in_array + d * n_q_points;
2493 Number *out = out_array + d * dofs_per_component;
2496 auto *temp_2 = do_inplace ?
2498 (temp_1 +
std::max(n_q_points, dofs_per_component));
2502 evaluator.template hessians<2, false, false>(in, temp_1);
2503 evaluator.template hessians<1, false, false>(temp_1, temp_2);
2504 evaluator.template hessians<0, false, false>(temp_2, out);
2508 evaluator.template hessians<1, false, false>(in, temp_1);
2509 evaluator.template hessians<0, false, false>(temp_1, out);
2512 evaluator.template hessians<0, false, false>(in, out);
value_type * data() const noexcept
ScalarNumber shape_info_number_type
const ShapeInfoType & get_shape_info() const
Number JxW(const unsigned int q_point) const
const Number * begin_gradients() const
ArrayView< Number > get_scratch_data() const
const Number * begin_values() const
const Number * begin_hessians() const
#define DEAL_II_ALWAYS_INLINE
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
static ::ExceptionBase & ExcNotImplemented()
#define Assert(cond, exc)
#define AssertDimension(dim1, dim2)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcDimensionMismatch(std::size_t arg1, std::size_t arg2)
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
@ tensor_symmetric_no_collocation
@ tensor_symmetric_plus_dg0
EvaluationFlags
The EvaluationFlags enum.
constexpr T fixed_power(const T t)
constexpr T pow(const T base, const int iexp)
void evaluate_hessians_collocation(const unsigned int n_components, FEEvaluationData< dim, Number, false > &fe_eval)
constexpr bool use_collocation_evaluation(const unsigned int fe_degree, const unsigned int n_q_points_1d)
void integrate_gradients_collocation(const MatrixFreeFunctions::UnivariateShapeData< Number2 > &shape, Number *values, const Number *gradients, const bool add_into_values_array)
void evaluate_hessians_slow(const unsigned int n_components, const Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval)
std::enable_if_t<(variant==evaluate_general), void > apply_matrix_vector_product(const Number2 *matrix, const Number *in, Number *out)
void integrate_hessians_collocation(const unsigned int n_components, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
void evaluate_gradients_collocation(const MatrixFreeFunctions::UnivariateShapeData< Number2 > &shape, const Number *values, Number *gradients)
void integrate_hessians_slow(const unsigned int n_components, const FEEvaluationData< dim, Number, false > &fe_eval, Number *values_dofs, const bool add_into_values_array)
static const unsigned int invalid_unsigned_int
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static bool run(const unsigned int n_components, const FEEvaluationData< dim, Number, false > &fe_eval, const Number *in_array, Number *out_array)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static bool run(const unsigned int n_desired_components, const FEEvaluationData< dim, Number, false > &fe_eval, const ArrayView< const Number > &inverse_coefficients, const bool dyadic_coefficients, const Number *in_array, Number *out_array)
static void vmult(const Number *inverse_coefficients, const Number *src, Number *dst, const unsigned int dofs_per_component, const unsigned int n_given_components=0)
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static constexpr unsigned int n_rows_of_product
static constexpr unsigned int n_columns_of_product
static void do_backward(const unsigned int n_components, const AlignedVector< Number2 > &transformation_matrix, const bool add_into_result, Number *values_in, Number *values_out, const unsigned int basis_size_1_variable=numbers::invalid_unsigned_int, const unsigned int basis_size_2_variable=numbers::invalid_unsigned_int)
static void do_forward(const unsigned int n_components, const AlignedVector< Number2 > &transformation_matrix, const Number *values_in, Number *values_out, const unsigned int basis_size_1_variable=numbers::invalid_unsigned_int, const unsigned int basis_size_2_variable=numbers::invalid_unsigned_int)
static void do_mass(const unsigned int n_components, const AlignedVector< Number2 > &transformation_matrix, const AlignedVector< Number > &coefficients, const Number *values_in, Number *scratch_data, Number *values_out)
static void evaluate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval)
EvaluatorTensorProduct< evaluate_evenodd, dim, fe_degree+1, fe_degree+1, Number, Number2 > Eval
static void integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static void evaluate_or_integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, OtherNumber *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array)
static void evaluate_or_integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array, std::bool_constant< true >)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, OtherNumber *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array_in=false)
static void evaluate_or_integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array, std::bool_constant< false >)
static const EvaluatorVariant variant
static void integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
static void evaluate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval)
static void integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
static Eval create_evaluator_tensor_product(const MatrixFreeFunctions::UnivariateShapeData< Number2 > *univariate_shape_data)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static void evaluate_or_integrate(const EvaluationFlags::EvaluationFlags evaluation_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array=false)
static void evaluate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval)
EvaluatorTensorProduct< variant, dim, fe_degree+1, n_q_points_1d, Number, Number2 > Eval
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
AlignedVector< Number > shape_values
AlignedVector< Number > shape_hessians_collocation
AlignedVector< Number > shape_values_eo
AlignedVector< Number > shape_hessians_eo
AlignedVector< Number > shape_gradients_collocation_eo
unsigned int n_q_points_1d
AlignedVector< Number > shape_gradients_eo
AlignedVector< Number > shape_hessians
AlignedVector< Number > shape_gradients
AlignedVector< Number > shape_gradients_collocation