26#ifdef DEAL_II_WITH_CUDA
39 template <
typename Number>
41 :
val(nullptr,
Utilities::CUDA::delete_device_data<Number>)
47 template <
typename Number>
50 Utilities::CUDA::delete_device_data<Number>)
54 const cudaError_t error_code = cudaMemcpy(
val.get(),
57 cudaMemcpyDeviceToDevice);
63 template <
typename Number>
73 const cudaError_t error_code = cudaMemcpy(
val.get(),
76 cudaMemcpyDeviceToDevice);
84 template <
typename Number>
86 :
val(nullptr,
Utilities::CUDA::delete_device_data<Number>)
94 template <
typename Number>
105 if (omit_zeroing_entries ==
false)
107 const cudaError_t error_code =
108 cudaMemset(
val.get(), 0, n *
sizeof(Number));
116 template <
typename Number>
119 const bool omit_zeroing_entries)
126 template <
typename Number>
131 const std::shared_ptr<const Utilities::MPI::CommunicationPatternBase> &)
135 const cudaError_t error_code = cudaMemcpy(
val.get(),
138 cudaMemcpyHostToDevice);
145 cudaError_t error_code =
146 cudaMalloc(&tmp,
n_elements *
sizeof(Number));
150 error_code = cudaMemcpy(tmp,
153 cudaMemcpyHostToDevice);
172 template <
typename Number>
179 const cudaError_t error_code =
188 template <
typename Number>
203 template <
typename Number>
219 template <
typename Number>
225 "Cannot add two vectors with different numbers of elements"));
238 template <
typename Number>
244 "Cannot add two vectors with different numbers of elements."));
257 template <
typename Number>
263 "Cannot add two vectors with different numbers of elements"));
265 Number *result_device;
266 cudaError_t error_code =
267 cudaMalloc(&result_device,
n_elements *
sizeof(Number));
269 error_code = cudaMemset(result_device, 0,
sizeof(Number));
273 <<<dim3(n_blocks, 1), dim3(
block_size)>>>(result_device,
276 static_cast<unsigned int>(
281 error_code = cudaMemcpy(&result,
284 cudaMemcpyDeviceToHost);
294 template <
typename Number>
307 template <
typename Number>
315 "Cannot add two vectors with different numbers of elements."));
325 template <
typename Number>
337 "Cannot add two vectors with different numbers of elements."));
341 "Cannot add two vectors with different numbers of elements."));
351 template <
typename Number>
362 "Cannot add two vectors with different numbers of elements."));
372 template <
typename Number>
376 Assert(scaling_factors.
size() == this->size(),
378 "Cannot scale two vectors with different numbers of elements."));
383 scaling_factors.
val.get(),
390 template <
typename Number>
397 V.
size() == this->size(),
399 "Cannot assign two vectors with different numbers of elements."));
411 template <
typename Number>
420 template <
typename Number>
424 Number *result_device;
425 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
427 error_code = cudaMemset(result_device, 0,
sizeof(Number));
431 <<<dim3(n_blocks, 1), dim3(
block_size)>>>(result_device,
437 error_code = cudaMemcpy(&result,
440 cudaMemcpyDeviceToHost);
451 template <
typename Number>
455 Number *result_device;
456 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
458 error_code = cudaMemset(result_device, 0,
sizeof(Number));
462 <<<dim3(n_blocks, 1), dim3(
block_size)>>>(result_device,
468 error_code = cudaMemcpy(&result,
471 cudaMemcpyDeviceToHost);
481 template <
typename Number>
490 template <
typename Number>
494 return (*
this) * (*this);
499 template <
typename Number>
503 Number *result_device;
504 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
506 error_code = cudaMemset(result_device, 0,
sizeof(Number));
510 <<<dim3(n_blocks, 1), dim3(
block_size)>>>(result_device,
516 error_code = cudaMemcpy(&result,
519 cudaMemcpyDeviceToHost);
529 template <
typename Number>
542 Number *result_device;
543 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
545 error_code = cudaMemset(result_device, 0,
sizeof(Number));
553 error_code = cudaMemcpy(&result,
556 cudaMemcpyDeviceToHost);
564 template <
typename Number>
567 const unsigned int precision,
568 const bool scientific,
572 std::ios::fmtflags old_flags = out.flags();
573 unsigned int old_precision = out.precision(precision);
575 out.precision(precision);
577 out.setf(std::ios::scientific, std::ios::floatfield);
579 out.setf(std::ios::fixed, std::ios::floatfield);
589 out << cpu_val[i] << std::endl;
594 out.flags(old_flags);
595 out.precision(old_precision);
600 template <
typename Number>
604 std::size_t memory =
sizeof(*this);
605 memory +=
sizeof(Number) *
static_cast<std::size_t
>(
n_elements);
void print(StreamType &out) const
real_type linfty_norm() const
void equ(const Number a, const Vector< Number > &V)
types::global_dof_index size_type
void sadd(const Number s, const Number a, const Vector< Number > &V)
Number add_and_dot(const Number a, const Vector< Number > &V, const Vector< Number > &W)
real_type l2_norm() const
std::unique_ptr< Number[], void(*)(Number *)> val
typename numbers::NumberTraits< Number >::real_type real_type
Vector< Number > & operator*=(const Number factor)
real_type l1_norm() const
value_type mean_value() const
real_type norm_sqr() const
void import_elements(const ReadWriteVector< Number > &V, const VectorOperation::values operation, const std::shared_ptr< const Utilities::MPI::CommunicationPatternBase > &communication_pattern={})
Vector & operator=(const Vector< Number > &v)
Vector< Number > & operator+=(const Vector< Number > &V)
void scale(const Vector< Number > &scaling_factors)
void print(std::ostream &out, const unsigned int precision=2, const bool scientific=true, const bool across=true) const
Number operator*(const Vector< Number > &V) const
void reinit(const size_type n, const bool omit_zeroing_entries=false)
Vector< Number > & operator-=(const Vector< Number > &V)
std::size_t memory_consumption() const
Vector< Number > & operator/=(const Number factor)
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
__global__ void add_aVbW(Number *val, const Number a, const Number *V_val, const Number b, const Number *W_val, const size_type N)
__global__ void add_aV(Number *val, const Number a, const Number *V_val, const size_type N)
__global__ void equ(Number *val, const Number a, const Number *V_val, const size_type N)
__global__ void vec_add(Number *val, const Number a, const size_type N)
__global__ void double_vector_reduction(Number *result, const Number *v1, const Number *v2, const size_type N)
__global__ void vec_scale(Number *val, const Number a, const size_type N)
__global__ void reduction(Number *result, const Number *v, const size_type N)
__global__ void add_and_dot(Number *res, Number *v1, const Number *v2, const Number *v3, const Number a, const size_type N)
__global__ void vector_bin_op(Number *v1, const Number *v2, const size_type N)
__global__ void scale(Number *val, const Number *V_val, const size_type N)
__global__ void sadd(const Number s, Number *val, const Number a, const Number *V_val, const size_type N)
static ::ExceptionBase & ExcIO()
#define AssertCudaKernel()
static ::ExceptionBase & ExcZero()
static ::ExceptionBase & ExcNotImplemented()
#define Assert(cond, exc)
#define AssertIsFinite(number)
#define AssertCuda(error_code)
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
IndexSet complete_index_set(const IndexSet::size_type N)
Number * allocate_device_data(const std::size_t size)
void copy_to_host(const ArrayView< const T, MemorySpace::CUDA > &in, ArrayView< T, MemorySpace::Host > &out)
::VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &)
Number linfty_norm(const Tensor< 2, dim, Number > &t)