25 #if defined(AVX) || defined(AVX2) || defined(FMA) || defined(SSE4_1) 29 #if defined(HAS_CPUID) 52 "Function used for calculation of dot product");
// Out-of-class definitions of SIMDDetect's static CPU-feature flags. No
// initializer is given here; the SIMDDetect constructor assigns them from
// CPUID results.
// NOTE(review): the leading numbers (57, 58, ...) look like line numbers fused
// in from a source listing, and the gaps (61, 63) suggest lines are missing --
// confirm against the real file.
57 bool SIMDDetect::avx_available_;
58 bool SIMDDetect::avx2_available_;
59 bool SIMDDetect::avx512F_available_;
60 bool SIMDDetect::avx512BW_available_;
62 bool SIMDDetect::fma_available_;
64 bool SIMDDetect::sse_available_;
// Portable fallback: computes the dot product of two vectors with a plain
// scalar loop.
//
// u, v: pointers to the first element of each vector; both must hold at
//       least n doubles.
// n:    number of element pairs to multiply; n <= 0 yields 0.0.
// Returns the sum of u[k] * v[k] for k in [0, n).
static double DotProductGeneric(const double* u, const double* v, int n) {
  double total = 0.0;
  for (int k = 0; k < n; ++k) total += u[k] * v[k];
  return total;
}
// Computes the dot product of two vectors by delegating to
// std::inner_product with a double accumulator starting at 0.0.
//
// u, v: pointers to the first element of each vector; both must hold at
//       least n doubles.
// n:    number of element pairs to multiply; n == 0 yields 0.0.
// Returns the sum of u[k] * v[k] for k in [0, n).
static double DotProductStdInnerProduct(const double* u, const double* v,
                                        int n) {
  return std::inner_product(u, u + n, v, 0.0);
}
// Constructor: installs the generic dot product as the default, then probes
// the CPU with CPUID to fill in the static *_available_ flags, and finally
// walks an else-if chain over those flags (avx2, then avx, then sse).
// NOTE(review): this listing has fused line numbers and visibly missing lines
// (closing braces, #endif directives, the cpuInfo declarations, the bodies of
// the else-if branches) -- confirm every detail against the real file.
88 SIMDDetect::SIMDDetect() {
// Default that stays in effect unless something later replaces it.
90 SetDotProduct(DotProductGeneric);
92 #if defined(HAS_CPUID) 94 unsigned int eax, ebx, ecx, edx;
// CPUID leaf 1: the feature flags below are read from masks of ECX.
// NOTE(review): unlike the _WIN32 branch further down, no _xgetbv / OS-support
// check is visible before the fma/avx flags are set here -- confirm.
95 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
99 sse_available_ = (ecx & 0x00080000) != 0;
102 fma_available_ = (ecx & 0x00001000) != 0;
105 avx_available_ = (ecx & 0x10000000) != 0;
// Leaf 7 (sub-leaf 0) is only queried when AVX was reported; the AVX2 and
// AVX-512 flags are read from masks of EBX.
106 if (avx_available_) {
110 __cpuid_count(7, 0, eax, ebx, ecx, edx);
111 avx2_available_ = (ebx & 0x00000020) != 0;
112 avx512F_available_ = (ebx & 0x00010000) != 0;
113 avx512BW_available_ = (ebx & 0x40000000) != 0;
// Windows branch: same masks, read from the cpuInfo array (index 2 where the
// branch above uses ecx, index 1 where it uses ebx).
117 # elif defined(_WIN32) 121 max_function_id = cpuInfo[0];
122 if (max_function_id >= 1) {
125 sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
// fma/avx flags are only set when mask 0x08000000 of cpuInfo[2] is set and
// (_xgetbv(0) & 6) == 6 -- presumably the OSXSAVE bit plus OS-enabled XMM/YMM
// state; TODO confirm.
127 #if defined(AVX) || defined(AVX2) || defined(FMA) 128 if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
131 fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
134 avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
// Leaf 7 flags are only read when the CPU reports max function id >= 7.
137 if (max_function_id >= 7) {
139 avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
140 avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
141 avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
148 #error "I don't know how to test for SIMD with this compiler" 156 }
else if (avx2_available_) {
161 }
else if (avx_available_) {
166 }
else if (sse_available_) {
// Applies the `dotproduct` config variable: its string value is compared
// against each known method name, the matching branch records the name in
// dotproduct_method, and the result is written back with set_value at the end.
// NOTE(review): the enclosing function header is not visible in this listing
// (presumably SIMDDetect::Update -- confirm), and most branch bodies and the
// final `else` before the warning are missing here.
176 const char* dotproduct_method =
"generic";
177 if (!strcmp(dotproduct.string(),
"auto")) {
179 }
else if (!strcmp(dotproduct.string(),
"generic")) {
181 SetDotProduct(DotProductGeneric);
182 dotproduct_method =
"generic";
183 }
else if (!strcmp(dotproduct.string(),
"native")) {
186 dotproduct_method =
"native";
188 }
else if (!strcmp(dotproduct.string(),
"avx2")) {
191 dotproduct_method =
"avx2";
194 }
else if (!strcmp(dotproduct.string(),
"avx")) {
197 dotproduct_method =
"avx";
200 }
else if (!strcmp(dotproduct.string(),
"fma")) {
203 dotproduct_method =
"fma";
206 }
else if (!strcmp(dotproduct.string(),
"sse")) {
209 dotproduct_method =
"sse";
211 }
else if (!strcmp(dotproduct.string(),
"std::inner_product")) {
213 SetDotProduct(DotProductStdInnerProduct);
214 dotproduct_method =
"std::inner_product";
// Unrecognized value: warn and list the accepted names.
// NOTE(review): "Support values" in the message below probably should read
// "Supported values"; left unchanged because it is runtime output.
217 tprintf(
"Warning, ignoring unsupported config variable value: dotproduct=%s\n",
218 dotproduct.string());
219 tprintf(
"Support values for dotproduct: auto generic native" 226 " std::inner_product.\n");
// Store the method name recorded above back into the config variable.
229 dotproduct.set_value(dotproduct_method);
DLLSYM void tprintf(const char *format,...)
double DotProductAVX(const double *u, const double *v, int n)
static const IntSimdMatrix * intSimdMatrix
double DotProductFMA(const double *u, const double *v, int n)
double(*)(const double *, const double *, int) DotProductFunction
#define STRING_VAR(name, val, comment)
static const IntSimdMatrix intSimdMatrixAVX2
static TESS_API void Update()
double DotProductNative(const double *u, const double *v, int n)
double DotProductSSE(const double *u, const double *v, int n)
DotProductFunction DotProduct
static const IntSimdMatrix intSimdMatrixSSE