cblas_base.h
Go to the documentation of this file.
1 /*
2  -------------------------------------------------------------------
3 
4  Copyright (C) 2006-2020, Andrew W. Steiner
5 
6  This file is part of O2scl.
7 
8  O2scl is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version.
12 
13  O2scl is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with O2scl. If not, see <http://www.gnu.org/licenses/>.
20 
21  -------------------------------------------------------------------
22 */
23 /*
24  * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman
25  * Copyright (C) 2001, 2007 Brian Gough
26  *
27  * This program is free software; you can redistribute it and/or modify
28  * it under the terms of the GNU General Public License as published by
29  * the Free Software Foundation; either version 3 of the License, or (at
30  * your option) any later version.
31  *
32  * This program is distributed in the hope that it will be useful, but
33  * WITHOUT ANY WARRANTY; without even the implied warranty of
34  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35  * General Public License for more details.
36  *
37  * You should have received a copy of the GNU General Public License
38  * along with this program; if not, write to the Free Software
39  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
40  * 02110-1301, USA.
41  */
42 /** \file cblas_base.h
43  \brief O2scl basic linear algebra function templates
44 
45  See \ref o2scl_cblas for more documentation on
46  these functions.
47 
48  \future Add float and complex versions?
49  \future There are some Level-1 BLAS functions which are already
50  present in vector.h. Should we move all of them to one place or
51  the other? The ones in vector.h are generic in the sense that they
52  can use doubles or floats, but the ones here can use either () or
53  [].
54 
55 */
56 
57 #ifdef DOXYGEN
58 namespace o2scl_cblas {
59 #endif
60 
61  /// Matrix order, either column-major or row-major
62  enum o2cblas_order {o2cblas_RowMajor=101, o2cblas_ColMajor=102};
63 
64  /// Transpose operations
65  enum o2cblas_transpose {o2cblas_NoTrans=111, o2cblas_Trans=112,
66  o2cblas_ConjTrans=113};
67 
68  /// Upper- or lower-triangular
69  enum o2cblas_uplo {o2cblas_Upper=121, o2cblas_Lower=122};
70 
71  /// Unit or generic diagonal
72  enum o2cblas_diag {o2cblas_NonUnit=131, o2cblas_Unit=132};
73 
74  /// Left or right sided operation
75  enum o2cblas_side {o2cblas_Left=141, o2cblas_Right=142};
76 
77  /// \name Test if matrix elements are finite
78  //@{
79  /** \brief Test if the first \c n elements of a matrix are finite
80 
81  If \c n is zero, this will return true without throwing
82  an exception.
83  */
84  template<class mat_t>
85  bool matrix_is_finite(size_t m, size_t n, mat_t &data) {
86  for(size_t i=0;i<m;i++) {
87  for(size_t j=0;j<n;j++) {
88  if (!std::isfinite(O2SCL_IX2(data,i,j))) return false;
89  }
90  }
91  return true;
92  }
93 
94  /** \brief Test if a matrix is finite
95 
96  If \c n is zero, this will return true without throwing
97  an exception.
98  */
99  template<class mat_t> bool matrix_is_finite(mat_t &data) {
100  return matrix_is_finite(data.size1(),data.size2(),data);
101  }
102  //@}
103 
104  /// \name Level-1 BLAS functions
105  //@{
106  /** \brief Compute the absolute sum of vector elements
107 
108  If \c alpha is zero, this function returns and performs
109  no computations.
110  */
111  template<class vec_t> double dasum(const size_t N, const vec_t &X) {
112  double r=0.0;
113  for(size_t i=0;i<N;i++) {
114  r+=fabs(X[i]);
115  }
116  return r;
117  }
118 
119  /** \brief Compute \f$ y= \alpha x+y \f$
120 
121  If \c alpha is zero, this function returns and performs
122  no computations.
123  */
124  template<class vec_t, class vec2_t>
125  void daxpy(const double alpha, const size_t N, const vec_t &X,
126  vec2_t &Y) {
127 
128  size_t i;
129 
130  if (alpha == 0.0) {
131  return;
132  }
133 
134  const size_t m=N % 4;
135 
136  for (i=0;i<m;i++) {
137  O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
138  }
139 
140  for (i=m;i+3<N;i+=4) {
141  O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
142  O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX(X,i+1);
143  O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX(X,i+2);
144  O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX(X,i+3);
145  }
146  }
147 
148  /// Compute \f$ r=x \cdot y \f$
149  template<class vec_t, class vec2_t>
150  double ddot(const size_t N, const vec_t &X, const vec2_t &Y) {
151 
152  double r=0.0;
153  size_t i;
154 
155  const size_t m=N % 4;
156 
157  for (i=0;i<m;i++) {
158  r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
159  }
160 
161  for (i=m;i+3<N;i+=4) {
162  r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
163  r+=O2SCL_IX(X,i+1)*O2SCL_IX(Y,i+1);
164  r+=O2SCL_IX(X,i+2)*O2SCL_IX(Y,i+2);
165  r+=O2SCL_IX(X,i+3)*O2SCL_IX(Y,i+3);
166  }
167 
168  return r;
169  }
170 
171  /** \brief Compute the norm of the vector \c X
172 
173  \note The suffix "2" on the function name indicates that this
174  computes the "2-norm", not that the norm is squared.
175 
176  If \c N is less than or equal to zero, this function returns
177  zero without calling the error handler.
178 
179  This function works only with vectors which hold \c double. For
180  the norm of a general floating point vector, see \ref
181  vector_norm().
182  */
183  template<class vec_t> double dnrm2(const size_t N, const vec_t &X) {
184 
185  double scale=0.0;
186  double ssq=1.0;
187  size_t i;
188 
189  if (N == 0) {
190  return 0;
191  } else if (N == 1) {
192  return fabs(O2SCL_IX(X,0));
193  }
194 
195  for (i=0;i<N;i++) {
196  const double x=O2SCL_IX(X,i);
197 
198  if (x != 0.0) {
199  const double ax=fabs(x);
200 
201  if (scale<ax) {
202  ssq=1.0+ssq*(scale/ax)*(scale/ax);
203  scale=ax;
204  } else {
205  ssq+=(ax/scale)*(ax/scale);
206  }
207  }
208 
209  }
210 
211  return scale*sqrt(ssq);
212  }
213 
214  /** \brief Compute \f$ x=\alpha x \f$
215  */
216  template<class vec_t>
217  void dscal(const double alpha, const size_t N, vec_t &X) {
218 
219  size_t i;
220  const size_t m=N % 4;
221 
222  for (i=0;i<m;i++) {
223  O2SCL_IX(X,i)*=alpha;
224  }
225 
226  for (i=m;i+3<N;i+=4) {
227  O2SCL_IX(X,i)*=alpha;
228  O2SCL_IX(X,i+1)*=alpha;
229  O2SCL_IX(X,i+2)*=alpha;
230  O2SCL_IX(X,i+3)*=alpha;
231  }
232  }
233  //@}
234 
235  /// \name Level-2 BLAS functions
236  //@{
237  /** \brief Compute \f$ y=\alpha \left[\mathrm{op}(A)\right] x+
238  \beta y \f$.
239 
240  If \c M or \c N is zero, or if \c alpha is zero and \c beta is
241  one, this function performs no calculations and returns without
242  calling the error handler.
243  */
244  template<class mat_t, class vec_t, class vec2_t>
245  void dgemv(const enum o2cblas_order order,
246  const enum o2cblas_transpose TransA, const size_t M,
247  const size_t N, const double alpha, const mat_t &A,
248  const vec_t &X, const double beta, vec2_t &Y) {
249 
250  size_t i, j;
251  size_t lenX, lenY;
252 
253  // If conjugate transpose is requested, just assume plain transpose
254  const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans;
255 
256  if (M == 0 || N == 0) {
257  return;
258  }
259 
260  if (alpha == 0.0 && beta == 1.0) {
261  return;
262  }
263 
264  if (Trans == o2cblas_NoTrans) {
265  lenX=N;
266  lenY=M;
267  } else {
268  lenX=M;
269  lenY=N;
270  }
271 
272  /* form y := beta*y */
273  if (beta == 0.0) {
274  size_t iy=0;
275  for (i=0;i<lenY;i++) {
276  O2SCL_IX(Y,iy)=0.0;
277  iy++;
278  }
279  } else if (beta != 1.0) {
280  size_t iy=0;
281  for (i=0;i<lenY;i++) {
282  O2SCL_IX(Y,iy) *= beta;
283  iy++;
284  }
285  }
286 
287  if (alpha == 0.0) {
288  return;
289  }
290 
291  if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans) ||
292  (order == o2cblas_ColMajor && Trans == o2cblas_Trans)) {
293 
294  /* form y := alpha*A*x+y */
295  size_t iy=0;
296  for (i=0;i<lenY;i++) {
297  double temp=0.0;
298  size_t ix=0;
299  for (j=0;j<lenX;j++) {
300  temp+=O2SCL_IX(X,ix)*O2SCL_IX2(A,i,j);
301  ix++;
302  }
303  O2SCL_IX(Y,iy)+=alpha*temp;
304  iy++;
305  }
306 
307  } else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans) ||
308  (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans)) {
309 
310  /* form y := alpha*A'*x+y */
311  size_t ix=0;
312  for (j=0;j<lenX;j++) {
313  const double temp=alpha*O2SCL_IX(X,ix);
314  if (temp != 0.0) {
315  size_t iy=0;
316  for (i=0;i<lenY;i++) {
317  O2SCL_IX(Y,iy)+=temp*O2SCL_IX2(A,j,i);
318  iy++;
319  }
320  }
321  ix++;
322  }
323 
324  } else {
325  O2SCL_ERR("Unrecognized operation in dgemv().",o2scl::exc_einval);
326  }
327  return;
328  }
329 
330  /** \brief Compute \f$ x=\mathrm{op} (A)^{-1} x \f$
331 
332  If \c N is zero, this function does nothing and returns zero.
333  */
334  template<class mat_t, class vec_t>
335  void dtrsv(const enum o2cblas_order order,
336  const enum o2cblas_uplo Uplo,
337  const enum o2cblas_transpose TransA,
338  const enum o2cblas_diag Diag,
339  const size_t M, const size_t N, const mat_t &A, vec_t &X) {
340 
341  const int nonunit=(Diag == o2cblas_NonUnit);
342  int ix, jx;
343  int i, j;
344  const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans;
345 
346  if (N == 0) {
347  return;
348  }
349 
350  /* form x := inv( A )*x */
351 
352  if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
353  Uplo == o2cblas_Upper) ||
354  (order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
355  Uplo == o2cblas_Lower)) {
356 
357  /* backsubstitution */
358 
359  // O2scl: Note that subtraction of 1 from size_t N here is ok
360  // since we already handled the N=0 case above
361  ix=(int)(N-1);
362  if (nonunit) {
363  O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,N-1,N-1);
364  }
365  ix--;
366 
367  for (i=(int)(N-1);i>0 && i--;) {
368  double tmp=O2SCL_IX(X,ix);
369  jx=ix +1;
370  for (j=i+1;j<((int)N);j++) {
371  const double Aij=O2SCL_IX2(A,i,j);
372  tmp-=Aij*O2SCL_IX(X,jx);
373  jx++;
374  }
375  if (nonunit) {
376  O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
377  } else {
378  O2SCL_IX(X,ix)=tmp;
379  }
380  ix--;
381  }
382 
383  } else if ((order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
384  Uplo == o2cblas_Lower) ||
385  (order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
386  Uplo == o2cblas_Upper)) {
387 
388  /* forward substitution */
389  ix=0;
390  if (nonunit) {
391  O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,0,0);
392  }
393  ix++;
394  for (i=1;i<((int)N);i++) {
395  double tmp=O2SCL_IX(X,ix);
396  jx=0;
397  for (j=0;j<i;j++) {
398  const double Aij=O2SCL_IX2(A,i,j);
399  tmp-=Aij*O2SCL_IX(X,jx);
400  jx++;
401  }
402  if (nonunit) {
403  O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
404  } else {
405  O2SCL_IX(X,ix)=tmp;
406  }
407  ix++;
408  }
409 
410  } else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
411  Uplo == o2cblas_Upper) ||
412  (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
413  Uplo == o2cblas_Lower)) {
414 
415  /* form x := inv( A' )*x */
416 
417  /* forward substitution */
418  ix=0;
419  if (nonunit) {
420  O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,0,0);
421  }
422  ix++;
423  for (i=1;i<((int)N);i++) {
424  double tmp=O2SCL_IX(X,ix);
425  jx=0;
426  for (j=0;j<i;j++) {
427  const double Aji=O2SCL_IX2(A,j,i);
428  tmp-=Aji*O2SCL_IX(X,jx);
429  jx++;
430  }
431  if (nonunit) {
432  O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
433  } else {
434  O2SCL_IX(X,ix)=tmp;
435  }
436  ix++;
437  }
438 
439  } else if ((order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
440  Uplo == o2cblas_Lower) ||
441  (order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
442  Uplo == o2cblas_Upper)) {
443 
444  /* backsubstitution */
445  // O2scl: Note that subtraction of 1 from size_t N here is ok
446  // since we already handled the N=0 case above
447  ix=(int)(N-1);
448  if (nonunit) {
449  O2SCL_IX(X,ix)=O2SCL_IX(X,ix)/O2SCL_IX2(A,N-1,N-1);
450  }
451  ix--;
452  for (i=(int)(N-1);i>0 && i--;) {
453  double tmp=O2SCL_IX(X,ix);
454  jx=ix+1;
455  for (j=i+1;j<((int)N);j++) {
456  const double Aji=O2SCL_IX2(A,j,i);
457  tmp-=Aji*O2SCL_IX(X,jx);
458  jx++;
459  }
460  if (nonunit) {
461  O2SCL_IX(X,ix)=tmp/O2SCL_IX2(A,i,i);
462  } else {
463  O2SCL_IX(X,ix)=tmp;
464  }
465  ix--;
466  }
467 
468  } else {
469  O2SCL_ERR("Unrecognized operation in dtrsv().",o2scl::exc_einval);
470  }
471  return;
472  }
473 
474  /** \brief Compute \f$ x=op(A) x \f$ for the triangular matrix \c A
475  */
476  template<class mat_t, class vec_t>
477  void dtrmv(const enum o2cblas_order Order,
478  const enum o2cblas_uplo Uplo,
479  const enum o2cblas_transpose TransA,
480  const enum o2cblas_diag Diag, const size_t N,
481  const mat_t &A, vec_t &x) {
482 
483  int i, j;
484 
485  const int nonunit=(Diag == o2cblas_NonUnit);
486  const int Trans=(TransA != o2cblas_ConjTrans) ? TransA : o2cblas_Trans;
487 
488  if ((Order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
489  Uplo == o2cblas_Upper) ||
490  (Order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
491  Uplo == o2cblas_Lower)) {
492 
493  /* form x := A*x */
494 
495  for (i=0;i<N;i++) {
496  double temp=0.0;
497  const size_t j_min=i+1;
498  const size_t j_max=N;
499  size_t jx=j_min;
500  for (j=j_min;j<j_max;j++) {
501  temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,i,j);
502  jx++;
503  }
504  if (nonunit) {
505  O2SCL_IX(x,i)=temp+O2SCL_IX(x,i)*O2SCL_IX2(A,i,i);
506  } else {
507  O2SCL_IX(x,i)+=temp;
508  }
509  }
510 
511  } else if ((Order == o2cblas_RowMajor && Trans == o2cblas_NoTrans &&
512  Uplo == o2cblas_Lower) ||
513  (Order == o2cblas_ColMajor && Trans == o2cblas_Trans &&
514  Uplo == o2cblas_Upper)) {
515 
516  // O2scl: Note that subtraction of 1 from size_t N here is ok
517  // since we already handled the N=0 case above
518  size_t ix=N-1;
519  for (i=N;i>0 && i--;) {
520  double temp=0.0;
521  const size_t j_min=0;
522  const size_t j_max=i;
523  size_t jx=j_min;
524  for (j=j_min;j<j_max;j++) {
525  temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,i,j);
526  jx++;
527  }
528  if (nonunit) {
529  O2SCL_IX(x,ix)=temp+O2SCL_IX(x,ix)*O2SCL_IX2(A,i,i);
530  } else {
531  O2SCL_IX(x,ix)+=temp;
532  }
533  ix--;
534  }
535 
536  } else if ((Order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
537  Uplo == o2cblas_Upper) ||
538  (Order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
539  Uplo == o2cblas_Lower)) {
540 
541  /* form x := A'*x */
542  size_t ix=N-1;
543  for (i=N;i>0 && i--;) {
544  double temp=0.0;
545  const size_t j_min=0;
546  const size_t j_max=i;
547  size_t jx=j_min;
548  for (j=j_min;j<j_max;j++) {
549  temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,j,i);
550  jx++;
551  }
552  if (nonunit) {
553  O2SCL_IX(x,ix)=temp+O2SCL_IX(x,ix)*O2SCL_IX2(A,i,i);
554  } else {
555  O2SCL_IX(x,ix)+=temp;
556  }
557  ix--;
558  }
559 
560  } else if ((Order == o2cblas_RowMajor && Trans == o2cblas_Trans &&
561  Uplo == o2cblas_Lower) ||
562  (Order == o2cblas_ColMajor && Trans == o2cblas_NoTrans &&
563  Uplo == o2cblas_Upper)) {
564 
565  for (i=0;i<N;i++) {
566  double temp=0.0;
567  const size_t j_min=i+1;
568  const size_t j_max=N;
569  size_t jx=i+1;
570  for (j=j_min;j<j_max;j++) {
571  temp+=O2SCL_IX(x,jx)*O2SCL_IX2(A,j,i);
572  jx++;
573  }
574  if (nonunit) {
575  O2SCL_IX(x,i)=temp+O2SCL_IX(x,i)*O2SCL_IX2(A,i,i);
576  } else {
577  O2SCL_IX(x,i)+=temp;
578  }
579  }
580 
581  } else {
582  O2SCL_ERR("Unrecognized operation in dtrmv().",
584  }
585 
586  return;
587  }
588  //@}
589 
590  /// \name Level-3 BLAS functions
591  //@{
592  /** \brief Compute \f$ y=\alpha \mathrm{op}(A) \mathrm{op}(B) +
593  \beta C \f$
594 
595  When both \c TransA and \c TransB are \c NoTrans, this function
596  operates on the first M rows and K columns of matrix A, and the
597  first K rows and N columns of matrix B to produce a matrix C
598  with M rows and N columns.
599 
600  This function works for all values of \c Order, \c TransA, and
601  \c TransB.
602  */
603  template<class mat_t>
604  void dgemm(const enum o2cblas_order Order,
605  const enum o2cblas_transpose TransA,
606  const enum o2cblas_transpose TransB, const size_t M,
607  const size_t N, const size_t K, const double alpha,
608  const mat_t &A, const mat_t &B, const double beta, mat_t &C) {
609 
610  size_t i, j, k;
611  size_t n1, n2;
612  int TransF, TransG;
613 
614  if (alpha == 0.0 && beta == 1.0) {
615  return;
616  }
617 
618  /*
619  This is a little more complicated than the original in GSL,
620  which assigned the matrices A and B to variables *F and *G which
621  then allowed some code duplication. We can't really do that
622  here, since we don't have that kind of type info on A and B, so
623  we just handle the two cases separately.
624  */
625 
626  if (Order == o2cblas_RowMajor) {
627 
628  n1=M;
629  n2=N;
630 
631  /* form y := beta*y */
632  if (beta == 0.0) {
633  for (i=0;i<n1;i++) {
634  for (j=0;j<n2;j++) {
635  O2SCL_IX2(C,i,j)=0.0;
636  }
637  }
638  } else if (beta != 1.0) {
639  for (i=0;i<n1;i++) {
640  for (j=0;j<n2;j++) {
641  O2SCL_IX2(C,i,j)*=beta;
642  }
643  }
644  }
645 
646  if (alpha == 0.0) {
647  return;
648  }
649 
650  TransF=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
651  TransG=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
652 
653  if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
654 
655  /* form C := alpha*A*B+C */
656 
657  for (k=0;k<K;k++) {
658  for (i=0;i<n1;i++) {
659  const double temp=alpha*O2SCL_IX2(A,i,k);
660  if (temp != 0.0) {
661  for (j=0;j<n2;j++) {
662  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
663  }
664  }
665  }
666  }
667 
668  } else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
669 
670  /* form C := alpha*A*B'+C */
671 
672  for (i=0;i<n1;i++) {
673  for (j=0;j<n2;j++) {
674  double temp=0.0;
675  for (k=0;k<K;k++) {
676  temp+=O2SCL_IX2(A,i,k)*O2SCL_IX2(B,j,k);
677  }
678  O2SCL_IX2(C,i,j)+=alpha*temp;
679  }
680  }
681 
682  } else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
683 
684  for (k=0;k<K;k++) {
685  for (i=0;i<n1;i++) {
686  const double temp=alpha*O2SCL_IX2(A,k,i);
687  if (temp != 0.0) {
688  for (j=0;j<n2;j++) {
689  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
690  }
691  }
692  }
693  }
694 
695  } else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
696 
697  for (i=0;i<n1;i++) {
698  for (j=0;j<n2;j++) {
699  double temp=0.0;
700  for (k=0;k<K;k++) {
701  temp+=O2SCL_IX2(A,k,i)*O2SCL_IX2(B,j,k);
702  }
703  O2SCL_IX2(C,i,j)+=alpha*temp;
704  }
705  }
706 
707  } else {
708  O2SCL_ERR("Unrecognized operation in dgemm().",o2scl::exc_einval);
709  }
710 
711  } else {
712 
713  // Column-major case
714 
715  n1=N;
716  n2=M;
717 
718  /* form y := beta*y */
719  if (beta == 0.0) {
720  for (i=0;i<n1;i++) {
721  for (j=0;j<n2;j++) {
722  O2SCL_IX2(C,i,j)=0.0;
723  }
724  }
725  } else if (beta != 1.0) {
726  for (i=0;i<n1;i++) {
727  for (j=0;j<n2;j++) {
728  O2SCL_IX2(C,i,j)*=beta;
729  }
730  }
731  }
732 
733  if (alpha == 0.0) {
734  return;
735  }
736 
737  TransF=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
738  TransG=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
739 
740  if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
741 
742  /* form C := alpha*A*B+C */
743 
744  for (k=0;k<K;k++) {
745  for (i=0;i<n1;i++) {
746  const double temp=alpha*O2SCL_IX2(B,i,k);
747  if (temp != 0.0) {
748  for (j=0;j<n2;j++) {
749  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
750  }
751  }
752  }
753  }
754 
755  } else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
756 
757  /* form C := alpha*A*B'+C */
758 
759  for (i=0;i<n1;i++) {
760  for (j=0;j<n2;j++) {
761  double temp=0.0;
762  for (k=0;k<K;k++) {
763  temp+=O2SCL_IX2(B,i,k)*O2SCL_IX2(A,j,k);
764  }
765  O2SCL_IX2(C,i,j)+=alpha*temp;
766  }
767  }
768 
769  } else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
770 
771  for (k=0;k<K;k++) {
772  for (i=0;i<n1;i++) {
773  const double temp=alpha*O2SCL_IX2(B,k,i);
774  if (temp != 0.0) {
775  for (j=0;j<n2;j++) {
776  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
777  }
778  }
779  }
780  }
781 
782  } else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
783 
784  for (i=0;i<n1;i++) {
785  for (j=0;j<n2;j++) {
786  double temp=0.0;
787  for (k=0;k<K;k++) {
788  temp+=O2SCL_IX2(B,k,i)*O2SCL_IX2(A,j,k);
789  }
790  O2SCL_IX2(C,i,j)+=alpha*temp;
791  }
792  }
793 
794  } else {
795  O2SCL_ERR("Unrecognized operation in dgemm().",o2scl::exc_einval);
796  }
797  }
798 
799  return;
800  }
801 
802  /** \brief Compute \f$ B=\alpha \mathrm{op}[\mathrm{inv}(A)] B
803  \f$ where $A$ is triangular
804 
805  This function works for all values of \c Order, \c Side, \c Uplo,
806  \c TransA, and \c Diag . The variable \c Side is \c Left
807  when A is on the left
808 
809  This function operates on the first M rows and N columns of the
810  matrix B. If Side is Left, then this function operates on the
811  first M rows and M columns of A. If Side is Right, then this
812  function operates on the first N rows and N columns of A.
813  */
814  template<class mat_t>
815  void dtrsm(const enum o2cblas_order Order,
816  const enum o2cblas_side Side,
817  const enum o2cblas_uplo Uplo,
818  const enum o2cblas_transpose TransA,
819  const enum o2cblas_diag Diag,
820  const size_t M, const size_t N,
821  const double alpha, const mat_t &A, mat_t &B) {
822 
823  size_t i, j, k;
824  size_t n1, n2;
825 
826  const int nonunit = (Diag == o2cblas_NonUnit);
827  int side, uplo, trans;
828 
829  if (Order == o2cblas_RowMajor) {
830  n1 = M;
831  n2 = N;
832  side = Side;
833  uplo = Uplo;
834  trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
835  } else {
836  n1 = N;
837  n2 = M;
838  side = (Side == o2cblas_Left) ? o2cblas_Right : o2cblas_Left;
839  uplo = (Uplo == o2cblas_Upper) ? o2cblas_Lower : o2cblas_Upper;
840  trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
841  }
842 
843  if (side == o2cblas_Left && uplo == o2cblas_Upper &&
844  trans == o2cblas_NoTrans) {
845 
846  /* form B := alpha * inv(TriU(A)) *B */
847 
848  if (alpha != 1.0) {
849  for (i = 0; i < n1; i++) {
850  for (j = 0; j < n2; j++) {
851  O2SCL_IX2(B,i,j)*=alpha;
852  }
853  }
854  }
855 
856  for (i = n1; i > 0 && i--;) {
857  if (nonunit) {
858  double Aii = O2SCL_IX2(A,i,i);
859  for (j = 0; j < n2; j++) {
860  O2SCL_IX2(B,i,j)/=Aii;
861  }
862  }
863 
864  for (k = 0; k < i; k++) {
865  const double Aki = O2SCL_IX2(A,k,i);
866  for (j = 0; j < n2; j++) {
867  O2SCL_IX2(B,k,j)-=Aki*O2SCL_IX2(B,i,j);
868  }
869  }
870  }
871 
872  } else if (side == o2cblas_Left && uplo == o2cblas_Upper &&
873  trans == o2cblas_Trans) {
874 
875  /* form B := alpha * inv(TriU(A))' *B */
876 
877  if (alpha != 1.0) {
878  for (i = 0; i < n1; i++) {
879  for (j = 0; j < n2; j++) {
880  O2SCL_IX2(B,i,j) *= alpha;
881  }
882  }
883  }
884 
885  for (i = 0; i < n1; i++) {
886  if (nonunit) {
887  double Aii = O2SCL_IX2(A,i,i);
888  for (j = 0; j < n2; j++) {
889  O2SCL_IX2(B,i,j) /= Aii;
890  }
891  }
892 
893  for (k = i + 1; k < n1; k++) {
894  const double Aik = O2SCL_IX2(A,i,k);
895  for (j = 0; j < n2; j++) {
896  O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
897  }
898  }
899  }
900 
901  } else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
902  trans == o2cblas_NoTrans) {
903 
904  /* form B := alpha * inv(TriL(A))*B */
905 
906  if (alpha != 1.0) {
907  for (i = 0; i < n1; i++) {
908  for (j = 0; j < n2; j++) {
909  O2SCL_IX2(B,i,j) *= alpha;
910  }
911  }
912  }
913 
914  for (i = 0; i < n1; i++) {
915  if (nonunit) {
916  double Aii = O2SCL_IX2(A,i,i);
917  for (j = 0; j < n2; j++) {
918  O2SCL_IX2(B,i,j) /= Aii;
919  }
920  }
921 
922  for (k = i + 1; k < n1; k++) {
923  const double Aki = O2SCL_IX2(A,k,i);
924  for (j = 0; j < n2; j++) {
925  O2SCL_IX2(B,k,j) -= Aki * O2SCL_IX2(B,i,j);
926  }
927  }
928  }
929 
930 
931  } else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
932  trans == o2cblas_Trans) {
933 
934  /* form B := alpha * TriL(A)' *B */
935 
936  if (alpha != 1.0) {
937  for (i = 0; i < n1; i++) {
938  for (j = 0; j < n2; j++) {
939  O2SCL_IX2(B,i,j) *= alpha;
940  }
941  }
942  }
943 
944  for (i = n1; i > 0 && i--;) {
945  if (nonunit) {
946  double Aii = O2SCL_IX2(A,i,i);
947  for (j = 0; j < n2; j++) {
948  O2SCL_IX2(B,i,j) /= Aii;
949  }
950  }
951 
952  for (k = 0; k < i; k++) {
953  const double Aik = O2SCL_IX2(A,i,k);
954  for (j = 0; j < n2; j++) {
955  O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
956  }
957  }
958  }
959 
960  } else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
961  trans == o2cblas_NoTrans) {
962 
963  /* form B := alpha * B * inv(TriU(A)) */
964 
965  if (alpha != 1.0) {
966  for (i = 0; i < n1; i++) {
967  for (j = 0; j < n2; j++) {
968  O2SCL_IX2(B,i,j) *= alpha;
969  }
970  }
971  }
972 
973  for (i = 0; i < n1; i++) {
974  for (j = 0; j < n2; j++) {
975  if (nonunit) {
976  double Ajj = O2SCL_IX2(A,j,j);
977  O2SCL_IX2(B,i,j) /= Ajj;
978  }
979 
980  {
981  double Bij = O2SCL_IX2(B,i,j);
982  for (k = j + 1; k < n2; k++) {
983  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
984  }
985  }
986  }
987  }
988 
989  } else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
990  trans == o2cblas_Trans) {
991 
992  /* form B := alpha * B * inv(TriU(A))' */
993 
994  if (alpha != 1.0) {
995  for (i = 0; i < n1; i++) {
996  for (j = 0; j < n2; j++) {
997  O2SCL_IX2(B,i,j) *= alpha;
998  }
999  }
1000  }
1001 
1002  for (i = 0; i < n1; i++) {
1003  for (j = n2; j > 0 && j--;) {
1004 
1005  if (nonunit) {
1006  double Ajj = O2SCL_IX2(A,j,j);
1007  O2SCL_IX2(B,i,j) /= Ajj;
1008  }
1009 
1010  {
1011  double Bij = O2SCL_IX2(B,i,j);
1012  for (k = 0; k < j; k++) {
1013  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
1014  }
1015  }
1016  }
1017  }
1018 
1019  } else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
1020  trans == o2cblas_NoTrans) {
1021 
1022  /* form B := alpha * B * inv(TriL(A)) */
1023 
1024  if (alpha != 1.0) {
1025  for (i = 0; i < n1; i++) {
1026  for (j = 0; j < n2; j++) {
1027  O2SCL_IX2(B,i,j) *= alpha;
1028  }
1029  }
1030  }
1031 
1032  for (i = 0; i < n1; i++) {
1033  for (j = n2; j > 0 && j--;) {
1034 
1035  if (nonunit) {
1036  double Ajj = O2SCL_IX2(A,j,j);
1037  O2SCL_IX2(B,i,j) /= Ajj;
1038  }
1039 
1040  {
1041  double Bij = O2SCL_IX2(B,i,j);
1042  for (k = 0; k < j; k++) {
1043  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
1044  }
1045  }
1046  }
1047  }
1048 
1049  } else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
1050  trans == o2cblas_Trans) {
1051 
1052  /* form B := alpha * B * inv(TriL(A))' */
1053 
1054 
1055  if (alpha != 1.0) {
1056  for (i = 0; i < n1; i++) {
1057  for (j = 0; j < n2; j++) {
1058  O2SCL_IX2(B,i,j) *= alpha;
1059  }
1060  }
1061  }
1062 
1063  for (i = 0; i < n1; i++) {
1064  for (j = 0; j < n2; j++) {
1065  if (nonunit) {
1066  double Ajj = O2SCL_IX2(A,j,j);
1067  O2SCL_IX2(B,i,j) /= Ajj;
1068  }
1069 
1070  {
1071  double Bij = O2SCL_IX2(B,i,j);
1072  for (k = j + 1; k < n2; k++) {
1073  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
1074  }
1075  }
1076  }
1077  }
1078 
1079  } else {
1080  O2SCL_ERR("Bad operation in dtrsm().",o2scl::exc_einval);
1081  }
1082 
1083  return;
1084  }
1085  //@}
1086 
1087  /// \name Helper Level-1 BLAS functions - Subvectors
1088  //@{
1089  /** \brief Compute \f$ y=\alpha x+y \f$ beginning with index \c ie
1090  and ending with index \c N-1
1091 
1092  This function is used in \ref householder_hv().
1093 
1094  If \c alpha is identical with zero or <tt>N==ie</tt>, this
1095  function will perform no calculations and return without calling
1096  the error handler.
1097 
1098  If <tt>ie</tt> is greater than <tt>N-1</tt> then the error
1099  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1100  defined.
1101  */
1102  template<class vec_t, class vec2_t>
1103  void daxpy_subvec(const double alpha, const size_t N, const vec_t &X,
1104  vec2_t &Y, const size_t ie) {
1105 
1106  size_t i;
1107 
1108  if (alpha == 0.0) return;
1109 #if O2SCL_NO_RANGE_CHECK
1110 #else
1111  if (ie+1>N) {
1112  O2SCL_ERR("Invalid index in daxpy_subvec().",o2scl::exc_einval);
1113  }
1114 #endif
1115 
1116  const size_t m=(N-ie) % 4;
1117 
1118  for (i=ie;i<ie+m;i++) {
1119  O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
1120  }
1121 
1122  for (;i+3<N;i+=4) {
1123  O2SCL_IX(Y,i)+=alpha*O2SCL_IX(X,i);
1124  O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX(X,i+1);
1125  O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX(X,i+2);
1126  O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX(X,i+3);
1127  }
1128  }
1129 
1130  /** \brief Compute \f$ r=x \cdot y \f$ beginning with index \c ie and
1131  ending with index \c N-1
1132 
1133  This function is used in \ref householder_hv().
1134 
1135  If <tt>ie</tt> is greater than <tt>N-1</tt> then the error
1136  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1137  defined.
1138  */
1139  template<class vec_t, class vec2_t>
1140  double ddot_subvec(const size_t N, const vec_t &X, const vec2_t &Y,
1141  const size_t ie) {
1142  double r=0.0;
1143  size_t i;
1144 
1145 #if O2SCL_NO_RANGE_CHECK
1146 #else
1147  if (ie+1>N) {
1148  O2SCL_ERR("Invalid index in ddot_subvec().",o2scl::exc_einval);
1149  }
1150 #endif
1151 
1152  const size_t m=(N-ie) % 4;
1153 
1154  for (i=ie;i<ie+m;i++) {
1155  r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
1156  }
1157 
1158  for (;i+3<N;i+=4) {
1159  r+=O2SCL_IX(X,i)*O2SCL_IX(Y,i);
1160  r+=O2SCL_IX(X,i+1)*O2SCL_IX(Y,i+1);
1161  r+=O2SCL_IX(X,i+2)*O2SCL_IX(Y,i+2);
1162  r+=O2SCL_IX(X,i+3)*O2SCL_IX(Y,i+3);
1163  }
1164 
1165  return r;
1166  }
1167 
1168  /** \brief Compute the norm of the vector \c X beginning with
1169  index \c ie and ending with index \c N-1
1170 
1171  Used in \ref householder_transform().
1172 
1173  \note The suffix "2" on the function name indicates that this
1174  computes the "2-norm", not that the norm is squared.
1175 
1176  If <tt>ie</tt> is greater than <tt>N-1</tt> then the error
1177  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1178  defined.
1179  */
1180  template<class vec_t>
1181  double dnrm2_subvec(const size_t N, const vec_t &X, const size_t ie) {
1182 
1183  double scale=0.0;
1184  double ssq=1.0;
1185 
1186 #if O2SCL_NO_RANGE_CHECK
1187 #else
1188  if (ie+1>N) {
1189  O2SCL_ERR("Invalid index in dnrm2_subvec().",o2scl::exc_einval);
1190  }
1191 #endif
1192 
1193  if (ie+1==N) {
1194  return fabs(O2SCL_IX(X,ie));
1195  }
1196 
1197  for (size_t i=ie;i<N;i++) {
1198  const double x=O2SCL_IX(X,i);
1199 
1200  if (x != 0.0) {
1201  const double ax=fabs(x);
1202 
1203  if (scale<ax) {
1204  ssq=1.0+ssq*(scale/ax)*(scale/ax);
1205  scale=ax;
1206  } else {
1207  ssq+=(ax/scale)*(ax/scale);
1208  }
1209  }
1210 
1211  }
1212 
1213  return scale*sqrt(ssq);
1214  }
1215 
1216  /** \brief Compute \f$ x=\alpha x \f$ beginning with index \c ie and
1217  ending with index \c N-1
1218 
1219  This function is used in \ref householder_transform().
1220 
1221  If <tt>ie</tt> is greater than <tt>N-1</tt> then the error
1222  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1223  defined.
1224  */
1225  template<class vec_t>
1226  void dscal_subvec(const double alpha, const size_t N, vec_t &X,
1227  const size_t ie) {
1228 
1229 #if O2SCL_NO_RANGE_CHECK
1230 #else
1231  if (ie+1>N) {
1232  O2SCL_ERR("Invalid index in dscal_subvec().",o2scl::exc_einval);
1233  }
1234 #endif
1235 
1236  size_t i;
1237  const size_t m=(N-ie) % 4;
1238 
1239  for (i=ie;i<ie+m;i++) {
1240  O2SCL_IX(X,i)*=alpha;
1241  }
1242 
1243  for (;i+3<N;i+=4) {
1244  O2SCL_IX(X,i)*=alpha;
1245  O2SCL_IX(X,i+1)*=alpha;
1246  O2SCL_IX(X,i+2)*=alpha;
1247  O2SCL_IX(X,i+3)*=alpha;
1248  }
1249  }
1250  //@}
1251 
1252  /// \name Helper Level-1 BLAS functions - Subcolums of a matrix
1253  //@{
1254  /** \brief Compute \f$ y=\alpha x+y \f$ for a subcolumn of a matrix
1255 
1256  Given the matrix \c X, define the vector \c x as the column with
1257  index \c ic. This function computes \f$ y=\alpha x+y \f$
1258  for elements in the vectors \c x and \c y from row \c ir to
1259  row \c <tt>M-1</tt> (inclusive). All other elements in \c
1260  x and \c y are not referenced.
1261 
1262  Used in householder_hv_sub().
1263  */
1264  template<class mat_t, class vec_t>
1265  void daxpy_subcol(const double alpha, const size_t M, const mat_t &X,
1266  const size_t ir, const size_t ic, vec_t &y) {
1267 
1268 #if O2SCL_NO_RANGE_CHECK
1269 #else
1270  if (ir+1>M) {
1271  O2SCL_ERR("Invalid index in daxpy_subcol().",o2scl::exc_einval);
1272  }
1273 #endif
1274 
1275  if (alpha == 0.0) {
1276  return;
1277  }
1278 
1279  size_t i;
1280  const size_t m=(M-ir) % 4;
1281 
1282  for (i=ir;i<m+ir;i++) {
1283  O2SCL_IX(y,i)+=alpha*O2SCL_IX2(X,i,ic);
1284  }
1285 
1286  for (;i+3<M;i+=4) {
1287  O2SCL_IX(y,i)+=alpha*O2SCL_IX2(X,i,ic);
1288  O2SCL_IX(y,i+1)+=alpha*O2SCL_IX2(X,i+1,ic);
1289  O2SCL_IX(y,i+2)+=alpha*O2SCL_IX2(X,i+2,ic);
1290  O2SCL_IX(y,i+3)+=alpha*O2SCL_IX2(X,i+3,ic);
1291  }
1292 
1293  return;
1294  }
1295 
1296  /** \brief Compute \f$ r=x \cdot y \f$ for a subcolumn of a matrix
1297 
1298  Given the matrix \c X, define the vector \c x as the column with
1299  index \c ic. This function computes \f$ r=x \cdot y \f$
1300  for elements in the vectors \c x and \c y from row \c ir to
1301  row \c <tt>M-1</tt> (inclusive). All other elements in \c
1302  x and \c y are not referenced.
1303 
1304  Used in householder_hv_sub().
1305  */
1306  template<class mat_t, class vec_t>
1307  double ddot_subcol(const size_t M, const mat_t &X, const size_t ir,
1308  const size_t ic, const vec_t &y) {
1309 #if O2SCL_NO_RANGE_CHECK
1310 #else
1311  if (ir+1>M) {
1312  O2SCL_ERR("Invalid index in ddot_subcol().",o2scl::exc_einval);
1313  }
1314 #endif
1315 
1316  double r=0.0;
1317  size_t i;
1318  const size_t m=(M-ir) % 4;
1319 
1320  for (i=ir;i<m+ir;i++) {
1321  r+=O2SCL_IX2(X,i,ic)*O2SCL_IX(y,i);
1322  }
1323 
1324  for (;i+3<M;i+=4) {
1325  r+=O2SCL_IX2(X,i,ic)*O2SCL_IX(y,i);
1326  r+=O2SCL_IX2(X,i+1,ic)*O2SCL_IX(y,i+1);
1327  r+=O2SCL_IX2(X,i+2,ic)*O2SCL_IX(y,i+2);
1328  r+=O2SCL_IX2(X,i+3,ic)*O2SCL_IX(y,i+3);
1329  }
1330 
1331  return r;
1332  }
1333 
1334  /** \brief Compute the norm of a subcolumn of a matrix
1335 
1336  Given the matrix \c A, define the vector \c x as the column with
1337  index \c ic. This function computes the norm of the part of \c x
1338  from row \c ir to row \c <tt>M-1</tt> (inclusive). All other
1339  elements in \c x are not referenced.
1340 
1341  if \c M is zero, then this function silently returns zero
1342  without calling the error handler.
1343 
1344  This function is used in householder_transform_subcol().
1345 
1346  \note The suffix "2" on the function name indicates that
1347  this computes the "2-norm", not that the norm is squared.
1348  */
1349  template<class mat_t>
1350  double dnrm2_subcol(const mat_t &A, const size_t ir, const size_t ic,
1351  const size_t M) {
1352 
1353  double scale=0.0;
1354  double ssq=1.0;
1355  size_t i;
1356 
1357 #if O2SCL_NO_RANGE_CHECK
1358 #else
1359  if (ir+1>M) {
1360  O2SCL_ERR("Invalid index in dnrm2_subcol().",o2scl::exc_einval);
1361  }
1362 #endif
1363 
1364  // Handle the one-element vector case separately
1365  if (ir+1 == M) {
1366  return fabs(O2SCL_IX2(A,ir,ic));
1367  }
1368 
1369  for (i=ir;i<M;i++) {
1370  const double x=O2SCL_IX2(A,i,ic);
1371 
1372  if (x != 0.0) {
1373  const double ax=fabs(x);
1374 
1375  if (scale<ax) {
1376  ssq=1.0+ssq*(scale/ax)*(scale/ax);
1377  scale=ax;
1378  } else {
1379  ssq+=(ax/scale)*(ax/scale);
1380  }
1381  }
1382 
1383  }
1384 
1385  return scale*sqrt(ssq);
1386  }
1387 
1388  /** \brief Compute \f$ x=\alpha x \f$ for a subcolumn of a matrix
1389 
1390  Given the matrix \c A, define the vector \c x as the column with
1391  index \c ic. This function computes \f$ x= \alpha x \f$ for
1392  elements in the vectors \c x from row \c ir to row \c
1393  <tt>M-1</tt> (inclusive). All other elements in \c x are not
1394  referenced.
1395 
1396  Used in householder_transform_subcol().
1397  */
1398  template<class mat_t>
1399  void dscal_subcol(mat_t &A, const size_t ir, const size_t ic,
1400  const size_t M, const double alpha) {
1401 
1402 #if O2SCL_NO_RANGE_CHECK
1403 #else
1404  if (ir+1>M) {
1405  O2SCL_ERR("Invalid index in dscal_subcol().",o2scl::exc_einval);
1406  }
1407 #endif
1408 
1409  size_t i;
1410  const size_t m=(M-ir) % 4;
1411 
1412  for (i=ir;i<m+ir;i++) {
1413  O2SCL_IX2(A,i,ic)*=alpha;
1414  }
1415 
1416  for (;i+3<M;i+=4) {
1417  O2SCL_IX2(A,i,ic)*=alpha;
1418  O2SCL_IX2(A,i+1,ic)*=alpha;
1419  O2SCL_IX2(A,i+2,ic)*=alpha;
1420  O2SCL_IX2(A,i+3,ic)*=alpha;
1421  }
1422 
1423  return;
1424  }
1425 
1426  /** \brief Compute \f$ x=\alpha x \f$ for a subcolumn of a matrix
1427 
1428  Given the matrix \c A, define the vector \c x as the column with
1429  index \c ic. This function computes \f$ x= \alpha x \f$ for
1430  elements in the vectors \c x from row \c ir to row \c
1431  <tt>M-1</tt> (inclusive). All other elements in \c x are not
1432  referenced.
1433 
1434  Used in householder_transform_subcol().
1435  */
1436  template<class mat_t>
1437  double dasum_subcol(mat_t &A, const size_t ir, const size_t ic,
1438  const size_t M) {
1439 
1440 #if O2SCL_NO_RANGE_CHECK
1441 #else
1442  if (ir+1>M) {
1443  O2SCL_ERR("Invalid index in dscal_subcol().",o2scl::exc_einval);
1444  }
1445 #endif
1446 
1447  size_t i;
1448  double r=0.0;
1449  const size_t m=(M-ir) % 4;
1450 
1451  for (i=ir;i<m+ir;i++) {
1452  r+=fabs(O2SCL_IX2(A,i,ic));
1453  }
1454 
1455  for (;i+3<M;i+=4) {
1456  r+=fabs(O2SCL_IX2(A,i,ic));
1457  r+=fabs(O2SCL_IX2(A,i+1,ic));
1458  r+=fabs(O2SCL_IX2(A,i+2,ic));
1459  r+=fabs(O2SCL_IX2(A,i+3,ic));
1460  }
1461 
1462  return r;
1463  }
1464  //@}
1465 
1466  /// \name Helper Level-1 BLAS functions - Subrows of a matrix
1467  //@{
1468  /** \brief Compute \f$ y=\alpha x+y \f$ for a subrow of a matrix
1469 
1470  Given the matrix \c X, define the vector \c x as the row with
1471  index \c ir. This function computes \f$ y=\alpha x+y \f$ for
1472  elements in the vectors \c x from column \c ic to column \c
1473  <tt>N-1</tt> (inclusive). All other elements in \c x and
1474  \c y are not referenced.
1475 
1476  If <tt>ic</tt> is greater than <tt>N-1</tt> then the error
1477  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1478  defined.
1479 
1480  Used in householder_hv_sub().
1481  */
1482  template<class mat_t, class vec_t>
1483  void daxpy_subrow(const double alpha, const size_t N, const mat_t &X,
1484  const size_t ir, const size_t ic, vec_t &Y) {
1485 
1486 #if O2SCL_NO_RANGE_CHECK
1487 #else
1488  if (ic+1>N) {
1489  O2SCL_ERR("Invalid index in daxpy_subrow().",o2scl::exc_einval);
1490  }
1491 #endif
1492 
1493  if (alpha == 0.0) {
1494  return;
1495  }
1496 
1497  size_t i;
1498  const size_t m=(N-ic) % 4;
1499 
1500  for (i=ic;i<m+ic;i++) {
1501  O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,ir,i);
1502  }
1503 
1504  for (;i+3<N;i+=4) {
1505  O2SCL_IX(Y,i)+=alpha*O2SCL_IX2(X,ir,i);
1506  O2SCL_IX(Y,i+1)+=alpha*O2SCL_IX2(X,ir,i+1);
1507  O2SCL_IX(Y,i+2)+=alpha*O2SCL_IX2(X,ir,i+2);
1508  O2SCL_IX(Y,i+3)+=alpha*O2SCL_IX2(X,ir,i+3);
1509  }
1510 
1511  return;
1512  }
1513 
1514  /** \brief Compute \f$ r=x \cdot y \f$ for a subrow of a matrix
1515 
1516  Given the matrix \c X, define the vector \c x as the row with
1517  index \c ir. This function computes \f$ r=x \cdot y \f$ for
1518  elements in the vectors \c x from column \c ic to column \c
1519  <tt>N-1</tt> (inclusive). All other elements in \c x and
1520  \c y are not referenced.
1521 
1522  If <tt>ic</tt> is greater than <tt>N-1</tt> then the error
1523  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1524  defined.
1525 
1526  Used in householder_hv_sub().
1527  */
1528  template<class mat_t, class vec_t>
1529  double ddot_subrow(const size_t N, const mat_t &X, const size_t ir,
1530  const size_t ic, const vec_t &Y) {
1531 
1532 #if O2SCL_NO_RANGE_CHECK
1533 #else
1534  if (ic+1>N) {
1535  O2SCL_ERR("Invalid index in ddot_subrow().",o2scl::exc_einval);
1536  }
1537 #endif
1538 
1539  double r=0.0;
1540  size_t i;
1541  const size_t m=(N-ic) % 4;
1542 
1543  for (i=ic;i<m+ic;i++) {
1544  r+=O2SCL_IX2(X,ir,i)*O2SCL_IX(Y,i);
1545  }
1546 
1547  for (;i+3<N;i+=4) {
1548  r+=O2SCL_IX2(X,ir,i)*O2SCL_IX(Y,i);
1549  r+=O2SCL_IX2(X,ir,i+1)*O2SCL_IX(Y,i+1);
1550  r+=O2SCL_IX2(X,ir,i+2)*O2SCL_IX(Y,i+2);
1551  r+=O2SCL_IX2(X,ir,i+3)*O2SCL_IX(Y,i+3);
1552  }
1553 
1554  return r;
1555  }
1556 
1557  /** \brief Compute the norm of a subrow of a matrix
1558 
1559  Given the matrix \c X, define the vector \c x as the row with
1560  index \c ir. This function computes the norm of the part of \c x
1561  from column \c ic to column \c <tt>N-1</tt> (inclusive). All
1562  other elements in \c x are not referenced.
1563 
1564  \note The suffix "2" on the function name indicates that this
1565  computes the "2-norm", not that the norm is squared.
1566  */
1567  template<class mat_t>
1568  double dnrm2_subrow(const mat_t &M, const size_t ir, const size_t ic,
1569  const size_t N) {
1570 
1571  double scale=0.0;
1572  double ssq=1.0;
1573  size_t i;
1574 
1575  if (ic+1==N) {
1576  return fabs(O2SCL_IX2(M,ir,ic));
1577  }
1578 
1579  for (i=ic;i<N;i++) {
1580  const double x=O2SCL_IX2(M,ir,i);
1581 
1582  if (x != 0.0) {
1583  const double ax=fabs(x);
1584 
1585  if (scale<ax) {
1586  ssq=1.0+ssq*(scale/ax)*(scale/ax);
1587  scale=ax;
1588  } else {
1589  ssq+=(ax/scale)*(ax/scale);
1590  }
1591  }
1592 
1593  }
1594 
1595  return scale*sqrt(ssq);
1596  }
1597 
1598  /** \brief Compute \f$ x=\alpha x \f$ for a subrow of a matrix
1599 
1600  Given the matrix \c A, define the vector \c x as the row with
1601  index \c ir. This function computes \f$ x = \alpha x \f$ for
1602  elements in the vectors \c x from column \c ic to column \c
1603  <tt>N-1</tt> (inclusive). All other elements in \c x and
1604  \c y are not referenced.
1605 
1606  If <tt>ic</tt> is greater than <tt>N-1</tt> then the error
1607  handler will be called if \c O2SCL_NO_RANGE_CHECK is not
1608  defined.
1609  */
1610  template<class mat_t>
1611  void dscal_subrow(mat_t &A, const size_t ir, const size_t ic,
1612  const size_t N, const double alpha) {
1613 
1614 #if O2SCL_NO_RANGE_CHECK
1615 #else
1616  if (ic+1>N) {
1617  O2SCL_ERR("Invalid index in dscal_subrow().",o2scl::exc_einval);
1618  }
1619 #endif
1620 
1621  size_t i;
1622  const size_t m=(N-ic) % 4;
1623 
1624  for (i=ic;i<m+ic;i++) {
1625  O2SCL_IX2(A,ir,i)*=alpha;
1626  }
1627 
1628  for (;i+3<N;i+=4) {
1629  O2SCL_IX2(A,ir,i)*=alpha;
1630  O2SCL_IX2(A,ir,i+1)*=alpha;
1631  O2SCL_IX2(A,ir,i+2)*=alpha;
1632  O2SCL_IX2(A,ir,i+3)*=alpha;
1633  }
1634 
1635  return;
1636  }
1637  //@}
1638 
1639 #ifdef O2SCL_NEVER_DEFINED
1640  /// \name Helper Level-3 BLAS functions
1641  //@{
1642  /** \brief Compute \f$ y=\alpha \mathrm{op}(A) \mathrm{op}(B) +
1643  \beta C \f$ using only part of the matrices A, B, and C
1644 
1645  When both \c TransA and \c TransB are \c NoTrans, this function
1646  operates on the rows in \f$ [\mathrm{rstarta},M-1] \f$ and
1647  columns \f$ [\mathrm{cstarta,K-1] \f$ in matrix A and rows in
1648  \f$ [\mathrm{rstartb},K-1] \f$ and columns \f$
1649  [\mathrm{cstartb,N-1] \f$ in matrix B. The results are placed in
1650  rows \f$ [\mathrm{rstartc},M-1] \f$ and columns \f$
1651  [\mathrm{cstartc},N-1] \f$ .
1652 
1653  This function works for all values of \c Order, \c TransA, and
1654  \c TransB.
1655  */
1656  template<class mat_t>
1657  void dgemm_submat(const enum o2cblas_order Order,
1658  const enum o2cblas_transpose TransA,
1659  const enum o2cblas_transpose TransB, const size_t M,
1660  const size_t N, const size_t K, const double alpha,
1661  const mat_t &A, const mat_t &B,
1662  const double beta, mat_t &C, size_t rstarta,
1663  size_t cstarta, size_t rstartb, size_t cstartb,
1664  size_t rstartc, size_t cstartc) {
1665 
1666  size_t i, j, k;
1667  size_t n1, n2;
1668  int TransF, TransG;
1669 
1670  if (alpha == 0.0 && beta == 1.0) {
1671  return;
1672  }
1673 
1674  /*
1675  This is a little more complicated than the original in GSL,
1676  which assigned the matrices A and B to variables *F and *G which
1677  then allowed some code duplication. We can't really do that
1678  here, since we don't have that kind of type info on A and B, so
1679  we just handle the two cases separately.
1680  */
1681 
1682  if (Order == o2cblas_RowMajor) {
1683 
1684  n1=M;
1685  n2=N;
1686 
1687  /* form y := beta*y */
1688  if (beta == 0.0) {
1689  for (i=rstartc;i<n1;i++) {
1690  for (j=cstartc;j<n2;j++) {
1691  O2SCL_IX2(C,i,j)=0.0;
1692  }
1693  }
1694  } else if (beta != 1.0) {
1695  for (i=rstartc;i<n1;i++) {
1696  for (j=cstartc;j<n2;j++) {
1697  O2SCL_IX2(C,i,j)*=beta;
1698  }
1699  }
1700  }
1701 
1702  if (alpha == 0.0) {
1703  return;
1704  }
1705 
1706  TransF=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1707  TransG=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
1708 
1709  if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
1710 
1711  /* form C := alpha*A*B+C */
1712 
1713  for (k=cstarta;k<K;k++) {
1714  for (i=rstarta;i<n1;i++) {
1715  const double temp=alpha*O2SCL_IX2(A,i,k);
1716  if (temp != 0.0) {
1717  for (j=rstartc;j<n2;j++) {
1718  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
1719  }
1720  }
1721  }
1722  }
1723 
1724  } else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
1725 
1726  /* form C := alpha*A*B'+C */
1727 
1728  for (i=mstart;i<n1;i++) {
1729  for (j=nstart;j<n2;j++) {
1730  double temp=0.0;
1731  for (k=kstart;k<K;k++) {
1732  temp+=O2SCL_IX2(A,i,k)*O2SCL_IX2(B,j,k);
1733  }
1734  O2SCL_IX2(C,i,j)+=alpha*temp;
1735  }
1736  }
1737 
1738  } else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
1739 
1740  for (k=kstart;k<K;k++) {
1741  for (i=mstart;i<n1;i++) {
1742  const double temp=alpha*O2SCL_IX2(A,k,i);
1743  if (temp != 0.0) {
1744  for (j=nstart;j<n2;j++) {
1745  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(B,k,j);
1746  }
1747  }
1748  }
1749  }
1750 
1751  } else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
1752 
1753  for (i=mstart;i<n1;i++) {
1754  for (j=nstart;j<n2;j++) {
1755  double temp=0.0;
1756  for (k=kstart;k<K;k++) {
1757  temp+=O2SCL_IX2(A,k,i)*O2SCL_IX2(B,j,k);
1758  }
1759  O2SCL_IX2(C,i,j)+=alpha*temp;
1760  }
1761  }
1762 
1763  } else {
1764  O2SCL_ERR("Unrecognized operation in dgemm_submat().",
1766  }
1767 
1768  } else {
1769 
1770  // Column-major case
1771 
1772  n1=N;
1773  n2=M;
1774 
1775  /* form y := beta*y */
1776  if (beta == 0.0) {
1777  for (i=nstart;i<n1;i++) {
1778  for (j=mstart;j<n2;j++) {
1779  O2SCL_IX2(C,i,j)=0.0;
1780  }
1781  }
1782  } else if (beta != 1.0) {
1783  for (i=nstart;i<n1;i++) {
1784  for (j=mstart;j<n2;j++) {
1785  O2SCL_IX2(C,i,j)*=beta;
1786  }
1787  }
1788  }
1789 
1790  if (alpha == 0.0) {
1791  return;
1792  }
1793 
1794  TransF=(TransB == o2cblas_ConjTrans) ? o2cblas_Trans : TransB;
1795  TransG=(TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1796 
1797  if (TransF == o2cblas_NoTrans && TransG == o2cblas_NoTrans) {
1798 
1799  /* form C := alpha*A*B+C */
1800 
1801  for (k=kstart;k<K;k++) {
1802  for (i=nstart;i<n1;i++) {
1803  const double temp=alpha*O2SCL_IX2(B,i,k);
1804  if (temp != 0.0) {
1805  for (j=mstart;j<n2;j++) {
1806  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
1807  }
1808  }
1809  }
1810  }
1811 
1812  } else if (TransF == o2cblas_NoTrans && TransG == o2cblas_Trans) {
1813 
1814  /* form C := alpha*A*B'+C */
1815 
1816  for (i=nstart;i<n1;i++) {
1817  for (j=mstart;j<n2;j++) {
1818  double temp=0.0;
1819  for (k=kstart;k<K;k++) {
1820  temp+=O2SCL_IX2(B,i,k)*O2SCL_IX2(A,j,k);
1821  }
1822  O2SCL_IX2(C,i,j)+=alpha*temp;
1823  }
1824  }
1825 
1826  } else if (TransF == o2cblas_Trans && TransG == o2cblas_NoTrans) {
1827 
1828  for (k=kstart;k<K;k++) {
1829  for (i=nstart;i<n1;i++) {
1830  const double temp=alpha*O2SCL_IX2(B,k,i);
1831  if (temp != 0.0) {
1832  for (j=mstart;j<n2;j++) {
1833  O2SCL_IX2(C,i,j)+=temp*O2SCL_IX2(A,k,j);
1834  }
1835  }
1836  }
1837  }
1838 
1839  } else if (TransF == o2cblas_Trans && TransG == o2cblas_Trans) {
1840 
1841  for (i=nstart;i<n1;i++) {
1842  for (j=mstart;j<n2;j++) {
1843  double temp=0.0;
1844  for (k=kstart;k<K;k++) {
1845  temp+=O2SCL_IX2(B,k,i)*O2SCL_IX2(A,j,k);
1846  }
1847  O2SCL_IX2(C,i,j)+=alpha*temp;
1848  }
1849  }
1850 
1851  } else {
1852  O2SCL_ERR("Unrecognized operation in dgemm_submat().",
1854  }
1855  }
1856 
1857  return;
1858  }
1859 
1860  /** \brief Compute \f$ B=\alpha \mathrm{op}[\mathrm{inv}(A)] B
1861  \f$ where $A$ is triangular using only part of the matrices
1862  A and B
1863 
1864  This function works for all values of \c Order, \c Side, \c Uplo,
1865  \c TransA, and \c Diag .
1866  */
1867  template<class mat_t>
1868  void dtrsm_submat(const enum o2cblas_order Order,
1869  const enum o2cblas_side Side,
1870  const enum o2cblas_uplo Uplo,
1871  const enum o2cblas_transpose TransA,
1872  const enum o2cblas_diag Diag,
1873  const size_t M, const size_t N, const double alpha,
1874  const mat_t &A, mat_t &B, size_t mstart, size_t nstart) {
1875 
1876  size_t i, j, k;
1877  size_t n1, n2;
1878  size_t istart, jstart;
1879 
1880  const int nonunit = (Diag == o2cblas_NonUnit);
1881  int side, uplo, trans;
1882 
1883  if (Order == o2cblas_RowMajor) {
1884  n1 = M;
1885  n2 = N;
1886  side = Side;
1887  uplo = Uplo;
1888  trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1889  istart=mstart;
1890  jstart=nstart;
1891  } else {
1892  n1 = N;
1893  n2 = M;
1894  side = (Side == o2cblas_Left) ? o2cblas_Right : o2cblas_Left;
1895  uplo = (Uplo == o2cblas_Upper) ? o2cblas_Lower : o2cblas_Upper;
1896  trans = (TransA == o2cblas_ConjTrans) ? o2cblas_Trans : TransA;
1897  istart=nstart;
1898  jstart=mstart;
1899  }
1900 
1901  if (side == o2cblas_Left && uplo == o2cblas_Upper &&
1902  trans == o2cblas_NoTrans) {
1903 
1904  /* form B := alpha * inv(TriU(A)) *B */
1905 
1906  if (alpha != 1.0) {
1907  for (i = istart; i < n1; i++) {
1908  for (j = jstart; j < n2; j++) {
1909  O2SCL_IX2(B,i,j)*=alpha;
1910  }
1911  }
1912  }
1913 
1914  for (i = n1; i > istart && i--;) {
1915  if (nonunit) {
1916  double Aii = O2SCL_IX2(A,i,i);
1917  for (j = jstart; j < n2; j++) {
1918  O2SCL_IX2(B,i,j)/=Aii;
1919  }
1920  }
1921 
1922  for (k = 0; k < i; k++) {
1923  const double Aki = O2SCL_IX2(A,k,i);
1924  for (j = jstart; j < n2; j++) {
1925  O2SCL_IX2(B,k,j)-=Aki*O2SCL_IX2(B,i,j);
1926  }
1927  }
1928  }
1929 
1930  } else if (side == o2cblas_Left && uplo == o2cblas_Upper &&
1931  trans == o2cblas_Trans) {
1932 
1933  /* form B := alpha * inv(TriU(A))' *B */
1934 
1935  if (alpha != 1.0) {
1936  for (i = istart; i < n1; i++) {
1937  for (j = jstart; j < n2; j++) {
1938  O2SCL_IX2(B,i,j) *= alpha;
1939  }
1940  }
1941  }
1942 
1943  for (i = istart; i < n1; i++) {
1944  if (nonunit) {
1945  double Aii = O2SCL_IX2(A,i,i);
1946  for (j = jstart; j < n2; j++) {
1947  O2SCL_IX2(B,i,j) /= Aii;
1948  }
1949  }
1950 
1951  for (k = i + 1; k < n1; k++) {
1952  const double Aik = O2SCL_IX2(A,i,k);
1953  for (j = jstart; j < n2; j++) {
1954  O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
1955  }
1956  }
1957  }
1958 
1959  } else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
1960  trans == o2cblas_NoTrans) {
1961 
1962  /* form B := alpha * inv(TriL(A))*B */
1963 
1964  if (alpha != 1.0) {
1965  for (i = istart; i < n1; i++) {
1966  for (j = jstart; j < n2; j++) {
1967  O2SCL_IX2(B,i,j) *= alpha;
1968  }
1969  }
1970  }
1971 
1972  for (i = istart; i < n1; i++) {
1973  if (nonunit) {
1974  double Aii = O2SCL_IX2(A,i,i);
1975  for (j = jstart; j < n2; j++) {
1976  O2SCL_IX2(B,i,j) /= Aii;
1977  }
1978  }
1979 
1980  for (k = i + 1; k < n1; k++) {
1981  const double Aki = O2SCL_IX2(A,k,i);
1982  for (j = jstart; j < n2; j++) {
1983  O2SCL_IX2(B,k,j) -= Aki * O2SCL_IX2(B,i,j);
1984  }
1985  }
1986  }
1987 
1988 
1989  } else if (side == o2cblas_Left && uplo == o2cblas_Lower &&
1990  trans == o2cblas_Trans) {
1991 
1992  /* form B := alpha * TriL(A)' *B */
1993 
1994  if (alpha != 1.0) {
1995  for (i = istart; i < n1; i++) {
1996  for (j = jstart; j < n2; j++) {
1997  O2SCL_IX2(B,i,j) *= alpha;
1998  }
1999  }
2000  }
2001 
2002  for (i = n1; i > istart && i--;) {
2003  if (nonunit) {
2004  double Aii = O2SCL_IX2(A,i,i);
2005  for (j = jstart; j < n2; j++) {
2006  O2SCL_IX2(B,i,j) /= Aii;
2007  }
2008  }
2009 
2010  for (k = 0; k < i; k++) {
2011  const double Aik = O2SCL_IX2(A,i,k);
2012  for (j = jstart; j < n2; j++) {
2013  O2SCL_IX2(B,k,j) -= Aik * O2SCL_IX2(B,i,j);
2014  }
2015  }
2016  }
2017 
2018  } else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
2019  trans == o2cblas_NoTrans) {
2020 
2021  /* form B := alpha * B * inv(TriU(A)) */
2022 
2023  if (alpha != 1.0) {
2024  for (i = istart; i < n1; i++) {
2025  for (j = jstart; j < n2; j++) {
2026  O2SCL_IX2(B,i,j) *= alpha;
2027  }
2028  }
2029  }
2030 
2031  for (i = istart; i < n1; i++) {
2032  for (j = jstart; j < n2; j++) {
2033  if (nonunit) {
2034  double Ajj = O2SCL_IX2(A,j,j);
2035  O2SCL_IX2(B,i,j) /= Ajj;
2036  }
2037 
2038  {
2039  double Bij = O2SCL_IX2(B,i,j);
2040  for (k = j + 1; k < n2; k++) {
2041  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
2042  }
2043  }
2044  }
2045  }
2046 
2047  } else if (side == o2cblas_Right && uplo == o2cblas_Upper &&
2048  trans == o2cblas_Trans) {
2049 
2050  /* form B := alpha * B * inv(TriU(A))' */
2051 
2052  if (alpha != 1.0) {
2053  for (i = istart; i < n1; i++) {
2054  for (j = jstart; j < n2; j++) {
2055  O2SCL_IX2(B,i,j) *= alpha;
2056  }
2057  }
2058  }
2059 
2060  for (i = istart; i < n1; i++) {
2061  for (j = n2; j > jstart && j--;) {
2062 
2063  if (nonunit) {
2064  double Ajj = O2SCL_IX2(A,j,j);
2065  O2SCL_IX2(B,i,j) /= Ajj;
2066  }
2067 
2068  {
2069  double Bij = O2SCL_IX2(B,i,j);
2070  for (k = 0; k < j; k++) {
2071  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
2072  }
2073  }
2074  }
2075  }
2076 
2077  } else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
2078  trans == o2cblas_NoTrans) {
2079 
2080  /* form B := alpha * B * inv(TriL(A)) */
2081 
2082  if (alpha != 1.0) {
2083  for (i = istart; i < n1; i++) {
2084  for (j = jstart; j < n2; j++) {
2085  O2SCL_IX2(B,i,j) *= alpha;
2086  }
2087  }
2088  }
2089 
2090  for (i = istart; i < n1; i++) {
2091  for (j = n2; j > jstart && j--;) {
2092 
2093  if (nonunit) {
2094  double Ajj = O2SCL_IX2(A,j,j);
2095  O2SCL_IX2(B,i,j) /= Ajj;
2096  }
2097 
2098  {
2099  double Bij = O2SCL_IX2(B,i,j);
2100  for (k = 0; k < j; k++) {
2101  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,j,k) * Bij;
2102  }
2103  }
2104  }
2105  }
2106 
2107  } else if (side == o2cblas_Right && uplo == o2cblas_Lower &&
2108  trans == o2cblas_Trans) {
2109 
2110  /* form B := alpha * B * inv(TriL(A))' */
2111 
2112 
2113  if (alpha != 1.0) {
2114  for (i = istart; i < n1; i++) {
2115  for (j = jstart; j < n2; j++) {
2116  O2SCL_IX2(B,i,j) *= alpha;
2117  }
2118  }
2119  }
2120 
2121  for (i = istart; i < n1; i++) {
2122  for (j = jstart; j < n2; j++) {
2123  if (nonunit) {
2124  double Ajj = O2SCL_IX2(A,j,j);
2125  O2SCL_IX2(B,i,j) /= Ajj;
2126  }
2127 
2128  {
2129  double Bij = O2SCL_IX2(B,i,j);
2130  for (k = j + 1; k < n2; k++) {
2131  O2SCL_IX2(B,i,k) -= O2SCL_IX2(A,k,j) * Bij;
2132  }
2133  }
2134  }
2135  }
2136 
2137  } else {
2138  O2SCL_ERR("Bad operation in dtrsm().",o2scl::exc_einval);
2139  }
2140 
2141  return;
2142  }
2143  //@}
2144 
2145 #endif
2146 
2147 #ifdef DOXYGEN
2148 }
2149 #endif
2150 
o2scl_cblas::dasum_subcol
double dasum_subcol(mat_t &A, const size_t ir, const size_t ic, const size_t M)
Compute for a subcolumn of a matrix.
Definition: cblas_base.h:1437
o2scl_cblas::dgemv
void dgemv(const enum o2cblas_order order, const enum o2cblas_transpose TransA, const size_t M, const size_t N, const double alpha, const mat_t &A, const vec_t &X, const double beta, vec2_t &Y)
Compute .
Definition: cblas_base.h:245
o2scl_cblas::matrix_is_finite
bool matrix_is_finite(size_t m, size_t n, mat_t &data)
Test if the first n elements of a matrix are finite.
Definition: cblas_base.h:85
o2scl_cblas::daxpy
void daxpy(const double alpha, const size_t N, const vec_t &X, vec2_t &Y)
Compute .
Definition: cblas_base.h:125
o2scl_cblas::dnrm2_subrow
double dnrm2_subrow(const mat_t &M, const size_t ir, const size_t ic, const size_t N)
Compute the norm of a subrow of a matrix.
Definition: cblas_base.h:1568
o2scl_cblas::ddot_subvec
double ddot_subvec(const size_t N, const vec_t &X, const vec2_t &Y, const size_t ie)
Compute beginning with index ie and ending with index N-1.
Definition: cblas_base.h:1140
o2scl_cblas::dnrm2_subcol
double dnrm2_subcol(const mat_t &A, const size_t ir, const size_t ic, const size_t M)
Compute the norm of a subcolumn of a matrix.
Definition: cblas_base.h:1350
o2scl_cblas::dscal_subcol
void dscal_subcol(mat_t &A, const size_t ir, const size_t ic, const size_t M, const double alpha)
Compute for a subcolumn of a matrix.
Definition: cblas_base.h:1399
o2scl_cblas::ddot
double ddot(const size_t N, const vec_t &X, const vec2_t &Y)
Compute .
Definition: cblas_base.h:150
o2scl_cblas
Namespace for O2scl CBLAS function templates.
Definition: cblas.h:144
o2scl_cblas::dtrmv
void dtrmv(const enum o2cblas_order Order, const enum o2cblas_uplo Uplo, const enum o2cblas_transpose TransA, const enum o2cblas_diag Diag, const size_t N, const mat_t &A, vec_t &x)
Compute for the triangular matrix A.
Definition: cblas_base.h:477
o2scl_cblas::dscal
void dscal(const double alpha, const size_t N, vec_t &X)
Compute .
Definition: cblas_base.h:217
o2scl_cblas::dnrm2_subvec
double dnrm2_subvec(const size_t N, const vec_t &X, const size_t ie)
Compute the norm of the vector X beginning with index ie and ending with index N-1.
Definition: cblas_base.h:1181
o2scl_cblas::dgemm
void dgemm(const enum o2cblas_order Order, const enum o2cblas_transpose TransA, const enum o2cblas_transpose TransB, const size_t M, const size_t N, const size_t K, const double alpha, const mat_t &A, const mat_t &B, const double beta, mat_t &C)
Compute .
Definition: cblas_base.h:604
o2scl_cblas::daxpy_subrow
void daxpy_subrow(const double alpha, const size_t N, const mat_t &X, const size_t ir, const size_t ic, vec_t &Y)
Compute for a subrow of a matrix.
Definition: cblas_base.h:1483
o2scl_cblas::dtrsm
void dtrsm(const enum o2cblas_order Order, const enum o2cblas_side Side, const enum o2cblas_uplo Uplo, const enum o2cblas_transpose TransA, const enum o2cblas_diag Diag, const size_t M, const size_t N, const double alpha, const mat_t &A, mat_t &B)
Compute where $A$ is triangular.
Definition: cblas_base.h:815
o2scl_cblas::ddot_subcol
double ddot_subcol(const size_t M, const mat_t &X, const size_t ir, const size_t ic, const vec_t &y)
Compute for a subcolumn of a matrix.
Definition: cblas_base.h:1307
o2scl::exc_einval
@ exc_einval
invalid argument supplied by user
Definition: err_hnd.h:59
o2scl_cblas::o2cblas_side
o2cblas_side
Left or right sided operation.
Definition: cblas_base.h:75
o2scl_cblas::dasum
double dasum(const size_t N, const vec_t &X)
Compute the absolute sum of vector elements.
Definition: cblas_base.h:111
o2scl_cblas::daxpy_subcol
void daxpy_subcol(const double alpha, const size_t M, const mat_t &X, const size_t ir, const size_t ic, vec_t &y)
Compute for a subcolumn of a matrix.
Definition: cblas_base.h:1265
o2scl_cblas::dtrsv
void dtrsv(const enum o2cblas_order order, const enum o2cblas_uplo Uplo, const enum o2cblas_transpose TransA, const enum o2cblas_diag Diag, const size_t M, const size_t N, const mat_t &A, vec_t &X)
Compute .
Definition: cblas_base.h:335
o2scl_cblas::dscal_subrow
void dscal_subrow(mat_t &A, const size_t ir, const size_t ic, const size_t N, const double alpha)
Compute for a subrow of a matrix.
Definition: cblas_base.h:1611
o2scl_cblas::o2cblas_order
o2cblas_order
Matrix order, either column-major or row-major.
Definition: cblas_base.h:62
o2scl_cblas::ddot_subrow
double ddot_subrow(const size_t N, const mat_t &X, const size_t ir, const size_t ic, const vec_t &Y)
Compute for a subrow of a matrix.
Definition: cblas_base.h:1529
o2scl_cblas::dscal_subvec
void dscal_subvec(const double alpha, const size_t N, vec_t &X, const size_t ie)
Compute beginning with index ie and ending with index N-1.
Definition: cblas_base.h:1226
O2SCL_ERR
#define O2SCL_ERR(d, n)
Set an error with message d and code n.
Definition: err_hnd.h:273
o2scl_cblas::daxpy_subvec
void daxpy_subvec(const double alpha, const size_t N, const vec_t &X, vec2_t &Y, const size_t ie)
Compute beginning with index ie and ending with index N-1.
Definition: cblas_base.h:1103
o2scl_cblas::o2cblas_transpose
o2cblas_transpose
Transpose operations.
Definition: cblas_base.h:65
o2scl_cblas::o2cblas_diag
o2cblas_diag
Unit or generic diagonal.
Definition: cblas_base.h:72
o2scl_cblas::dnrm2
double dnrm2(const size_t N, const vec_t &X)
Compute the norm of the vector X.
Definition: cblas_base.h:183
o2scl_cblas::o2cblas_uplo
o2cblas_uplo
Upper- or lower-triangular.
Definition: cblas_base.h:69

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).