1 /*
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_HEMV_TCC
34 #define CXXBLAS_LEVEL2_HEMV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename ALPHA, typename MA, typename VX,
39 typename BETA, typename VY>
40 void
41 hemv_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
42 IndexType n,
43 const ALPHA &alpha,
44 const MA *A, IndexType ldA,
45 const VX *x, IndexType incX,
46 const BETA &beta,
47 VY *y, IndexType incY)
48 {
49 if (order==ColMajor) {
50 upLo = (upLo==Upper) ? Lower : Upper;
51 conjugateA = Transpose(conjugateA^Conj);
52 }
53 scal_generic(n, beta, y, incY);
54 if (upLo==Upper) {
55 if (conjugateA==Conj) {
56 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
57 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
58
59 VY _y = VY(0);
60 dot_generic(n-i-1, A+i*ldA+i+1, IndexType(1),
61 x+iX+incX, incX, _y);
62 y[iY] += alpha*_y;
63
64 axpy_generic(n-i-1, alpha*x[iX], A+i*ldA+i+1, IndexType(1),
65 y+iY+incY, incY);
66 }
67 } else {
68 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
69 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
70
71 VY _y = VY(0);
72 dotu_generic(n-i-1, A+i*ldA+i+1, IndexType(1),
73 x+iX+incX, incX, _y);
74 y[iY] += alpha*_y;
75
76 acxpy_generic(n-i-1, alpha*x[iX], A+i*ldA+i+1, IndexType(1),
77 y+iY+incY, incY);
78 }
79 }
80 } else {
81 if (conjugateA==Conj) {
82 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
83 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
84
85 VY _y = VY(0);
86 dot_generic(i, A+i*ldA, IndexType(1), x, incX, _y);
87 y[iY] += alpha*_y;
88
89 axpy_generic(i, alpha*x[iX], A+i*ldA, IndexType(1), y, incY);
90 }
91 } else {
92 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
93 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
94
95 VY _y = VY(0);
96 dotu_generic(i, A+i*ldA, IndexType(1), x, incX, _y);
97 y[iY] += alpha*_y;
98
99 acxpy_generic(i, alpha*x[iX], A+i*ldA, IndexType(1), y, incY);
100 }
101 }
102 }
103 }
104
105 //------------------------------------------------------------------------------
106
107 template <typename IndexType, typename ALPHA, typename MA, typename VX,
108 typename BETA, typename VY>
109 void
110 hemv(StorageOrder order, StorageUpLo upLo,
111 IndexType n,
112 const ALPHA &alpha,
113 const MA *A, IndexType ldA,
114 const VX *x, IndexType incX,
115 const BETA &beta,
116 VY *y, IndexType incY)
117 {
118 CXXBLAS_DEBUG_OUT("hemv_generic");
119
120 if (incX<0) {
121 x -= incX*(n-1);
122 }
123 if (incY<0) {
124 y -= incY*(n-1);
125 }
126 hemv_generic(order, upLo, NoTrans, n,
127 alpha, A, ldA, x, incX,
128 beta, y, incY);
129 }
130
131 #ifdef HAVE_CBLAS
132
133 // chemv
134 template <typename IndexType>
135 typename If<IndexType>::isBlasCompatibleInteger
136 hemv(StorageOrder order, StorageUpLo upLo,
137 IndexType n, ComplexDouble &alpha,
138 const ComplexFloat *A, IndexType ldA,
139 const ComplexFloat *x, IndexType incX,
140 ComplexDouble &beta,
141 ComplexFloat *y, IndexType incY)
142 {
143 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chemv");
144
145 cblas_chemv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
146 reinterpret_cast<const float *>(&alpha),
147 reinterpret_cast<const float *>(A), ldA,
148 reinterpret_cast<const float *>(x), incX,
149 reinterpret_cast<const float *>(&beta),
150 reinterpret_cast<const float *>(y), incY);
151 }
152
153 // zhemv
154 template <typename IndexType>
155 typename If<IndexType>::isBlasCompatibleInteger
156 hemv(StorageOrder order, StorageUpLo upLo,
157 IndexType n, ComplexDouble &alpha,
158 const ComplexDouble *A, IndexType ldA,
159 const ComplexDouble *x, IndexType incX,
160 ComplexDouble &beta,
161 ComplexDouble *y, IndexType incY)
162 {
163 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhemv");
164
165 cblas_zhemv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
166 reinterpret_cast<const double *>(&alpha),
167 reinterpret_cast<const double *>(A), ldA,
168 reinterpret_cast<const double *>(x), incX,
169 reinterpret_cast<const double *>(&beta),
170 reinterpret_cast<const double *>(y), incY);
171 }
172
173 #endif // HAVE_CBLAS
174
175 } // namespace cxxblas
176
177 #endif // CXXBLAS_LEVEL2_HEMV_TCC
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_HEMV_TCC
34 #define CXXBLAS_LEVEL2_HEMV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename ALPHA, typename MA, typename VX,
39 typename BETA, typename VY>
40 void
41 hemv_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
42 IndexType n,
43 const ALPHA &alpha,
44 const MA *A, IndexType ldA,
45 const VX *x, IndexType incX,
46 const BETA &beta,
47 VY *y, IndexType incY)
48 {
49 if (order==ColMajor) {
50 upLo = (upLo==Upper) ? Lower : Upper;
51 conjugateA = Transpose(conjugateA^Conj);
52 }
53 scal_generic(n, beta, y, incY);
54 if (upLo==Upper) {
55 if (conjugateA==Conj) {
56 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
57 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
58
59 VY _y = VY(0);
60 dot_generic(n-i-1, A+i*ldA+i+1, IndexType(1),
61 x+iX+incX, incX, _y);
62 y[iY] += alpha*_y;
63
64 axpy_generic(n-i-1, alpha*x[iX], A+i*ldA+i+1, IndexType(1),
65 y+iY+incY, incY);
66 }
67 } else {
68 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
69 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
70
71 VY _y = VY(0);
72 dotu_generic(n-i-1, A+i*ldA+i+1, IndexType(1),
73 x+iX+incX, incX, _y);
74 y[iY] += alpha*_y;
75
76 acxpy_generic(n-i-1, alpha*x[iX], A+i*ldA+i+1, IndexType(1),
77 y+iY+incY, incY);
78 }
79 }
80 } else {
81 if (conjugateA==Conj) {
82 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
83 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
84
85 VY _y = VY(0);
86 dot_generic(i, A+i*ldA, IndexType(1), x, incX, _y);
87 y[iY] += alpha*_y;
88
89 axpy_generic(i, alpha*x[iX], A+i*ldA, IndexType(1), y, incY);
90 }
91 } else {
92 for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
93 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
94
95 VY _y = VY(0);
96 dotu_generic(i, A+i*ldA, IndexType(1), x, incX, _y);
97 y[iY] += alpha*_y;
98
99 acxpy_generic(i, alpha*x[iX], A+i*ldA, IndexType(1), y, incY);
100 }
101 }
102 }
103 }
104
105 //------------------------------------------------------------------------------
106
107 template <typename IndexType, typename ALPHA, typename MA, typename VX,
108 typename BETA, typename VY>
109 void
110 hemv(StorageOrder order, StorageUpLo upLo,
111 IndexType n,
112 const ALPHA &alpha,
113 const MA *A, IndexType ldA,
114 const VX *x, IndexType incX,
115 const BETA &beta,
116 VY *y, IndexType incY)
117 {
118 CXXBLAS_DEBUG_OUT("hemv_generic");
119
120 if (incX<0) {
121 x -= incX*(n-1);
122 }
123 if (incY<0) {
124 y -= incY*(n-1);
125 }
126 hemv_generic(order, upLo, NoTrans, n,
127 alpha, A, ldA, x, incX,
128 beta, y, incY);
129 }
130
131 #ifdef HAVE_CBLAS
132
133 // chemv
134 template <typename IndexType>
135 typename If<IndexType>::isBlasCompatibleInteger
136 hemv(StorageOrder order, StorageUpLo upLo,
137 IndexType n, ComplexDouble &alpha,
138 const ComplexFloat *A, IndexType ldA,
139 const ComplexFloat *x, IndexType incX,
140 ComplexDouble &beta,
141 ComplexFloat *y, IndexType incY)
142 {
143 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chemv");
144
145 cblas_chemv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
146 reinterpret_cast<const float *>(&alpha),
147 reinterpret_cast<const float *>(A), ldA,
148 reinterpret_cast<const float *>(x), incX,
149 reinterpret_cast<const float *>(&beta),
150 reinterpret_cast<const float *>(y), incY);
151 }
152
153 // zhemv
154 template <typename IndexType>
155 typename If<IndexType>::isBlasCompatibleInteger
156 hemv(StorageOrder order, StorageUpLo upLo,
157 IndexType n, ComplexDouble &alpha,
158 const ComplexDouble *A, IndexType ldA,
159 const ComplexDouble *x, IndexType incX,
160 ComplexDouble &beta,
161 ComplexDouble *y, IndexType incY)
162 {
163 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhemv");
164
165 cblas_zhemv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
166 reinterpret_cast<const double *>(&alpha),
167 reinterpret_cast<const double *>(A), ldA,
168 reinterpret_cast<const double *>(x), incX,
169 reinterpret_cast<const double *>(&beta),
170 reinterpret_cast<const double *>(y), incY);
171 }
172
173 #endif // HAVE_CBLAS
174
175 } // namespace cxxblas
176
177 #endif // CXXBLAS_LEVEL2_HEMV_TCC