1 /*
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_GEMV_TCC
34 #define CXXBLAS_LEVEL2_GEMV_TCC 1
35
#include <complex>
#include <vector>
#include <cxxblas/level1/level1.h>
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename ALPHA, typename MA, typename VX,
42 typename BETA, typename VY>
43 void
44 gemv_generic(StorageOrder order, Transpose transA, Transpose conjX,
45 IndexType m, IndexType n,
46 const ALPHA &alpha,
47 const MA *A, IndexType ldA,
48 const VX *x, IndexType incX,
49 const BETA &beta,
50 VY *y, IndexType incY)
51 {
52 if (order==ColMajor) {
53 transA = Transpose(transA^Trans);
54 gemv_generic(RowMajor, transA, conjX, n, m, alpha, A, ldA,
55 x, incX, beta, y, incY);
56 return;
57 }
58 VX *_x = 0;
59
60 if ((transA==NoTrans) || (transA==Conj)) {
61 if (incX<0) {
62 x -= incX*(n-1);
63 }
64 if (incY<0) {
65 y -= incY*(m-1);
66 }
67 if (conjX==Conj) {
68 _x = new VX[n];
69 for (IndexType j=0, jX=0; j<n; ++j, jX+=incX) {
70 _x[j] = conjugate(x[jX]);
71 }
72 x = _x;
73 incX = IndexType(1);
74 }
75
76 scal_generic(m, beta, y, incY);
77 if (transA==Conj) {
78 for (IndexType i=0, iY=0; i<m; ++i, iY+=incY) {
79 VY _y;
80 dot_generic(n, A+i*ldA, IndexType(1), x, incX, _y);
81 y[iY] += alpha*_y;
82 }
83 } else {
84 for (IndexType i=0, iY=0; i<m; ++i, iY+=incY) {
85 VY _y;
86 dotu_generic(n, A+i*ldA, IndexType(1), x, incX, _y);
87 y[iY] += alpha*_y;
88 }
89 }
90 } else {
91 if (incX<0) {
92 x -= incX*(m-1);
93 }
94 if (incY<0) {
95 y -= incY*(n-1);
96 }
97 if (conjX==Conj) {
98 _x = new VX[m];
99 for (IndexType j=0, jX=0; j<m; ++j, jX+=incX) {
100 _x[j] = conjugate(x[jX]);
101 }
102 x = _x;
103 incX = IndexType(1);
104 }
105
106 scal_generic(n, beta, y, incY);
107 if (transA==ConjTrans) {
108 for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
109 VY _y;
110 dot_generic(m, A+i, ldA, x, incX, _y);
111 y[iY] += alpha*_y;
112 }
113 } else {
114 for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
115 VY _y;
116 dotu_generic(m, A+i, ldA, x, incX, _y);
117 y[iY] += alpha*_y;
118 }
119 }
120 }
121 if (conjX==Conj) {
122 delete [] _x;
123 }
124 }
125
126 //------------------------------------------------------------------------------
127
128 template <typename IndexType, typename ALPHA, typename MA, typename VX,
129 typename BETA, typename VY>
130 void
131 gemv(StorageOrder order, Transpose trans,
132 IndexType m, IndexType n,
133 const ALPHA &alpha,
134 const MA *A, IndexType ldA,
135 const VX *x, IndexType incX,
136 const BETA &beta,
137 VY *y, IndexType incY)
138 {
139 CXXBLAS_DEBUG_OUT("gemv_generic");
140
141 if ((m==0) || (n==0)) {
142 return;
143 }
144 gemv_generic(order, trans, NoTrans, m, n,
145 alpha, A, ldA, x, incX,
146 beta, y, incY);
147 }
148
149 #ifdef HAVE_CBLAS
150
151 // sgemv
152 template <typename IndexType>
153 typename If<IndexType>::isBlasCompatibleInteger
154 gemv(StorageOrder order, Transpose trans,
155 IndexType m, IndexType n,
156 float alpha,
157 const float *A, IndexType ldA,
158 const float *x, IndexType incX,
159 float beta,
160 float *y, IndexType incY)
161 {
162 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_sgemv");
163
164 cblas_sgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
165 m, n,
166 alpha,
167 A, ldA,
168 x, incX,
169 beta,
170 y, incY);
171 }
172
173 // dgemv
174 template <typename IndexType>
175 typename If<IndexType>::isBlasCompatibleInteger
176 gemv(StorageOrder order, Transpose trans,
177 IndexType m, IndexType n,
178 double alpha,
179 const double *A, IndexType ldA,
180 const double *x, IndexType incX,
181 double beta,
182 double *y, IndexType incY)
183 {
184 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dgemv");
185
186 cblas_dgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
187 m, n,
188 alpha,
189 A, ldA,
190 x, incX,
191 beta,
192 y, incY);
193 }
194
195 // cgemv
196 template <typename IndexType>
197 typename If<IndexType>::isBlasCompatibleInteger
198 gemv(StorageOrder order, Transpose trans,
199 IndexType m, IndexType n,
200 ComplexFloat &alpha,
201 const ComplexFloat *A, IndexType ldA,
202 const ComplexFloat *x, IndexType incX,
203 ComplexFloat &beta,
204 ComplexFloat *y, IndexType incY)
205 {
206 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cgemv");
207
208 cblas_cgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
209 m, n,
210 reinterpret_cast<const float *>(&alpha),
211 reinterpret_cast<const float *>(A), ldA,
212 reinterpret_cast<const float *>(x), incX,
213 reinterpret_cast<const float *>(&beta),
214 reinterpret_cast<const float *>(y), incY);
215 }
216
217 // zgemv
218 template <typename IndexType>
219 typename If<IndexType>::isBlasCompatibleInteger
220 gemv(StorageOrder order, Transpose trans,
221 IndexType m, IndexType n,
222 ComplexDouble &alpha,
223 const ComplexDouble *A, IndexType ldA,
224 const ComplexDouble *x, IndexType incX,
225 ComplexDouble &beta,
226 ComplexDouble *y, IndexType incY)
227 {
228 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zgemv");
229
230 cblas_zgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
231 m, n,
232 reinterpret_cast<const double *>(&alpha),
233 reinterpret_cast<const double *>(A), ldA,
234 reinterpret_cast<const double *>(x), incX,
235 reinterpret_cast<const double *>(&beta),
236 reinterpret_cast<const double *>(y), incY);
237 }
238
239 #endif // HAVE_CBLAS
240
241 } // namespace cxxblas
242
243 #endif // CXXBLAS_LEVEL2_GEMV_TCC
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_GEMV_TCC
34 #define CXXBLAS_LEVEL2_GEMV_TCC 1
35
36 #include <complex>
37 #include <cxxblas/level1/level1.h>
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename ALPHA, typename MA, typename VX,
42 typename BETA, typename VY>
43 void
44 gemv_generic(StorageOrder order, Transpose transA, Transpose conjX,
45 IndexType m, IndexType n,
46 const ALPHA &alpha,
47 const MA *A, IndexType ldA,
48 const VX *x, IndexType incX,
49 const BETA &beta,
50 VY *y, IndexType incY)
51 {
52 if (order==ColMajor) {
53 transA = Transpose(transA^Trans);
54 gemv_generic(RowMajor, transA, conjX, n, m, alpha, A, ldA,
55 x, incX, beta, y, incY);
56 return;
57 }
58 VX *_x = 0;
59
60 if ((transA==NoTrans) || (transA==Conj)) {
61 if (incX<0) {
62 x -= incX*(n-1);
63 }
64 if (incY<0) {
65 y -= incY*(m-1);
66 }
67 if (conjX==Conj) {
68 _x = new VX[n];
69 for (IndexType j=0, jX=0; j<n; ++j, jX+=incX) {
70 _x[j] = conjugate(x[jX]);
71 }
72 x = _x;
73 incX = IndexType(1);
74 }
75
76 scal_generic(m, beta, y, incY);
77 if (transA==Conj) {
78 for (IndexType i=0, iY=0; i<m; ++i, iY+=incY) {
79 VY _y;
80 dot_generic(n, A+i*ldA, IndexType(1), x, incX, _y);
81 y[iY] += alpha*_y;
82 }
83 } else {
84 for (IndexType i=0, iY=0; i<m; ++i, iY+=incY) {
85 VY _y;
86 dotu_generic(n, A+i*ldA, IndexType(1), x, incX, _y);
87 y[iY] += alpha*_y;
88 }
89 }
90 } else {
91 if (incX<0) {
92 x -= incX*(m-1);
93 }
94 if (incY<0) {
95 y -= incY*(n-1);
96 }
97 if (conjX==Conj) {
98 _x = new VX[m];
99 for (IndexType j=0, jX=0; j<m; ++j, jX+=incX) {
100 _x[j] = conjugate(x[jX]);
101 }
102 x = _x;
103 incX = IndexType(1);
104 }
105
106 scal_generic(n, beta, y, incY);
107 if (transA==ConjTrans) {
108 for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
109 VY _y;
110 dot_generic(m, A+i, ldA, x, incX, _y);
111 y[iY] += alpha*_y;
112 }
113 } else {
114 for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
115 VY _y;
116 dotu_generic(m, A+i, ldA, x, incX, _y);
117 y[iY] += alpha*_y;
118 }
119 }
120 }
121 if (conjX==Conj) {
122 delete [] _x;
123 }
124 }
125
126 //------------------------------------------------------------------------------
127
128 template <typename IndexType, typename ALPHA, typename MA, typename VX,
129 typename BETA, typename VY>
130 void
131 gemv(StorageOrder order, Transpose trans,
132 IndexType m, IndexType n,
133 const ALPHA &alpha,
134 const MA *A, IndexType ldA,
135 const VX *x, IndexType incX,
136 const BETA &beta,
137 VY *y, IndexType incY)
138 {
139 CXXBLAS_DEBUG_OUT("gemv_generic");
140
141 if ((m==0) || (n==0)) {
142 return;
143 }
144 gemv_generic(order, trans, NoTrans, m, n,
145 alpha, A, ldA, x, incX,
146 beta, y, incY);
147 }
148
149 #ifdef HAVE_CBLAS
150
151 // sgemv
152 template <typename IndexType>
153 typename If<IndexType>::isBlasCompatibleInteger
154 gemv(StorageOrder order, Transpose trans,
155 IndexType m, IndexType n,
156 float alpha,
157 const float *A, IndexType ldA,
158 const float *x, IndexType incX,
159 float beta,
160 float *y, IndexType incY)
161 {
162 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_sgemv");
163
164 cblas_sgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
165 m, n,
166 alpha,
167 A, ldA,
168 x, incX,
169 beta,
170 y, incY);
171 }
172
173 // dgemv
174 template <typename IndexType>
175 typename If<IndexType>::isBlasCompatibleInteger
176 gemv(StorageOrder order, Transpose trans,
177 IndexType m, IndexType n,
178 double alpha,
179 const double *A, IndexType ldA,
180 const double *x, IndexType incX,
181 double beta,
182 double *y, IndexType incY)
183 {
184 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dgemv");
185
186 cblas_dgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
187 m, n,
188 alpha,
189 A, ldA,
190 x, incX,
191 beta,
192 y, incY);
193 }
194
195 // cgemv
196 template <typename IndexType>
197 typename If<IndexType>::isBlasCompatibleInteger
198 gemv(StorageOrder order, Transpose trans,
199 IndexType m, IndexType n,
200 ComplexFloat &alpha,
201 const ComplexFloat *A, IndexType ldA,
202 const ComplexFloat *x, IndexType incX,
203 ComplexFloat &beta,
204 ComplexFloat *y, IndexType incY)
205 {
206 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cgemv");
207
208 cblas_cgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
209 m, n,
210 reinterpret_cast<const float *>(&alpha),
211 reinterpret_cast<const float *>(A), ldA,
212 reinterpret_cast<const float *>(x), incX,
213 reinterpret_cast<const float *>(&beta),
214 reinterpret_cast<const float *>(y), incY);
215 }
216
217 // zgemv
218 template <typename IndexType>
219 typename If<IndexType>::isBlasCompatibleInteger
220 gemv(StorageOrder order, Transpose trans,
221 IndexType m, IndexType n,
222 ComplexDouble &alpha,
223 const ComplexDouble *A, IndexType ldA,
224 const ComplexDouble *x, IndexType incX,
225 ComplexDouble &beta,
226 ComplexDouble *y, IndexType incY)
227 {
228 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zgemv");
229
230 cblas_zgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
231 m, n,
232 reinterpret_cast<const double *>(&alpha),
233 reinterpret_cast<const double *>(A), ldA,
234 reinterpret_cast<const double *>(x), incX,
235 reinterpret_cast<const double *>(&beta),
236 reinterpret_cast<const double *>(y), incY);
237 }
238
239 #endif // HAVE_CBLAS
240
241 } // namespace cxxblas
242
243 #endif // CXXBLAS_LEVEL2_GEMV_TCC