1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_GEMV_TCC
 34 #define CXXBLAS_LEVEL2_GEMV_TCC 1
 35 
 36 #include <complex>
 37 #include <cxxblas/level1/level1.h>
 38 
 39 namespace cxxblas {
 40 
 41 template <typename IndexType, typename ALPHA, typename MA, typename VX,
 42           typename BETA, typename VY>
 43 void
 44 gemv_generic(StorageOrder order, Transpose transA, Transpose conjX,
 45              IndexType m, IndexType n,
 46              const ALPHA &alpha,
 47              const MA *A, IndexType ldA,
 48              const VX *x, IndexType incX,
 49              const BETA &beta,
 50              VY *y, IndexType incY)
 51 {
 52     if (order==ColMajor) {
 53         transA = Transpose(transA^Trans);
 54         gemv_generic(RowMajor, transA, conjX, n, m, alpha, A, ldA,
 55                      x, incX, beta, y, incY);
 56         return;
 57     }
 58     VX *_x = 0;
 59 
 60     if ((transA==NoTrans) || (transA==Conj)) {
 61         if (incX<0) {
 62             x -= incX*(n-1);
 63         }
 64         if (incY<0) {
 65             y -= incY*(m-1);
 66         }
 67         if (conjX==Conj) {
 68             _x = new VX[n];
 69             for (IndexType j=0, jX=0; j<n; ++j, jX+=incX) {
 70                 _x[j] = conjugate(x[jX]);
 71             }
 72             x = _x;
 73             incX = IndexType(1);
 74         }
 75 
 76         scal_generic(m, beta, y, incY);
 77         if (transA==Conj) {
 78             for (IndexType i=0, iY=0; i<m; ++i, iY+=incY) {
 79                 VY _y;
 80                 dot_generic(n, A+i*ldA, IndexType(1), x, incX, _y);
 81                 y[iY] += alpha*_y;
 82             }
 83         } else {
 84             for (IndexType i=0, iY=0; i<m; ++i, iY+=incY) {
 85                 VY _y;
 86                 dotu_generic(n, A+i*ldA, IndexType(1), x, incX, _y);
 87                 y[iY] += alpha*_y;
 88             }
 89         }
 90     } else {
 91         if (incX<0) {
 92             x -= incX*(m-1);
 93         }
 94         if (incY<0) {
 95             y -= incY*(n-1);
 96         }
 97         if (conjX==Conj) {
 98             _x = new VX[m];
 99             for (IndexType j=0, jX=0; j<m; ++j, jX+=incX) {
100                 _x[j] = conjugate(x[jX]);
101             }
102             x = _x;
103             incX = IndexType(1);
104         }
105 
106         scal_generic(n, beta, y, incY);
107         if (transA==ConjTrans) {
108             for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
109                 VY _y;
110                 dot_generic(m, A+i, ldA, x, incX, _y);
111                 y[iY] += alpha*_y;
112             }
113         } else {
114             for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
115                 VY _y;
116                 dotu_generic(m, A+i, ldA, x, incX, _y);
117                 y[iY] += alpha*_y;
118             }
119         }
120     }
121     if (conjX==Conj) {
122         delete [] _x;
123     }
124 }
125 
126 //------------------------------------------------------------------------------
127 
128 template <typename IndexType, typename ALPHA, typename MA, typename VX,
129           typename BETA, typename VY>
130 void
131 gemv(StorageOrder order, Transpose trans,
132      IndexType m, IndexType n,
133      const ALPHA &alpha,
134      const MA *A, IndexType ldA,
135      const VX *x, IndexType incX,
136      const BETA &beta,
137      VY *y, IndexType incY)
138 {
139     CXXBLAS_DEBUG_OUT("gemv_generic");
140 
141     if ((m==0) || (n==0)) {
142         return;
143     }
144     gemv_generic(order, trans, NoTrans, m, n,
145                  alpha, A, ldA, x, incX,
146                  beta, y, incY);
147 }
148 
149 #ifdef HAVE_CBLAS
150 
151 // sgemv
152 template <typename IndexType>
153 typename If<IndexType>::isBlasCompatibleInteger
154 gemv(StorageOrder order, Transpose trans,
155      IndexType m, IndexType n,
156      float alpha,
157      const float *A, IndexType ldA,
158      const float *x, IndexType incX,
159      float beta,
160      float *y, IndexType incY)
161 {
162     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_sgemv");
163 
164     cblas_sgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
165                 m,  n,
166                 alpha,
167                 A, ldA,
168                 x, incX,
169                 beta,
170                 y, incY);
171 }
172 
173 // dgemv
174 template <typename IndexType>
175 typename If<IndexType>::isBlasCompatibleInteger
176 gemv(StorageOrder order, Transpose trans,
177      IndexType m, IndexType n,
178      double alpha,
179      const double *A, IndexType ldA,
180      const double *x, IndexType incX,
181      double beta,
182      double *y, IndexType incY)
183 {
184     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dgemv");
185 
186     cblas_dgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
187                 m,  n,
188                 alpha,
189                 A, ldA,
190                 x, incX,
191                 beta,
192                 y, incY);
193 }
194 
195 // cgemv
196 template <typename IndexType>
197 typename If<IndexType>::isBlasCompatibleInteger
198 gemv(StorageOrder order, Transpose trans,
199      IndexType m, IndexType n,
200      ComplexFloat &alpha,
201      const ComplexFloat *A, IndexType ldA,
202      const ComplexFloat *x, IndexType incX,
203      ComplexFloat &beta,
204      ComplexFloat *y, IndexType incY)
205 {
206     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cgemv");
207 
208     cblas_cgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
209                 m,  n,
210                 reinterpret_cast<const float *>(&alpha),
211                 reinterpret_cast<const float *>(A), ldA,
212                 reinterpret_cast<const float *>(x), incX,
213                 reinterpret_cast<const float *>(&beta),
214                 reinterpret_cast<const float *>(y), incY);
215 }
216 
217 // zgemv
218 template <typename IndexType>
219 typename If<IndexType>::isBlasCompatibleInteger
220 gemv(StorageOrder order, Transpose trans,
221      IndexType m, IndexType n,
222      ComplexDouble &alpha,
223      const ComplexDouble *A, IndexType ldA,
224      const ComplexDouble *x, IndexType incX,
225      ComplexDouble &beta,
226      ComplexDouble *y, IndexType incY)
227 {
228     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zgemv");
229 
230     cblas_zgemv(CBLAS::getCblasType(order), CBLAS::getCblasType(trans),
231                 m,  n,
232                 reinterpret_cast<const double *>(&alpha),
233                 reinterpret_cast<const double *>(A), ldA,
234                 reinterpret_cast<const double *>(x), incX,
235                 reinterpret_cast<const double *>(&beta),
236                 reinterpret_cast<const double *>(y), incY);
237 }
238 
239 #endif // HAVE_CBLAS
240 
} // namespace cxxblas
242 
243 #endif // CXXBLAS_LEVEL2_GEMV_TCC