1 /*
  2  *   Copyright (c) 2010, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_HEMV_TCC
 34 #define CXXBLAS_LEVEL2_HEMV_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename ALPHA, typename MA, typename VX,
 39           typename BETA, typename VY>
 40 void
 41 hemv_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
 42              IndexType n,
 43              const ALPHA &alpha,
 44              const MA *A, IndexType ldA,
 45              const VX *x, IndexType incX,
 46              const BETA &beta,
 47              VY *y, IndexType incY)
 48 {
 49     if (order==ColMajor) {
 50         upLo = (upLo==Upper) ? Lower : Upper;
 51         conjugateA = Transpose(conjugateA^Conj);
 52     }
 53     scal_generic(n, beta, y, incY);
 54     if (upLo==Upper) {
 55         if (conjugateA==Conj) {
 56             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
 57                 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
 58 
 59                 VY _y = VY(0);
 60                 dot_generic(n-i-1, A+i*ldA+i+1, IndexType(1),
 61                                    x+iX+incX, incX, _y);
 62                 y[iY] += alpha*_y;
 63 
 64                 axpy_generic(n-i-1, alpha*x[iX], A+i*ldA+i+1, IndexType(1),
 65                                                  y+iY+incY, incY);
 66             }
 67         } else {
 68             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
 69                 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
 70 
 71                 VY _y = VY(0);
 72                 dotu_generic(n-i-1, A+i*ldA+i+1, IndexType(1),
 73                                     x+iX+incX, incX, _y);
 74                 y[iY] += alpha*_y;
 75 
 76                 acxpy_generic(n-i-1, alpha*x[iX], A+i*ldA+i+1, IndexType(1),
 77                                                   y+iY+incY, incY);
 78             }
 79         }
 80     } else {
 81         if (conjugateA==Conj) {
 82             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
 83                 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
 84 
 85                 VY _y = VY(0);
 86                 dot_generic(i, A+i*ldA, IndexType(1), x, incX, _y);
 87                 y[iY] += alpha*_y;
 88 
 89                 axpy_generic(i, alpha*x[iX], A+i*ldA, IndexType(1), y, incY);
 90             }
 91         } else {
 92             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
 93                 y[iY] += alpha*cxxblas::real(A[i*ldA+i]) * x[iX];
 94 
 95                 VY _y = VY(0);
 96                 dotu_generic(i, A+i*ldA, IndexType(1), x, incX, _y);
 97                 y[iY] += alpha*_y;
 98 
 99                 acxpy_generic(i, alpha*x[iX], A+i*ldA, IndexType(1), y, incY);
100             }
101         }
102     }
103 }
104 
105 //------------------------------------------------------------------------------
106 
107 template <typename IndexType, typename ALPHA, typename MA, typename VX,
108           typename BETA, typename VY>
109 void
110 hemv(StorageOrder order, StorageUpLo upLo,
111      IndexType n,
112      const ALPHA &alpha,
113      const MA *A, IndexType ldA,
114      const VX *x, IndexType incX,
115      const BETA &beta,
116      VY *y, IndexType incY)
117 {
118     CXXBLAS_DEBUG_OUT("hemv_generic");
119 
120     if (incX<0) {
121         x -= incX*(n-1);
122     }
123     if (incY<0) {
124         y -= incY*(n-1);
125     }
126     hemv_generic(order, upLo, NoTrans, n,
127                  alpha, A, ldA, x, incX,
128                  beta, y, incY);
129 }
130 
131 #ifdef HAVE_CBLAS
132 
133 // chemv
134 template <typename IndexType>
135 typename If<IndexType>::isBlasCompatibleInteger
136 hemv(StorageOrder order, StorageUpLo upLo,
137      IndexType n, ComplexDouble &alpha,
138      const ComplexFloat *A, IndexType ldA,
139      const ComplexFloat *x, IndexType incX,
140      ComplexDouble &beta,
141      ComplexFloat *y, IndexType incY)
142 {
143     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chemv");
144 
145     cblas_chemv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n, 
146                 reinterpret_cast<const float *>(&alpha),
147                 reinterpret_cast<const float *>(A), ldA,
148                 reinterpret_cast<const float *>(x), incX,
149                 reinterpret_cast<const float *>(&beta),
150                 reinterpret_cast<const float *>(y), incY);
151 }
152 
153 // zhemv
154 template <typename IndexType>
155 typename If<IndexType>::isBlasCompatibleInteger
156 hemv(StorageOrder order, StorageUpLo upLo,
157      IndexType n, ComplexDouble &alpha,
158      const ComplexDouble *A, IndexType ldA,
159      const ComplexDouble *x, IndexType incX,
160      ComplexDouble &beta,
161      ComplexDouble *y, IndexType incY)
162 {
163     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhemv");
164 
165     cblas_zhemv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo), n,
166                 reinterpret_cast<const double *>(&alpha),
167                 reinterpret_cast<const double *>(A), ldA,
168                 reinterpret_cast<const double *>(x), incX,
169                 reinterpret_cast<const double *>(&beta),
170                 reinterpret_cast<const double *>(y), incY);
171 }
172 
173 #endif // HAVE_CBLAS
174 
} // namespace cxxblas
176 
177 #endif // CXXBLAS_LEVEL2_HEMV_TCC