1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_HBMV_TCC
 34 #define CXXBLAS_LEVEL2_HBMV_TCC 1
 35 
 36 #include <complex>
 37 #include <cxxblas/level1/level1.h>
 38 
 39 namespace cxxblas {
 40 
 41 template <typename IndexType, typename ALPHA, typename MA, typename VX,
 42           typename BETA, typename VY>
 43 void
 44 hbmv_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
 45              IndexType n, IndexType k,
 46              const ALPHA &alpha,
 47              const MA *A, IndexType ldA,
 48              const VX *x, IndexType incX,
 49              const BETA &beta,
 50              VY *y, IndexType incY)
 51 {
 52     using std::max;
 53     using std::min;
 54     // TODO:  using cxxblas::real <- causes an error
 55 
 56     if (order==ColMajor) {
 57         upLo = (upLo==Upper) ? Lower : Upper;
 58         conjugateA = Transpose(conjugateA^Conj);
 59     }
 60     scal_generic(n, beta, y, incY);
 61     if (upLo==Upper) {
 62         if (conjugateA==Conj) {
 63             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
 64                 IndexType len = min(k+1, n-i);
 65 
 66                 y[iY] += alpha*cxxblas::real(A[ldA*i])*x[iX];
 67 
 68                 VY _y;
 69                 dot_generic(len-1, A+ldA*i+1, IndexType(1),
 70                                    x+iX+incX, IndexType(incX),
 71                                    _y);
 72                 y[iY] += alpha*_y;
 73 
 74                 axpy_generic(len-1, x[iX] * alpha,
 75                                     A+ldA*i+1, IndexType(1),
 76                                     y+iY+incY, incY);
 77             }
 78         } else {
 79             for (IndexType i=0, iX=0, iY=0; i<n; ++i, iX+=incX, iY+=incY) {
 80                 IndexType len = min(k+1, n-i);
 81 
 82                 y[iY] += alpha*cxxblas::real(A[ldA*i])*x[iX];
 83 
 84                 VY _y;
 85                 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
 86                                     x+iX+incX, IndexType(incX),
 87                                     _y);
 88                 y[iY] += alpha*_y;
 89 
 90                 acxpy_generic(len-1, x[iX] * alpha,
 91                                      A+ldA*i+1, IndexType(1),
 92                                      y+iY+incY, incY);
 93             }
 94         }
 95     } else { /* upLo==Lower */
 96         if (conjugateA==Conj) {
 97             for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
 98                 IndexType iA = max(k-i, IndexType(0));
 99                 IndexType len = min(k, i) + 1;
100                 IndexType _i = max(i-k, IndexType(0));
101 
102                 y[iY] += alpha*cxxblas::real(A[ldA*i+iA+ len-1])*x[i*incX];
103 
104                 VY _y;
105                 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
106                                    x+_i*incX, IndexType(incX),
107                                    _y);
108                 y[iY] += alpha*_y;
109 
110                 axpy_generic(len-1, x[i*incX] * alpha,
111                                     A+ldA*i+iA, IndexType(1),
112                                     y+_i*incY, incY);
113             }
114         } else {
115             for (IndexType i=0, iY=0; i<n; ++i, iY+=incY) {
116                 IndexType iA = max(k-i, IndexType(0));
117                 IndexType len = min(k, i) + 1;
118                 IndexType _i = max(i-k, IndexType(0));
119 
120                 y[iY] += alpha*cxxblas::real(A[ldA*i+iA+ len-1])*x[i*incX];
121 
122                 VY _y;
123                 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
124                                     x+_i*incX, IndexType(incX),
125                                     _y);
126                 y[iY] += alpha*_y;
127 
128                 acxpy_generic(len-1, x[i*incX] * alpha,
129                                      A+ldA*i+iA, IndexType(1),
130                                      y+_i*incY, incY);
131             }
132         }
133     }
134 }
135 
136 //------------------------------------------------------------------------------
137 
138 template <typename IndexType, typename ALPHA, typename MA, typename VX,
139           typename BETA, typename VY>
140 void
141 hbmv(StorageOrder order, StorageUpLo upLo,
142      IndexType n, IndexType k,
143      const ALPHA &alpha,
144      const MA *A, IndexType ldA,
145      const VX *x, IndexType incX,
146      const BETA &beta,
147      VY *y, IndexType incY)
148 {
149     CXXBLAS_DEBUG_OUT("hbmv_generic");
150 
151     if (n==0) {
152         return;
153     }
154     if (incX<0) {
155         x -= incX*(n-1);
156     }
157     if (incY<0) {
158         y -= incY*(n-1);
159     }
160     hbmv_generic(order, upLo, NoTrans,
161                  n, k,
162                  alpha, A, ldA,
163                  x, incX,
164                  beta,
165                  y, incY);
166 }
167 
} // namespace cxxblas
169 
170 #endif // CXXBLAS_LEVEL2_HBMV_TCC