1 /*
  2  *   Copyright (c) 2010, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_HPMV_TCC
 34 #define CXXBLAS_LEVEL2_HPMV_TCC 1
 35 
 36 #include <complex>
 37 #include <cxxblas/level1/level1.h>
 38 
 39 namespace cxxblas {
 40 
 41 template <typename IndexType, typename ALPHA, typename MA, typename VX,
 42           typename BETA, typename VY>
 43 void
 44 hpmv_generic(StorageOrder order, StorageUpLo upLo, Transpose conjugateA,
 45              IndexType n,
 46              const ALPHA &alpha,
 47              const MA *A,
 48              const VX *x, IndexType incX,
 49              const BETA &beta,
 50              VY *y, IndexType incY)
 51 {
 52     if (order==ColMajor) {
 53         upLo = (upLo==Upper) ? Lower : Upper;
 54         conjugateA = Transpose(conjugateA^Conj);
 55     }
 56     scal_generic(n, beta, y, incY);
 57     if (upLo==Upper) {
 58         if (conjugateA==Conj) {
 59             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
 60                 y[iY] += alpha*cxxblas::real(A[i*(2*n-i+1)/2]) * x[iX];
 61 
 62                 VY _y = VY(0);
 63                 dot_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
 64                                    x+iX+incX, incX, _y);
 65                 y[iY] += alpha*_y;
 66                 axpy_generic(n-i-1, alpha*x[iX],
 67                                     A+i*(2*n-i+1)/2+1, IndexType(1),
 68                                     y+iY+incY, incY);
 69             }
 70         } else {
 71             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
 72                 y[iY] += alpha*cxxblas::real(A[i*(2*n-i+1)/2]) * x[iX];
 73 
 74                 VY _y = VY(0);
 75                 dotu_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
 76                                     x+iX+incX, incX, _y);
 77                 y[iY] += alpha*_y;
 78                 acxpy_generic(n-i-1, alpha*x[iX],
 79                                      A+i*(2*n-i+1)/2+1, IndexType(1),
 80                                      y+iY+incY, incY);
 81             }
 82         }
 83     } else {
 84         if (conjugateA==Conj) {
 85             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
 86                 y[iY] += alpha*cxxblas::real(A[i+i*(i+1)/2]) * x[iX];
 87 
 88                 VY _y = VY(0);
 89                 dot_generic(i, A+i*(i+1)/2, IndexType(1), x, incX, _y);
 90                 y[iY] += alpha*_y;
 91                 axpy_generic(i, alpha*x[iX],
 92                                 A+i*(i+1)/2, IndexType(1),
 93                                 y, incY);
 94             }
 95         } else {
 96             for (IndexType i=0, iY=0, iX=0; i<n; ++i, iX+=incX, iY+=incY) {
 97                 y[iY] += alpha*cxxblas::real(A[i+i*(i+1)/2]) * x[iX];
 98 
 99                 VY _y = VY(0);
100                 dotu_generic(i, A+i*(i+1)/2, IndexType(1), x, incX, _y);
101                 y[iY] += alpha*_y;
102                 acxpy_generic(i, alpha*x[iX],
103                                  A+i*(i+1)/2, IndexType(1),
104                                  y, incY);
105             }
106         }
107     }
108 }
109 
110 //------------------------------------------------------------------------------
111 
112 template <typename IndexType, typename ALPHA, typename MA, typename VX,
113           typename BETA, typename VY>
114 void
115 hpmv(StorageOrder order, StorageUpLo upLo,
116      IndexType n,
117      const ALPHA &alpha,
118      const MA *A,
119      const VX *x, IndexType incX,
120      const BETA &beta,
121      VY *y, IndexType incY)
122 {
123     CXXBLAS_DEBUG_OUT("hpmv_generic");
124 
125     if (incX<0) {
126         x -= incX*(n-1);
127     }
128     if (incY<0) {
129         y -= incY*(n-1);
130     }
131     hpmv_generic(order, upLo, NoTrans, n, alpha, A, x, incX, beta, y, incY);
132 }
133 
} // namespace cxxblas
135 
136 #endif // CXXBLAS_LEVEL2_HPMV_TCC