1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef FLENS_BLAS_CLOSURES_MV_TCC
 34 #define FLENS_BLAS_CLOSURES_MV_TCC 1
 35 
 36 #ifdef FLENS_DEBUG_CLOSURES
 37 #   include <flens/blas/blaslogon.h>
 38 #else
 39 #   include <flens/blas/blaslogoff.h>
 40 #endif
 41 
 42 namespace flens { namespace blas {
 43 
 44 //== TriangularMatrix - Vector products ========================================
 45 template <typename ALPHA, typename MA, typename VX, typename BETA, typename VY>
 46 void
 47 mv(Transpose trans,
 48    const ALPHA &alpha, const TriangularMatrix<MA> &_A, const Vector<VX> &_x,
 49    const BETA &beta, Vector<VY> &_y)
 50 {
 51     using namespace DEBUGCLOSURE;
 52 
 53     const typename TriangularMatrix<MA>::Impl  &A = _A.impl();
 54     const typename Vector<VX>::Impl            &x = _x.impl();
 55     typename Vector<VY>::Impl                  &y = _y.impl();
 56 
 57 //
 58 //  If beta is not Zero we need a temporary
 59 //
 60 #   ifndef FLENS_DEBUG_CLOSURES
 61     ASSERT(beta==BETA(0));
 62 #   else
 63     const bool tmpNeeded = (beta!=BETA(0));
 64     typename Vector<VY>::Impl  yTmp;
 65     if (tmpNeeded) {
 66         FLENS_BLASLOG_TMP_ADD(yTmp);
 67         yTmp = y;
 68     }
 69 #   endif
 70 
 71     if (!identical(x, y)) {
 72         y = x;
 73     }
 74     mv(trans, A, y);
 75 
 76     if (alpha!=ALPHA(1)) {
 77         scal(alpha, y.impl());
 78     }
 79 
 80 #   ifdef FLENS_DEBUG_CLOSURES
 81     if (tmpNeeded) {
 82         y += beta*yTmp;
 83         FLENS_BLASLOG_TMP_REMOVE(yTmp, y);
 84     }
 85 #   endif
 86 }
 87 
 88 //== SymmetricMatrix - Vector products =========================================
 89 template <typename ALPHA, typename MA, typename VX, typename BETA, typename VY>
 90 void
 91 mv(Transpose trans,
 92    const ALPHA &alpha, const SymmetricMatrix<MA> &A, const Vector<VX> &x,
 93    const BETA &beta, Vector<VY> &y)
 94 {
 95 #   ifdef FLENS_DEBUG_CLOSURES
 96 
 97     if (trans==NoTrans || trans==Trans) {
 98         mv(alpha, A.impl(), x.impl(), beta, y.impl());
 99     } else {
100         typedef typename MA::Impl::ElementType        TA;
101         typedef GeMatrix<FullStorage<TA, ColMajor> >  RMA;
102 
103         RMA _A;
104         FLENS_BLASLOG_TMP_ADD(_A);
105 
106         copy(trans, A, _A);
107 
108         FLENS_BLASLOG_TMP_REMOVE(_A, A);
109         mv(NoTrans, alpha, _A, x.impl(), beta, y.impl());
110     }
111 
112 #   else
113 
114     ASSERT(trans==NoTrans || trans==Trans);
115     mv(alpha, A.impl(), x.impl(), beta, y.impl());
116 
117 #   endif
118 }
119 
120 //== HermitianMatrix - Vector products =========================================
121 template <typename ALPHA, typename MA, typename VX, typename BETA, typename VY>
122 void
123 mv(Transpose trans,
124    const ALPHA &alpha, const HermitianMatrix<MA> &A, const Vector<VX> &x,
125    const BETA &beta, Vector<VY> &y);
126 
127 
128 //== Matrix - Vector products ==================================================
129 //
130 //  This gets called if everything else fails
131 //
132 template <typename ALPHA, typename MA, typename VX, typename BETA, typename VY>
133 void
134 mv(Transpose trans,
135    const ALPHA &alpha, const Matrix<MA> &A, const Vector<VX> &x,
136    const BETA &beta, Vector<VY> &y)
137 {
138     FLENS_BLASLOG_BEGIN_GEMV(trans, alpha, A, x, beta, y);
139 
140     typedef typename MA::Impl::ElementType        TA;
141     typedef typename VX::Impl::ElementType        TX;
142 
143     typedef GeMatrix<FullStorage<TA, ColMajor> >  RMA;
144     typedef DenseVector<Array<TX> >               RVX;
145 
146     FLENS_BLASLOG_TMP_TRON;
147     RMA _A = A.impl();
148     RVX _x = x.impl();
149     FLENS_BLASLOG_TMP_TROFF;
150 
151     mv(trans, alpha, _A, _x, beta, y.impl());
152 
153     FLENS_BLASLOG_TMP_REMOVE(_A, A);
154     FLENS_BLASLOG_TMP_REMOVE(_x, x);
155 
156     FLENS_BLASLOG_END;
157 }
158 
159 } } // namespace blas, flens
160 
161 #endif // FLENS_BLAS_CLOSURES_MV_TCC
162