1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef FLENS_BLAS_LEVEL1_RAXPY_TCC
 34 #define FLENS_BLAS_LEVEL1_RAXPY_TCC 1
 35 
 36 #include <cxxblas/cxxblas.h>
 37 #include <flens/aux/macros.h>
 38 #include <flens/typedefs.h>
 39 
 40 #ifdef FLENS_DEBUG_CLOSURES
 41 #   include <flens/blas/blaslogon.h>
 42 #else
 43 #   include <flens/blas/blaslogoff.h>
 44 #endif
 45 
 46 namespace flens { namespace blas {
 47 
 48 //-- raxpy
 49 template <typename ALPHA, typename VX, typename VY>
 50 void
 51 raxpy(const ALPHA &alpha, const DenseVector<VX> &x, DenseVector<VY> &y)
 52 {
 53     FLENS_BLASLOG_SETTAG("--> ");
 54     FLENS_BLASLOG_BEGIN_RAXPY(alpha, x, y);
 55 
 56     if (y.length()==0) {
 57 //
 58 //      So we allow  y += 1/alpha*x  for an empty vector y
 59 //
 60         typedef typename DenseVector<VY>::ElementType  T;
 61         const T  Zero(0);
 62 
 63         y.resize(x, Zero);
 64     }
 65     ASSERT(y.length()==x.length());
 66 
 67 #   ifdef HAVE_CXXBLAS_RAXPY
 68     cxxblas::raxpy(x.length(), alpha,
 69                    x.data(), x.stride(),
 70                    y.data(), y.stride());
 71 #   else
 72     ASSERT(0);
 73 #   endif
 74     FLENS_BLASLOG_END;
 75     FLENS_BLASLOG_UNSETTAG;
 76 }
 77 
 78 //-- geraxpy
 79 //
 80 //  B += A/alpha
 81 //
 82 template <typename ALPHA, typename MA, typename MB>
 83 void
 84 raxpy(Transpose trans,
 85       const ALPHA &alpha, const GeMatrix<MA> &A, GeMatrix<MB> &B)
 86 {
 87     if (B.numRows()==0 || B.numCols()==0) {
 88 //
 89 //      So we allow  B += 1/alpha*A  for an empty matrix B
 90 //
 91         typedef typename GeMatrix<MB>::ElementType  T;
 92         const T  Zero(0);
 93 
 94         if ((trans==NoTrans) || (trans==Conj)) {
 95             B.resize(A.numRows(), A.numCols(), Zero);
 96         } else {
 97             B.resize(A.numCols(), A.numRows(), Zero);
 98         }
 99     }
100 
101 #   ifndef NDEBUG
102     if ((trans==NoTrans) || (trans==Conj)) {
103         ASSERT((A.numRows()==B.numRows()) && (A.numCols()==B.numCols()));
104     } else {
105         ASSERT((A.numRows()==B.numCols()) && (A.numCols()==B.numRows()));
106     }
107 #   endif
108 
109 
110     trans = (MA::order==MB::order)
111           ? Transpose(trans ^ NoTrans)
112           : Transpose(trans ^ Trans);
113 
114 
115 #   ifndef FLENS_DEBUG_CLOSURES
116 #   ifndef NDEBUG
117     if (trans==Trans || trans==ConjTrans) {
118         ASSERT(!DEBUGCLOSURE::identical(A, B));
119     }
120 #   endif
121 #   else
122 //
123 //  If A and B are identical a temporary is needed if we want to use axpy
124 //  for B += A^T/alpha or B+= A^H/alpha
125 //
126     if ((trans==Trans || trans==ConjTrans) && DEBUGCLOSURE::identical(A, B)) {
127         typename Result<GeMatrix<MA> >::Type _A = A;
128         FLENS_BLASLOG_TMP_ADD(_A);
129 
130         axpy(trans, alpha, _A, B);
131 
132         FLENS_BLASLOG_TMP_REMOVE(_A, A);
133         return;
134     }
135 #   endif
136 
137     FLENS_BLASLOG_SETTAG("--> ");
138     FLENS_BLASLOG_BEGIN_MRAXPY(trans, alpha, A, B);
139 
140 #   ifdef HAVE_CXXBLAS_GERAXPY
141     geraxpy(MB::order, trans,
142             B.numRows(), B.numCols(), alpha,
143             A.data(), A.leadingDimension(),
144             B.data(), B.leadingDimension());
145 #   else
146     ASSERT(0);
147 #   endif
148     FLENS_BLASLOG_END;
149     FLENS_BLASLOG_UNSETTAG;
150 }
151 
152 
153 } } // namespace blas, flens
154 
155 #endif // FLENS_BLAS_LEVEL1_RAXPY_TCC