1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef FLENS_BLAS_LEVEL1_AXPY_TCC
 34 #define FLENS_BLAS_LEVEL1_AXPY_TCC 1
 35 
 36 #include <cxxblas/cxxblas.h>
 37 #include <flens/aux/macros.h>
 38 #include <flens/blas/closures/debugclosure.h>
 39 #include <flens/typedefs.h>
 40 
 41 #ifdef FLENS_DEBUG_CLOSURES
 42 #   include <flens/blas/blaslogon.h>
 43 #else
 44 #   include <flens/blas/blaslogoff.h>
 45 #endif
 46 
 47 namespace flens { namespace blas {
 48 
 49 //-- axpy
 50 template <typename ALPHA, typename VX, typename VY>
 51 void
 52 axpy(const ALPHA &alpha, const DenseVector<VX> &x, DenseVector<VY> &y)
 53 {
 54     FLENS_BLASLOG_SETTAG("--> ");
 55     FLENS_BLASLOG_BEGIN_AXPY(alpha, x, y);
 56 
 57     if (y.length()==0) {
 58 //
 59 //      So we allow  y += alpha*x  for an empty vector y
 60 //
 61         typedef typename DenseVector<VY>::ElementType  T;
 62         const T  Zero(0);
 63 
 64         y.resize(x, Zero);
 65     }
 66     ASSERT(y.length()==x.length());
 67 
 68 #   ifdef HAVE_CXXBLAS_AXPY
 69     cxxblas::axpy(x.length(), alpha,
 70                   x.data(), x.stride(),
 71                   y.data(), y.stride());
 72 #   else
 73     ASSERT(0);
 74 #   endif
 75 
 76     FLENS_BLASLOG_END;
 77     FLENS_BLASLOG_UNSETTAG;
 78 }
 79 
 80 //-- geaxpy
 81 template <typename ALPHA, typename MA, typename MB>
 82 void
 83 axpy(Transpose trans,
 84      const ALPHA &alpha, const GeMatrix<MA> &A, GeMatrix<MB> &B)
 85 {
 86     if (B.numRows()==0 || B.numCols()==0) {
 87 //
 88 //      So we allow  B += alpha*A  for an empty matrix B
 89 //
 90         typedef typename GeMatrix<MB>::ElementType  T;
 91         const T  Zero(0);
 92 
 93         if ((trans==NoTrans) || (trans==Conj)) {
 94             B.resize(A.numRows(), A.numCols(),
 95                      A.firstRow(), A.firstCol(),
 96                      Zero);
 97         } else {
 98             B.resize(A.numCols(), A.numRows(),
 99                      A.firstCol(), A.firstRow(),
100                      Zero);
101         }
102     }
103 
104 #   ifndef NDEBUG
105     if ((trans==NoTrans) || (trans==Conj)) {
106         ASSERT((A.numRows()==B.numRows()) && (A.numCols()==B.numCols()));
107     } else {
108         ASSERT((A.numRows()==B.numCols()) && (A.numCols()==B.numRows()));
109     }
110 #   endif
111 
112 
113     trans = (MA::order==MB::order)
114           ? Transpose(trans ^ NoTrans)
115           : Transpose(trans ^ Trans);
116 
117 
118 #   ifndef FLENS_DEBUG_CLOSURES
119 #   ifndef NDEBUG
120     if (trans==Trans || trans==ConjTrans) {
121         ASSERT(!DEBUGCLOSURE::identical(A, B));
122     }
123 #   endif
124 #   else
125 //
126 //  If A and B are identical a temporary is needed if we want to use axpy
127 //  for B += alpha*A^T or B+= alpha*A^H
128 //
129     if ((trans==Trans || trans==ConjTrans) && DEBUGCLOSURE::identical(A, B)) {
130         typename GeMatrix<MA>::NoView _A;
131         FLENS_BLASLOG_TMP_ADD(_A);
132 
133         copy(trans, A, _A);
134         axpy(NoTrans, alpha, A, B);
135 
136         FLENS_BLASLOG_TMP_REMOVE(_A, A);
137         return;
138     }
139 #   endif
140 
141 
142     FLENS_BLASLOG_SETTAG("--> ");
143     FLENS_BLASLOG_BEGIN_MAXPY(trans, alpha, A, B);
144 
145 #   ifdef HAVE_CXXBLAS_GEAXPY
146     geaxpy(MB::order, trans,
147            B.numRows(), B.numCols(), alpha,
148            A.data(), A.leadingDimension(),
149            B.data(), B.leadingDimension());
150 #   else
151     ASSERT(0);
152 #   endif
153     FLENS_BLASLOG_END;
154     FLENS_BLASLOG_UNSETTAG;
155 }
156 
157 } } // namespace blas, flens
158 
159 #endif // FLENS_BLAS_LEVEL1_AXPY_TCC