1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef FLENS_BLAS_CLOSURES_MVSWITCH_TCC
 34 #define FLENS_BLAS_CLOSURES_MVSWITCH_TCC 1
 35 
 36 #include <flens/aux/aux.h>
 37 #include <flens/blas/closures/debugclosure.h>
 38 #include <flens/blas/closures/prune.h>
 39 #include <flens/blas/level1/level1.h>
 40 #include <flens/blas/level2/level2.h>
 41 #include <flens/typedefs.h>
 42 
 43 #ifdef FLENS_DEBUG_CLOSURES
 44 #   include <flens/blas/blaslogon.h>
 45 #else
 46 #   include <flens/blas/blaslogoff.h>
 47 #endif
 48 
 49 namespace flens { namespace blas {
 50 
 51 //
//  This switch evaluates closures of the form
//
//      y = beta*y + alpha*op(A)*x
//
//  where op(A) denotes A with the transposition/conjugation selected by
//  'trans' applied, and alpha is expected to be 1 or -1 (this is asserted in
//  mvSwitch).
//
//  If x is a closure then it gets evaluated and a temporary gets created to
//  store the result.  For matrix A we distinguish between three cases:
//  case 1: A is not a closure
//  case 2: A is a scaling closure (i.e. scale*A)
//  case 3: A is some other closure
 61 
 62 //
 63 //  Entry point for mvSwitch
 64 //
 65 template <typename ALPHA, typename MA, typename VX, typename BETA, typename VY>
 66 typename RestrictTo<IsSame<MA, typename MA::Impl>::value &&
 67                     IsSame<VX, typename VX::Impl>::value &&
 68                     IsSame<VY, typename VY::Impl>::value,
 69          void>::Type
 70 mvSwitch(Transpose trans, const ALPHA &alpha, const MA &A, const VX &x,
 71          const BETA &beta, VY &y)
 72 {
 73     ASSERT(alpha==ALPHA(1) || alpha==ALPHA(-1));
 74 //
 75 //  If A is a closure then prune arbitrary many OpTrans/OpConj
 76 //
 77     typedef typename PruneConjTrans<MA>::Remainder RMA;
 78 
 79     trans = Transpose(trans^PruneConjTrans<MA>::trans);
 80     const RMA  &_A = PruneConjTrans<MA>::remainder(A);
 81 //
 82 //  If x is a closure it gets evaluated.  In this case a temporary gets
 83 //  created.  Otherwise we only keep a reference
 84 //
 85     FLENS_BLASLOG_TMP_TRON;
 86     const typename Result<VX>::Type  &_x = x;
 87     FLENS_BLASLOG_TMP_TROFF;
 88 //
 89 //  Call mv implementation
 90 //
 91     mvCase(trans, alpha, _A, _x, beta, y);
 92 //
 93 //  If a temporary was created and registered before we now unregister it
 94 //
 95 #   ifdef FLENS_DEBUG_CLOSURES
 96     if (!IsSame<VX, typename Result<VX>::Type>::value) {
 97         FLENS_BLASLOG_TMP_REMOVE(_x, x);
 98     }
 99 #   else
100     static_assert(IsSame<VX, typename Result<VX>::Type>::value,
101                   "temporary required");
102 #   endif
103 }
104 
105 //
106 //  case 1: A is no closure
107 //
108 template <typename ALPHA, typename MA, typename VX, typename BETA, typename VY>
109 typename RestrictTo<!IsClosure<MA>::value,
110          void>::Type
111 mvCase(Transpose trans, const ALPHA &alpha, const MA &A, const VX &x,
112        const BETA &beta, VY &y)
113 {
114     mv(trans, alpha, A, x, beta, y);
115 }
116 
117 //
118 //  case 2: A is closure of type scale*A
119 //
120 template <typename ALPHA, typename T, typename MA, typename VX, typename BETA,
121           typename VY>
122 void
123 mvCase(Transpose trans, const ALPHA &alpha,
124        const MatrixClosure<OpMult, ScalarValue<T>, MA> &scale_A,
125        const VX &x, const BETA &beta, VY &y)
126 {
127 //
128 //  If A is a closure then prune arbitrary many OpTrans/OpConj
129 //
130     typedef typename PruneConjTrans<MA>::Remainder  _MA;
131     typedef typename Result<_MA>::Type              RMA;
132 
133     Transpose _trans = Transpose(trans^PruneConjTrans<MA>::trans);
134 //
135 //  If the remaining A is a closure it gets evaluated.  In this case
136 //  a temporary gets created.  Otherwise we only keep a reference
137 //
138     FLENS_BLASLOG_TMP_TRON;
139     const _MA &_A  = PruneConjTrans<MA>::remainder(scale_A.right());
140     const RMA &A   = _A;
141     FLENS_BLASLOG_TMP_TROFF;
142 
143     mv(_trans, alpha*scale_A.left().value(), A, x, beta, y);
144 
145 //
146 //  If a temporary was created and registered before we now unregister it
147 //
148 #   ifdef FLENS_DEBUG_CLOSURES
149     if (!IsSame<_MA, RMA>::value) {
150         FLENS_BLASLOG_TMP_REMOVE(A, _A);
151     }
152 #   else
153     static_assert(IsSame<_MA, RMA>::value, "temporary required");
154 #   endif
155 }
156 
157 //
158 //  case 3: A is some other closure
159 //
160 template <typename ALPHA, typename Op, typename L, typename R, typename VX,
161           typename BETA, typename VY>
162 void
163 mvCase(Transpose trans, const ALPHA &alpha, const MatrixClosure<Op, L, R> &A,
164          const VX &x, const BETA &beta, VY &y)
165 {
166     typedef MatrixClosure<Op, L, R>  MC;
167 
168 //
169 //  Create (most certainly) temporary for the result of A
170 //
171     FLENS_BLASLOG_TMP_TRON;
172     typedef typename Result<MC>::Type  MA;
173     const MA &_A = A;
174     FLENS_BLASLOG_TMP_TROFF;
175 
176     mv(trans, alpha, _A, x, beta, y);
177 
178 //
179 //  If a temporary was created and registered before we now unregister it
180 //
181 #   ifdef FLENS_DEBUG_CLOSURES
182     if (!IsSame<MC, MA>::value) {
183         FLENS_BLASLOG_TMP_REMOVE(_A, A);
184     }
185 #   else
186     static_assert(IsSame<MC, MA>::value, "temporary required");
187 #   endif
188 }
189 
190 } } // namespace blas, flens
191 
192 #endif // FLENS_BLAS_CLOSURES_MVSWITCH_TCC