1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *     SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
 35  *
 36  *  -- LAPACK auxiliary routine (version 3.2) --
 37  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 38  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 39  *     November 2006
 40  */
 41 
 42 #ifndef FLENS_LAPACK_AUX_LARF_TCC
 43 #define FLENS_LAPACK_AUX_LARF_TCC 1
 44 
 45 #include <flens/blas/blas.h>
 46 #include <flens/lapack/lapack.h>
 47 
 48 namespace flens { namespace lapack {
 49 
 50 //== generic lapack implementation =============================================
 51 
 52 template <typename VV, typename TAU, typename MC, typename VWORK>
 53 void
 54 larf_generic(Side side, const DenseVector<VV> &v, const TAU &tau,
 55              GeMatrix<MC> &C, DenseVector<VWORK> &work)
 56 {
 57     using lapack::ilalc;
 58     using lapack::ilalr;
 59 
 60     typedef typename GeMatrix<MC>::ElementType  ElementType;
 61     typedef typename GeMatrix<MC>::IndexType    IndexType;
 62 
 63     const Underscore<IndexType> _;
 64 
 65     const ElementType  Zero(0), One(1);
 66 
 67     const IndexType m = C.numRows();
 68     const IndexType n = C.numCols();
 69 
 70     IndexType lastV = 0;
 71     IndexType lastC = 0;
 72 
 73     if (tau!=Zero) {
 74 //
 75 //      Set up variables for scanning V.  LASTV begins pointing to the end of V.
 76 //      Look for the last non-zero row in V.
 77 //
 78         if (side==Left) {
 79             lastV = m;
 80         } else {
 81             lastV = n;
 82         }
 83         IndexType i = (v.inc()>0) ? 1 + (lastV-1)*v.inc() : 1;
 84 //
 85 //      Look for the last non-zero row in V.
 86 //
 87         while (lastV>0 && v(i)==Zero) {
 88             --lastV;
 89             i -= v.inc();
 90         }
 91         if (side==Left) {
 92 //
 93 //          Scan for the last non-zero column in C(1:lastv,:).
 94 //
 95             lastC = ilalc(C(_(1,lastV),_));
 96         } else {
 97 //
 98 //          Scan for the last non-zero row in C(:,1:lastv).
 99 //
100             lastC = ilalr(C(_,_(1,lastV)));
101         }
102     }
103 //
104 //  Note that lastc.eq.0 renders the BLAS operations null; no special
105 //  case is needed at this level.
106 //
107     const auto _v = v(_(1,lastV));
108 
109     if (side==Left) {
110 //
111 //      Form  H * C
112 //
113         if (lastV>0) {
114             auto _work = work(_(1,lastC));
115             auto _C = C(_(1,lastV),_(1,lastC));
116 //
117 //          work(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1)
118 //
119             blas::mv(Trans, One, _C, _v, Zero, _work);
120 //
121 //          C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * work(1:lastc,1)'
122 //
123             blas::r(-tau, _v, _work, _C);
124         }
125     } else {
126 //
127 //      Form  C * H
128 //
129         if (lastV>0) {
130             auto _work = work(_(1,lastC));
131             auto _C = C(_(1,lastC),_(1,lastV));
132 //
133 //          work(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1)
134 //
135             blas::mv(NoTrans, One, _C, _v, Zero, _work);
136 //
137 //          C(1:lastc,1:lastv) := C(...) - work(1:lastc,1) * v(1:lastv,1)'
138 //
139             blas::r(-tau, _work, _v, _C);
140         }
141     }
142 }
143 
144 //== interface for native lapack ===============================================
145 
146 #ifdef CHECK_CXXLAPACK
147 
148 template <typename VV, typename TAU, typename MC, typename VWORK>
149 void
150 larf_native(Side side, const DenseVector<VV> &v, const TAU &tau,
151             GeMatrix<MC> &C, DenseVector<VWORK> &work)
152 {
153     typedef typename GeMatrix<MC>::ElementType  T;
154 
155     const char      SIDE    = getF77LapackChar<Side>(side);
156     const INTEGER   M       = C.numRows();
157     const INTEGER   N       = C.numCols();
158     const INTEGER   INCV    = v.inc()*v.stride();
159     const INTEGER   LDC     = C.leadingDimension();
160 
161     LAPACK_IMPL(dlarf)(&SIDE,
162                        &M,
163                        &N,
164                        v.data(),
165                        &INCV,
166                        &tau,
167                        C.data(),
168                        &LDC,
169                        work.data());
170 }
171 
172 #endif // CHECK_CXXLAPACK
173 
174 //== public interface ==========================================================
175 
176 template <typename VV, typename TAU, typename MC, typename VWORK>
177 void
178 larf(Side side, const DenseVector<VV> &v, const TAU &tau,
179      GeMatrix<MC> &C, DenseVector<VWORK> &work)
180 {
181     LAPACK_DEBUG_OUT("larf");
182 
183 //
184 //  Test the input parameters
185 //
186     ASSERT((v.inc()>0 && v.firstIndex()==1)
187         || (v.inc()<0 && v.lastIndex()==1));
188     ASSERT((side==Left && v.length()==C.numRows())
189         || (side==Right && v.length()==C.numCols()));
190     ASSERT(C.firstRow()==1);
191     ASSERT(C.firstCol()==1);
192     ASSERT((side==Left && work.length()>=C.numCols())
193         || (side==Right && work.length()>=C.numRows()));
194 
195 #   ifdef CHECK_CXXLAPACK
196 //
197 //  Make copies of output arguments
198 //
199     typename GeMatrix<MC>::NoView       C_org      = C;
200     typename DenseVector<VV>::NoView    work_org   = work;
201 #   endif
202 
203 //
204 //  Call implementation
205 //
206     larf_generic(side, v, tau, C, work);
207 
208 #   ifdef CHECK_CXXLAPACK
209 //
210 //  Make copies of results computed by the generic implementation
211 //
212     typename GeMatrix<MC>::NoView       C_generic      = C;
213     typename DenseVector<VV>::NoView    work_generic   = work;
214 
215 //
216 //  restore output arguments
217 //
218     C       = C_org;
219     work    = work_org;
220 
221 //
222 //  Compare generic results with results from the native implementation
223 //
224     larf_native(side, v, tau, C, work);
225 
226     bool failed = false;
227 
228     if (! isIdentical(C_generic, C, "C_generic""C")) {
229         std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
230         std::cerr << "F77LAPACK: C = " << C << std::endl;
231         failed = true;
232     }
233 
234     if (! isIdentical(work_generic, work, "work_generic""work")) {
235         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
236         std::cerr << "F77LAPACK: work = " << work << std::endl;
237         failed = true;
238     }
239 
240     if (failed) {
241         ASSERT(0);
242     }
243 #   endif
244 }
245 
246 //-- forwarding ----------------------------------------------------------------
247 template <typename VV, typename TAU, typename MC, typename VWORK>
248 void
249 larf(Side side, const VV &v, const TAU &tau, MC &&C, VWORK &&work)
250 {
251     CHECKPOINT_ENTER;
252     larf(side, v, tau, C, work);
253     CHECKPOINT_LEAVE;
254 }
255 
256 } } // namespace lapack, flens
257 
258 #endif // FLENS_LAPACK_AUX_LARF_TCC