1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.2) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *     November 2006
 41  */
 42 
 43 #ifndef FLENS_LAPACK_LQ_ORGL2_TCC
 44 #define FLENS_LAPACK_LQ_ORGL2_TCC 1
 45 
 46 #include <flens/blas/blas.h>
 47 #include <flens/lapack/lapack.h>
 48 
 49 namespace flens { namespace lapack {
 50 
 51 //== generic lapack implementation =============================================
 52 
 53 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
 54 void
 55 orgl2_generic(IndexType                 k,
 56               GeMatrix<MA>              &A,
 57               const DenseVector<VTAU>   &tau,
 58               DenseVector<VWORK>        &work)
 59 {
 60     typedef typename GeMatrix<MA>::ElementType  T;
 61 
 62     const T Zero(0), One(1);
 63 
 64     const Underscore<IndexType> _;
 65 
 66     const IndexType m = A.numRows();
 67     const IndexType n = A.numCols();
 68 
 69 //
 70 //  Quick return if possible
 71 //
 72     if (m<=0) {
 73         return;
 74     }
 75 //
 76 //  Initialise rows k+1:m to rows of the unit matrix
 77 //
 78     if (k<m) {
 79         for (IndexType j=1; j<=n; ++j) {
 80             A(_(k+1,m),j) = Zero;
 81             if (j>k && j<=m) {
 82                 A(j,j) = One;
 83             }
 84         }
 85     }
 86 
 87     for (IndexType i=k; i>=1; --i) {
 88 //
 89 //      Apply H(i) to A(i:m,i:n) from the left
 90 //
 91         if (i<n) {
 92             if (i<m) {
 93                 A(i,i) = One;
 94                 larf(Right, A(i,_(i,n)), tau(i), A(_(i+1,m),_(i,n)), work);
 95             }
 96             A(i,_(i+1,n)) *= -tau(i);
 97         }
 98         A(i,i) = One - tau(i);
 99 //
100 //      Set A(i,1:i-1) to zero
101 //
102         A(i,_(1,i-1)) = Zero;
103     }
104 }
105 
106 //== interface for native lapack ===============================================
107 
108 #ifdef CHECK_CXXLAPACK
109 
110 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
111 void
112 orgl2_native(IndexType                 k,
113              GeMatrix<MA>              &A,
114              const DenseVector<VTAU>   &tau,
115              DenseVector<VWORK>        &work)
116 {
117     typedef typename GeMatrix<MA>::ElementType  T;
118 
119     const INTEGER    M      = A.numRows();
120     const INTEGER    N      = A.numCols();
121     const INTEGER    K      = k;
122     const INTEGER    LDA    = A.leadingDimension();
123     INTEGER          INFO;
124 
125     if (IsSame<T,DOUBLE>::value) {
126         LAPACK_IMPL(dorgl2)(&M,
127                             &N,
128                             &K,
129                             A.data(),
130                             &LDA,
131                             tau.data(),
132                             work.data(),
133                             &INFO);
134     } else {
135         ASSERT(0);
136     }
137     ASSERT(INFO==0);
138 }
139 
140 #endif // CHECK_CXXLAPACK
141 
142 //== public interface ==========================================================
143 
144 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
145 void
146 orgl2(IndexType                 k,
147       GeMatrix<MA>              &A,
148       const DenseVector<VTAU>   &tau,
149       DenseVector<VWORK>        &work)
150 {
151 //
152 //  Test the input parameters
153 //
154 #   ifndef NDEBUG
155     ASSERT(A.firstRow()==IndexType(1));
156     ASSERT(A.firstCol()==IndexType(1));
157 
158     const IndexType m = A.numRows();
159     const IndexType n = A.numCols();
160 
161     ASSERT(tau.firstIndex()==IndexType(1));
162     ASSERT(tau.length()==k);
163     ASSERT(work.length()>=m);
164 
165     ASSERT(n>=m);
166     ASSERT(m>=k);
167     ASSERT(0<=k);
168 #   endif
169 
170 //
171 //  Make copies of output arguments
172 //
173 #   ifdef CHECK_CXXLAPACK
174     typename GeMatrix<MA>::NoView       _A      = A;
175     typename DenseVector<VWORK>::NoView _work   = work;
176 #   endif
177 
178 //
179 //  Call implementation
180 //
181     orgl2_generic(k, A, tau, work);
182 
183 //
184 //  Compare results
185 //
186 #   ifdef CHECK_CXXLAPACK
187     orgl2_native(k, _A, tau, _work);
188 
189     bool failed = false;
190     if (! isIdentical(A, _A, " A""A_")) {
191         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
192         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
193         failed = true;
194     }
195 
196     if (! isIdentical(work, _work, " work""_work")) {
197         std::cerr << "CXXLAPACK:  work = " << work << std::endl;
198         std::cerr << "F77LAPACK: _work = " << _work << std::endl;
199         failed = true;
200     }
201 
202     if (failed) {
203         std::cerr << "error in: orgl2.tcc" << std::endl;
204         ASSERT(0);
205     } else {
206 //        std::cerr << "passed: orgl2.tcc" << std::endl;
207     }
208 #   endif
209 }
210 
211 //-- forwarding ----------------------------------------------------------------
212 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
213 void
214 orgl2(IndexType k, MA &&A, const VTAU &tau, VWORK &&work)
215 {
216     orgl2(k, A, tau, work);
217 }
218 
219 } } // namespace lapack, flens
220 
221 #endif // FLENS_LAPACK_LQ_ORGL2_TCC