1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.2) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *     November 2006
 41  */
 42 
 43 #ifndef FLENS_LAPACK_GESV_TRF_TCC
 44 #define FLENS_LAPACK_GESV_TRF_TCC 1
 45 
 46 #include <algorithm>
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 #include <flens/lapack/interface/include/f77lapack.h>
 51 
 52 namespace flens { namespace lapack {
 53 
 54 //== generic lapack implementation =============================================
 55 
 56 template <typename MA, typename VP>
 57 typename GeMatrix<MA>::IndexType
 58 trf_generic(GeMatrix<MA> &A, DenseVector<VP> &piv)
 59 {
 60     using std::max;
 61     using std::min;
 62 
 63     typedef typename GeMatrix<MA>::ElementType  T;
 64     typedef typename GeMatrix<MA>::IndexType    IndexType;
 65 
 66     const Underscore<IndexType> _;
 67 
 68     const IndexType m = A.numRows();
 69     const IndexType n = A.numCols();
 70 
 71     IndexType info = 0;
 72 //
 73 //  Quick return if possible
 74 //
 75     if ((m==0) || (n==0)) {
 76         return info;
 77     }
 78 //
 79 //  Determine the block size for this environment.
 80 //
 81     IndexType bs = ilaenv<T>(1"GETRF""", m, n, -1, -1);
 82 
 83     if ((bs<=1) || (bs>=min(m,n))) {
 84 //
 85 //      Use unblocked code.
 86 //
 87         info = tf2(A, piv);
 88     } else {
 89 //
 90 //      Use blocked code.
 91 //
 92         for (IndexType j=1; j<=min(m,n); j+=bs) {
 93             IndexType jb = min(min(m,n)-j+1, bs);
 94 //
 95 //          Row and column partitioning of A
 96 //
 97             const auto rows1    = _(    j, j+jb-1);
 98             const auto rows2    = _( j+jb,      m);
 99 
100             const auto rows12   = _(    j,      m);
101 
102             const auto cols0    = _(    1,    j-1);
103             const auto cols1    = _(    j, j+jb-1);
104             const auto cols2    = _( j+jb,      n);
105 //
106 //          Factor diagonal and subdiagonal blocks and test for exact
107 //          singularity.
108 //
109             IndexType _info = tf2(A(rows12, cols1), piv(rows12));
110 //
111 //          Adjust INFO and the pivot indices.
112 //
113             if ((info==0) && (_info>0)) {
114                 info = _info + j - 1;
115             }
116             for (IndexType i=j; i<=min(m,j+jb-1); ++i) {
117                 piv(i) += j-1;
118             }
119 //
120 //          Apply interchanges to columns 1:J-1.
121 //
122             laswp(A(_,cols0), piv(rows1, j));
123 
124             if (j+jb<=n) {
125 //
126 //              Apply interchanges to columns J+JB:N.
127 //
128                 laswp(A(_,cols2), piv(rows1, j));
129 //
130 //              Compute block row of U.
131 //
132                 blas::sm(Left, NoTrans, T(1),
133                          A(rows1, cols1).lowerUnit(),
134                          A(rows1, cols2));
135 
136                 if (j+jb<=m) {
137 //
138 //                  Update trailing submatrix.
139 //
140                     blas::mm(NoTrans, NoTrans,
141                              T(-1), A(rows2, cols1), A(rows1, cols2),
142                              T( 1), A(rows2, cols2));
143                 }
144             }
145         }
146     }
147     return info;
148 }
149 
150 //== interface for native lapack ===============================================
151 
152 #ifdef CHECK_CXXLAPACK
153 
154 template <typename MA, typename VP>
155 typename GeMatrix<MA>::IndexType
156 trf_native(GeMatrix<MA> &_A, DenseVector<VP> &piv)
157 {
158     typedef typename GeMatrix<MA>::ElementType  T;
159 
160     const INTEGER    M = _A.numRows();
161     const INTEGER    N = _A.numCols();
162     T               *A = _A.data();
163     const INTEGER    LDA = _A.leadingDimension();
164     INTEGER         *IPIV = piv.data();
165     INTEGER          INFO;
166 
167     if (IsSame<T, DOUBLE>::value) {
168         LAPACK_IMPL(dgetrf)(&M, &N, A, &LDA, IPIV, &INFO);
169     } else {
170         ASSERT(0);
171     }
172 
173     return INFO;
174 }
175 
176 #endif // CHECK_CXXLAPACK
177 
178 //== public interface ==========================================================
179 
180 template <typename MA, typename VP>
181 typename GeMatrix<MA>::IndexType
182 trf(GeMatrix<MA> &A, DenseVector<VP> &piv)
183 {
184     typedef typename GeMatrix<MA>::IndexType    IndexType;
185 
186 //
187 //  Test the input parameters
188 //
189     ASSERT(A.firstRow()==1);
190     ASSERT(A.firstCol()==1);
191     ASSERT((piv.inc()>0 && piv.firstIndex()==1)
192         || (piv.inc()<0 && piv.firstIndex()==A.numRows()));
193 
194 #   ifdef CHECK_CXXLAPACK
195 //
196 //  Make copies of output arguments
197 //
198     typename GeMatrix<MA>::NoView       _A      = A;
199     typename DenseVector<VP>::NoView    _piv    = piv;
200 #   endif
201 
202 //
203 //  Call implementation
204 //
205     IndexType info = trf_generic(A, piv);
206 
207 #   ifdef CHECK_CXXLAPACK
208 //
209 //  Compare results
210 //
211     IndexType _info = trf_native(_A, _piv);
212 
213     bool failed = false;
214     if (! isIdentical(A, _A, " A""_A")) {
215         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
216         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
217         failed = true;
218     }
219 
220     if (! isIdentical(piv, _piv, " piv""_piv")) {
221         std::cerr << "CXXLAPACK:  piv = " << piv << std::endl;
222         std::cerr << "F77LAPACK: _piv = " << _piv << std::endl;
223         failed = true;
224     }
225 
226     if (! isIdentical(info, _info, " info""_info")) {
227         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
228         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
229         failed = true;
230     }
231 
232     if (failed) {
233         ASSERT(0);
234     }
235 
236 #   endif
237 
238     return info;
239 }
240 
241 //-- forwarding ----------------------------------------------------------------
242 template <typename MA, typename VP>
243 typename MA::IndexType
244 trf(MA &&A, VP &&piv)
245 {
246     typedef typename MA::IndexType  IndexType;
247 
248     CHECKPOINT_ENTER;
249     IndexType info =  trf(A, piv);
250     CHECKPOINT_LEAVE;
251 
252     return info;
253 }
254 
255 } } // namespace lapack, flens
256 
257 #endif // FLENS_LAPACK_GESV_TRF_TCC