1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.2) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *     November 2006
 41  */
 42 
 43 #ifndef FLENS_LAPACK_GESV_TRF_TCC
 44 #define FLENS_LAPACK_GESV_TRF_TCC 1
 45 
 46 #include <algorithm>
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 #include <flens/lapack/interface/include/f77lapack.h>
 51 
 52 namespace flens { namespace lapack {
 53 
 54 //== generic lapack implementation =============================================
 55 
 56 template <typename MA, typename VP>
 57 typename GeMatrix<MA>::IndexType
 58 trf_generic(GeMatrix<MA> &A, DenseVector<VP> &piv)
 59 {
 60     using std::max;
 61     using std::min;
 62 
 63     typedef typename GeMatrix<MA>::ElementType  T;
 64     typedef typename GeMatrix<MA>::IndexType    IndexType;
 65 
 66     const Underscore<IndexType> _;
 67 
 68     const IndexType m = A.numRows();
 69     const IndexType n = A.numCols();
 70 
 71     IndexType info = 0;
 72 //
 73 //  Quick return if possible
 74 //
 75     if ((m==0) || (n==0)) {
 76         return info;
 77     }
 78 //
 79 //  Determine the block size for this environment.
 80 //
 81     IndexType bs = ilaenv<T>(1"GETRF""", m, n, -1, -1);
 82 
 83     if ((bs<=1) || (bs>=min(m,n))) {
 84 //
 85 //      Use unblocked code.
 86 //
 87         info = tf2(A, piv);
 88     } else {
 89 //
 90 //      Use blocked code.
 91 //
 92         for (IndexType j=1; j<=min(m,n); j+=bs) {
 93             IndexType jb = min(min(m,n)-j+1, bs);
 94 //
 95 //          Row and column partitioning of A
 96 //
 97             const auto rows1    = _(    j, j+jb-1);
 98             const auto rows2    = _( j+jb,      m);
 99 
100             const auto rows12   = _(    j,      m);
101 
102             const auto cols0    = _(    1,    j-1);
103             const auto cols1    = _(    j, j+jb-1);
104             const auto cols2    = _( j+jb,      n);
105 //
106 //          Factor diagonal and subdiagonal blocks and test for exact
107 //          singularity.
108 //
109             IndexType _info = tf2(A(rows12, cols1), piv(rows12));
110 //
111 //          Adjust INFO and the pivot indices.
112 //
113             if ((info==0) && (_info>0)) {
114                 info = _info + j - 1;
115             }
116             for (IndexType i=j; i<=min(m,j+jb-1); ++i) {
117                 piv(i) += j-1;
118             }
119 //
120 //          Apply interchanges to columns 1:J-1.
121 //
122             laswp(A(_,cols0), piv(rows1, j));
123 
124             if (j+jb<=n) {
125 //
126 //              Apply interchanges to columns J+JB:N.
127 //
128                 laswp(A(_,cols2), piv(rows1, j));
129 //
130 //              Compute block row of U.
131 //
132                 blas::sm(Left, NoTrans, T(1),
133                          A(rows1, cols1).lowerUnit(),
134                          A(rows1, cols2));
135 
136                 if (j+jb<=m) {
137 //
138 //                  Update trailing submatrix.
139 //
140                     blas::mm(NoTrans, NoTrans,
141                              T(-1), A(rows2, cols1), A(rows1, cols2),
142                              T( 1), A(rows2, cols2));
143                 }
144             }
145         }
146     }
147     return info;
148 }
149 
150 //== interface for native lapack ===============================================
151 
152 #ifdef CHECK_CXXLAPACK
153 
154 template <typename MA, typename VP>
155 typename GeMatrix<MA>::IndexType
156 trf_native(GeMatrix<MA> &A, DenseVector<VP> &piv)
157 {
158     return cxxlapack::getrf(A.numRows(), A.numCols(),
159                             A.data(), A.leadingDimension(),
160                             piv.data());
161 }
162 
163 #endif // CHECK_CXXLAPACK
164 
165 //== public interface ==========================================================
166 
167 template <typename MA, typename VP>
168 typename GeMatrix<MA>::IndexType
169 trf(GeMatrix<MA> &A, DenseVector<VP> &piv)
170 {
171     typedef typename GeMatrix<MA>::IndexType    IndexType;
172 
173 //
174 //  Test the input parameters
175 //
176     ASSERT(A.firstRow()==1);
177     ASSERT(A.firstCol()==1);
178     ASSERT((piv.inc()>0 && piv.firstIndex()==1)
179         || (piv.inc()<0 && piv.firstIndex()==A.numRows()));
180 
181 #   ifdef CHECK_CXXLAPACK
182 //
183 //  Make copies of output arguments
184 //
185     typename GeMatrix<MA>::NoView       _A      = A;
186     typename DenseVector<VP>::NoView    _piv    = piv;
187 #   endif
188 
189 //
190 //  Call implementation
191 //
192     IndexType info = trf_generic(A, piv);
193 
194 #   ifdef CHECK_CXXLAPACK
195 //
196 //  Compare results
197 //
198     IndexType _info = trf_native(_A, _piv);
199 
200     bool failed = false;
201     if (! isIdentical(A, _A, " A""_A")) {
202         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
203         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
204         failed = true;
205     }
206 
207     if (! isIdentical(piv, _piv, " piv""_piv")) {
208         std::cerr << "CXXLAPACK:  piv = " << piv << std::endl;
209         std::cerr << "F77LAPACK: _piv = " << _piv << std::endl;
210         failed = true;
211     }
212 
213     if (! isIdentical(info, _info, " info""_info")) {
214         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
215         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
216         failed = true;
217     }
218 
219     if (failed) {
220         ASSERT(0);
221     }
222 
223 #   endif
224 
225     return info;
226 }
227 
228 //-- forwarding ----------------------------------------------------------------
229 template <typename MA, typename VP>
230 typename MA::IndexType
231 trf(MA &&A, VP &&piv)
232 {
233     typedef typename MA::IndexType  IndexType;
234 
235     CHECKPOINT_ENTER;
236     IndexType info =  trf(A, piv);
237     CHECKPOINT_LEAVE;
238 
239     return info;
240 }
241 
242 } } // namespace lapack, flens
243 
244 #endif // FLENS_LAPACK_GESV_TRF_TCC