1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35     SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.3.1) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *  -- April 2011                                                      --
 41  */
 42 
 43 #ifndef FLENS_LAPACK_GESV_POTRF_TCC
 44 #define FLENS_LAPACK_GESV_POTRF_TCC 1
 45 
 46 #include <algorithm>
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 #include <flens/lapack/interface/include/f77lapack.h>
 51 
 52 namespace flens { namespace lapack {
 53 
 54 //== generic lapack implementation =============================================
 55 
 56 template <typename MA>
 57 typename SyMatrix<MA>::IndexType
 58 potrf_generic(SyMatrix<MA> &A)
 59 {
 60     typedef typename SyMatrix<MA>::ElementType  T;
 61     typedef typename SyMatrix<MA>::IndexType    IndexType;
 62 
 63     const Underscore<IndexType> _;
 64 
 65     const IndexType n = A.dim();
 66     const bool upper = (A.upLo()==Upper);
 67 
 68     const T One(1);
 69 
 70     IndexType info = 0;
 71 //
 72 //  Quick return if possible
 73 //
 74     if (n==0) {
 75         return info;
 76     }
 77 //
 78 //  Determine the block size for this environment.
 79 //
 80     const char *upLo = (upper) ? "U" : "L";
 81     IndexType nb = ilaenv<T>(1"POGTRF", upLo, n);
 82 
 83     if ((nb<=1) || (nb>=n)) {
 84 //
 85 //      Use unblocked code.
 86 //
 87         info = potf2(A);
 88     } else {
 89 //
 90 //      Use blocked code.
 91 //
 92         if (upper) {
 93 //
 94 //          Compute the Cholesky factorization A = U**T*U.
 95 //
 96             for (IndexType j=1; j<=n; j+=nb) {
 97 //
 98 //              Partition matrix
 99 //
100                 IndexType jb = min(nb, n-j+1);
101 
102                 const auto range1 = _(1,j-1);
103                 const auto range2 = _(j,j+jb-1);
104                 const auto range3 = _(j+jb,n);
105 
106                 const auto A12 = A(range1,range2);
107                 const auto A13 = A(range1,range3);
108                 const auto U22 = A(range2,range2).upper();
109                 auto A22 = A(range2,range2).upper().symmetric();
110                 auto A23 = A(range2,range3);
111 //
112 //              Update and factorize the current diagonal block and test
113 //              for non-positive-definiteness.
114 //
115                 blas::rk(Trans, -One, A12, One, A22);
116                 info = potf2(A22);
117                 if (info!=0) {
118                     info += j-1;
119                     break;
120                 }
121                 if (j+jb<=n) {
122 //
123 //                  Compute the current block row.
124 //
125                     blas::mm(Trans, NoTrans, -One, A12, A13, One, A23);
126                     blas::sm(Left, Trans, One, U22, A23);
127                 }
128             }
129         } else {
130 //
131 //          Compute the Cholesky factorization A = L*L**T.
132 //
133             for (IndexType j=1; j<=n; j+=nb) {
134 //
135 //              Partition matrix
136 //
137                 IndexType jb = min(nb, n-j+1);
138 
139                 const auto range1 = _(1,j-1);
140                 const auto range2 = _(j,j+jb-1);
141                 const auto range3 = _(j+jb,n);
142 
143                 const auto A21 = A(range2,range1);
144                 const auto A31 = A(range3,range1);
145                 const auto L22 = A(range2,range2).lower();
146                 auto A22 = A(range2,range2).lower().symmetric();
147                 auto A32 = A(range3,range2);
148 //
149 //              Update and factorize the current diagonal block and test
150 //              for non-positive-definiteness.
151 //
152                 blas::rk(NoTrans, -One, A21, One, A22);
153                 info = potf2(A22);
154                 if (info!=0) {
155                     info += j-1;
156                     break;
157                 }
158                 if (j+jb<=n) {
159 //
160 //                  Compute the current block column.
161 //
162                     blas::mm(NoTrans, Trans, -One, A31, A21, One, A32);
163                     blas::sm(Right, Trans, One, L22, A32);
164                 }
165             }
166         }
167     }
168     return info;
169 }
170 
171 //== interface for native lapack ===============================================
172 
173 #ifdef CHECK_CXXLAPACK
174 
175 template <typename MA>
176 typename SyMatrix<MA>::IndexType
177 potrf_native(SyMatrix<MA> &_A)
178 {
179     typedef typename SyMatrix<MA>::ElementType  T;
180 
181     const char       UPLO = char(_A.upLo());
182     const INTEGER    N = _A.dim();
183     T               *A = _A.data();
184     const INTEGER    LDA = _A.leadingDimension();
185     INTEGER          INFO;
186 
187     if (IsSame<T, DOUBLE>::value) {
188         LAPACK_IMPL(dpotrf)(&UPLO, &N, A, &LDA, &INFO);
189     } else {
190         ASSERT(0);
191     }
192 
193     return INFO;
194 }
195 
196 #endif // CHECK_CXXLAPACK
197 
198 //== public interface ==========================================================
199 
200 template <typename MA>
201 typename SyMatrix<MA>::IndexType
202 potrf(SyMatrix<MA> &A)
203 {
204     typedef typename SyMatrix<MA>::IndexType    IndexType;
205 
206 //
207 //  Test the input parameters
208 //
209     ASSERT(A.firstRow()==1);
210     ASSERT(A.firstCol()==1);
211 
212 #   ifdef CHECK_CXXLAPACK
213 //
214 //  Make copies of output arguments
215 //
216     typename SyMatrix<MA>::NoView       _A      = A;
217 #   endif
218 
219 //
220 //  Call implementation
221 //
222     const IndexType info = potrf_generic(A);
223 
224 #   ifdef CHECK_CXXLAPACK
225 //
226 //  Compare results
227 //
228     const IndexType _info = potrf_native(_A);
229 
230     bool failed = false;
231     if (! isIdentical(A, _A, " A""_A")) {
232         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
233         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
234         failed = true;
235     }
236 
237     if (! isIdentical(info, _info, " info""_info")) {
238         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
239         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
240         failed = true;
241     }
242 
243     if (failed) {
244         ASSERT(0);
245     }
246 
247 #   endif
248 
249     return info;
250 }
251 
252 //-- forwarding ----------------------------------------------------------------
253 template <typename MA>
254 typename MA::IndexType
255 potrf(MA &&A)
256 {
257     typedef typename MA::IndexType  IndexType;
258 
259     CHECKPOINT_ENTER;
260     const IndexType info =  potrf(A);
261     CHECKPOINT_LEAVE;
262 
263     return info;
264 }
265 
266 } } // namespace lapack, flens
267 
268 #endif // FLENS_LAPACK_GESV_POTRF_TCC