1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
36 *
37 * -- LAPACK routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011 --
41 */
42
43 #ifndef FLENS_LAPACK_GESV_POTRF_TCC
44 #define FLENS_LAPACK_GESV_POTRF_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA>
57 typename SyMatrix<MA>::IndexType
58 potrf_generic(SyMatrix<MA> &A)
59 {
60 typedef typename SyMatrix<MA>::ElementType T;
61 typedef typename SyMatrix<MA>::IndexType IndexType;
62
63 const Underscore<IndexType> _;
64
65 const IndexType n = A.dim();
66 const bool upper = (A.upLo()==Upper);
67
68 const T One(1);
69
70 IndexType info = 0;
71 //
72 // Quick return if possible
73 //
74 if (n==0) {
75 return info;
76 }
77 //
78 // Determine the block size for this environment.
79 //
80 const char *upLo = (upper) ? "U" : "L";
81 IndexType nb = ilaenv<T>(1, "POGTRF", upLo, n);
82
83 if ((nb<=1) || (nb>=n)) {
84 //
85 // Use unblocked code.
86 //
87 info = potf2(A);
88 } else {
89 //
90 // Use blocked code.
91 //
92 if (upper) {
93 //
94 // Compute the Cholesky factorization A = U**T*U.
95 //
96 for (IndexType j=1; j<=n; j+=nb) {
97 //
98 // Partition matrix
99 //
100 IndexType jb = min(nb, n-j+1);
101
102 const auto range1 = _(1,j-1);
103 const auto range2 = _(j,j+jb-1);
104 const auto range3 = _(j+jb,n);
105
106 const auto A12 = A(range1,range2);
107 const auto A13 = A(range1,range3);
108 const auto U22 = A(range2,range2).upper();
109 auto A22 = A(range2,range2).upper().symmetric();
110 auto A23 = A(range2,range3);
111 //
112 // Update and factorize the current diagonal block and test
113 // for non-positive-definiteness.
114 //
115 blas::rk(Trans, -One, A12, One, A22);
116 info = potf2(A22);
117 if (info!=0) {
118 info += j-1;
119 break;
120 }
121 if (j+jb<=n) {
122 //
123 // Compute the current block row.
124 //
125 blas::mm(Trans, NoTrans, -One, A12, A13, One, A23);
126 blas::sm(Left, Trans, One, U22, A23);
127 }
128 }
129 } else {
130 //
131 // Compute the Cholesky factorization A = L*L**T.
132 //
133 for (IndexType j=1; j<=n; j+=nb) {
134 //
135 // Partition matrix
136 //
137 IndexType jb = min(nb, n-j+1);
138
139 const auto range1 = _(1,j-1);
140 const auto range2 = _(j,j+jb-1);
141 const auto range3 = _(j+jb,n);
142
143 const auto A21 = A(range2,range1);
144 const auto A31 = A(range3,range1);
145 const auto L22 = A(range2,range2).lower();
146 auto A22 = A(range2,range2).lower().symmetric();
147 auto A32 = A(range3,range2);
148 //
149 // Update and factorize the current diagonal block and test
150 // for non-positive-definiteness.
151 //
152 blas::rk(NoTrans, -One, A21, One, A22);
153 info = potf2(A22);
154 if (info!=0) {
155 info += j-1;
156 break;
157 }
158 if (j+jb<=n) {
159 //
160 // Compute the current block column.
161 //
162 blas::mm(NoTrans, Trans, -One, A31, A21, One, A32);
163 blas::sm(Right, Trans, One, L22, A32);
164 }
165 }
166 }
167 }
168 return info;
169 }
170
171 //== interface for native lapack ===============================================
172
173 #ifdef CHECK_CXXLAPACK
174
175 template <typename MA>
176 typename SyMatrix<MA>::IndexType
177 potrf_native(SyMatrix<MA> &_A)
178 {
179 typedef typename SyMatrix<MA>::ElementType T;
180
181 const char UPLO = char(_A.upLo());
182 const INTEGER N = _A.dim();
183 T *A = _A.data();
184 const INTEGER LDA = _A.leadingDimension();
185 INTEGER INFO;
186
187 if (IsSame<T, DOUBLE>::value) {
188 LAPACK_IMPL(dpotrf)(&UPLO, &N, A, &LDA, &INFO);
189 } else {
190 ASSERT(0);
191 }
192
193 return INFO;
194 }
195
196 #endif // CHECK_CXXLAPACK
197
198 //== public interface ==========================================================
199
200 template <typename MA>
201 typename SyMatrix<MA>::IndexType
202 potrf(SyMatrix<MA> &A)
203 {
204 typedef typename SyMatrix<MA>::IndexType IndexType;
205
206 //
207 // Test the input parameters
208 //
209 ASSERT(A.firstRow()==1);
210 ASSERT(A.firstCol()==1);
211
212 # ifdef CHECK_CXXLAPACK
213 //
214 // Make copies of output arguments
215 //
216 typename SyMatrix<MA>::NoView _A = A;
217 # endif
218
219 //
220 // Call implementation
221 //
222 const IndexType info = potrf_generic(A);
223
224 # ifdef CHECK_CXXLAPACK
225 //
226 // Compare results
227 //
228 const IndexType _info = potrf_native(_A);
229
230 bool failed = false;
231 if (! isIdentical(A, _A, " A", "_A")) {
232 std::cerr << "CXXLAPACK: A = " << A << std::endl;
233 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
234 failed = true;
235 }
236
237 if (! isIdentical(info, _info, " info", "_info")) {
238 std::cerr << "CXXLAPACK: info = " << info << std::endl;
239 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
240 failed = true;
241 }
242
243 if (failed) {
244 ASSERT(0);
245 }
246
247 # endif
248
249 return info;
250 }
251
252 //-- forwarding ----------------------------------------------------------------
253 template <typename MA>
254 typename MA::IndexType
255 potrf(MA &&A)
256 {
257 typedef typename MA::IndexType IndexType;
258
259 CHECKPOINT_ENTER;
260 const IndexType info = potrf(A);
261 CHECKPOINT_LEAVE;
262
263 return info;
264 }
265
266 } } // namespace lapack, flens
267
268 #endif // FLENS_LAPACK_GESV_POTRF_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
36 *
37 * -- LAPACK routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011 --
41 */
42
43 #ifndef FLENS_LAPACK_GESV_POTRF_TCC
44 #define FLENS_LAPACK_GESV_POTRF_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA>
57 typename SyMatrix<MA>::IndexType
58 potrf_generic(SyMatrix<MA> &A)
59 {
60 typedef typename SyMatrix<MA>::ElementType T;
61 typedef typename SyMatrix<MA>::IndexType IndexType;
62
63 const Underscore<IndexType> _;
64
65 const IndexType n = A.dim();
66 const bool upper = (A.upLo()==Upper);
67
68 const T One(1);
69
70 IndexType info = 0;
71 //
72 // Quick return if possible
73 //
74 if (n==0) {
75 return info;
76 }
77 //
78 // Determine the block size for this environment.
79 //
80 const char *upLo = (upper) ? "U" : "L";
81 IndexType nb = ilaenv<T>(1, "POGTRF", upLo, n);
82
83 if ((nb<=1) || (nb>=n)) {
84 //
85 // Use unblocked code.
86 //
87 info = potf2(A);
88 } else {
89 //
90 // Use blocked code.
91 //
92 if (upper) {
93 //
94 // Compute the Cholesky factorization A = U**T*U.
95 //
96 for (IndexType j=1; j<=n; j+=nb) {
97 //
98 // Partition matrix
99 //
100 IndexType jb = min(nb, n-j+1);
101
102 const auto range1 = _(1,j-1);
103 const auto range2 = _(j,j+jb-1);
104 const auto range3 = _(j+jb,n);
105
106 const auto A12 = A(range1,range2);
107 const auto A13 = A(range1,range3);
108 const auto U22 = A(range2,range2).upper();
109 auto A22 = A(range2,range2).upper().symmetric();
110 auto A23 = A(range2,range3);
111 //
112 // Update and factorize the current diagonal block and test
113 // for non-positive-definiteness.
114 //
115 blas::rk(Trans, -One, A12, One, A22);
116 info = potf2(A22);
117 if (info!=0) {
118 info += j-1;
119 break;
120 }
121 if (j+jb<=n) {
122 //
123 // Compute the current block row.
124 //
125 blas::mm(Trans, NoTrans, -One, A12, A13, One, A23);
126 blas::sm(Left, Trans, One, U22, A23);
127 }
128 }
129 } else {
130 //
131 // Compute the Cholesky factorization A = L*L**T.
132 //
133 for (IndexType j=1; j<=n; j+=nb) {
134 //
135 // Partition matrix
136 //
137 IndexType jb = min(nb, n-j+1);
138
139 const auto range1 = _(1,j-1);
140 const auto range2 = _(j,j+jb-1);
141 const auto range3 = _(j+jb,n);
142
143 const auto A21 = A(range2,range1);
144 const auto A31 = A(range3,range1);
145 const auto L22 = A(range2,range2).lower();
146 auto A22 = A(range2,range2).lower().symmetric();
147 auto A32 = A(range3,range2);
148 //
149 // Update and factorize the current diagonal block and test
150 // for non-positive-definiteness.
151 //
152 blas::rk(NoTrans, -One, A21, One, A22);
153 info = potf2(A22);
154 if (info!=0) {
155 info += j-1;
156 break;
157 }
158 if (j+jb<=n) {
159 //
160 // Compute the current block column.
161 //
162 blas::mm(NoTrans, Trans, -One, A31, A21, One, A32);
163 blas::sm(Right, Trans, One, L22, A32);
164 }
165 }
166 }
167 }
168 return info;
169 }
170
171 //== interface for native lapack ===============================================
172
173 #ifdef CHECK_CXXLAPACK
174
175 template <typename MA>
176 typename SyMatrix<MA>::IndexType
177 potrf_native(SyMatrix<MA> &_A)
178 {
179 typedef typename SyMatrix<MA>::ElementType T;
180
181 const char UPLO = char(_A.upLo());
182 const INTEGER N = _A.dim();
183 T *A = _A.data();
184 const INTEGER LDA = _A.leadingDimension();
185 INTEGER INFO;
186
187 if (IsSame<T, DOUBLE>::value) {
188 LAPACK_IMPL(dpotrf)(&UPLO, &N, A, &LDA, &INFO);
189 } else {
190 ASSERT(0);
191 }
192
193 return INFO;
194 }
195
196 #endif // CHECK_CXXLAPACK
197
198 //== public interface ==========================================================
199
200 template <typename MA>
201 typename SyMatrix<MA>::IndexType
202 potrf(SyMatrix<MA> &A)
203 {
204 typedef typename SyMatrix<MA>::IndexType IndexType;
205
206 //
207 // Test the input parameters
208 //
209 ASSERT(A.firstRow()==1);
210 ASSERT(A.firstCol()==1);
211
212 # ifdef CHECK_CXXLAPACK
213 //
214 // Make copies of output arguments
215 //
216 typename SyMatrix<MA>::NoView _A = A;
217 # endif
218
219 //
220 // Call implementation
221 //
222 const IndexType info = potrf_generic(A);
223
224 # ifdef CHECK_CXXLAPACK
225 //
226 // Compare results
227 //
228 const IndexType _info = potrf_native(_A);
229
230 bool failed = false;
231 if (! isIdentical(A, _A, " A", "_A")) {
232 std::cerr << "CXXLAPACK: A = " << A << std::endl;
233 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
234 failed = true;
235 }
236
237 if (! isIdentical(info, _info, " info", "_info")) {
238 std::cerr << "CXXLAPACK: info = " << info << std::endl;
239 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
240 failed = true;
241 }
242
243 if (failed) {
244 ASSERT(0);
245 }
246
247 # endif
248
249 return info;
250 }
251
252 //-- forwarding ----------------------------------------------------------------
253 template <typename MA>
254 typename MA::IndexType
255 potrf(MA &&A)
256 {
257 typedef typename MA::IndexType IndexType;
258
259 CHECKPOINT_ENTER;
260 const IndexType info = potrf(A);
261 CHECKPOINT_LEAVE;
262
263 return info;
264 }
265
266 } } // namespace lapack, flens
267
268 #endif // FLENS_LAPACK_GESV_POTRF_TCC