1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
36 *
37 * -- LAPACK routine (version 3.2) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_GESV_TRF_TCC
44 #define FLENS_LAPACK_GESV_TRF_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA, typename VP>
57 typename GeMatrix<MA>::IndexType
58 trf_generic(GeMatrix<MA> &A, DenseVector<VP> &piv)
59 {
60 using std::max;
61 using std::min;
62
63 typedef typename GeMatrix<MA>::ElementType T;
64 typedef typename GeMatrix<MA>::IndexType IndexType;
65
66 const Underscore<IndexType> _;
67
68 const IndexType m = A.numRows();
69 const IndexType n = A.numCols();
70
71 IndexType info = 0;
72 //
73 // Quick return if possible
74 //
75 if ((m==0) || (n==0)) {
76 return info;
77 }
78 //
79 // Determine the block size for this environment.
80 //
81 IndexType bs = ilaenv<T>(1, "GETRF", "", m, n, -1, -1);
82
83 if ((bs<=1) || (bs>=min(m,n))) {
84 //
85 // Use unblocked code.
86 //
87 info = tf2(A, piv);
88 } else {
89 //
90 // Use blocked code.
91 //
92 for (IndexType j=1; j<=min(m,n); j+=bs) {
93 IndexType jb = min(min(m,n)-j+1, bs);
94 //
95 // Row and column partitioning of A
96 //
97 const auto rows1 = _( j, j+jb-1);
98 const auto rows2 = _( j+jb, m);
99
100 const auto rows12 = _( j, m);
101
102 const auto cols0 = _( 1, j-1);
103 const auto cols1 = _( j, j+jb-1);
104 const auto cols2 = _( j+jb, n);
105 //
106 // Factor diagonal and subdiagonal blocks and test for exact
107 // singularity.
108 //
109 IndexType _info = tf2(A(rows12, cols1), piv(rows12));
110 //
111 // Adjust INFO and the pivot indices.
112 //
113 if ((info==0) && (_info>0)) {
114 info = _info + j - 1;
115 }
116 for (IndexType i=j; i<=min(m,j+jb-1); ++i) {
117 piv(i) += j-1;
118 }
119 //
120 // Apply interchanges to columns 1:J-1.
121 //
122 laswp(A(_,cols0), piv(rows1, j));
123
124 if (j+jb<=n) {
125 //
126 // Apply interchanges to columns J+JB:N.
127 //
128 laswp(A(_,cols2), piv(rows1, j));
129 //
130 // Compute block row of U.
131 //
132 blas::sm(Left, NoTrans, T(1),
133 A(rows1, cols1).lowerUnit(),
134 A(rows1, cols2));
135
136 if (j+jb<=m) {
137 //
138 // Update trailing submatrix.
139 //
140 blas::mm(NoTrans, NoTrans,
141 T(-1), A(rows2, cols1), A(rows1, cols2),
142 T( 1), A(rows2, cols2));
143 }
144 }
145 }
146 }
147 return info;
148 }
149
150 //== interface for native lapack ===============================================
151
152 #ifdef CHECK_CXXLAPACK
153
154 template <typename MA, typename VP>
155 typename GeMatrix<MA>::IndexType
156 trf_native(GeMatrix<MA> &_A, DenseVector<VP> &piv)
157 {
158 typedef typename GeMatrix<MA>::ElementType T;
159
160 const INTEGER M = _A.numRows();
161 const INTEGER N = _A.numCols();
162 T *A = _A.data();
163 const INTEGER LDA = _A.leadingDimension();
164 INTEGER *IPIV = piv.data();
165 INTEGER INFO;
166
167 if (IsSame<T, DOUBLE>::value) {
168 LAPACK_IMPL(dgetrf)(&M, &N, A, &LDA, IPIV, &INFO);
169 } else {
170 ASSERT(0);
171 }
172
173 return INFO;
174 }
175
176 #endif // CHECK_CXXLAPACK
177
178 //== public interface ==========================================================
179
180 template <typename MA, typename VP>
181 typename GeMatrix<MA>::IndexType
182 trf(GeMatrix<MA> &A, DenseVector<VP> &piv)
183 {
184 typedef typename GeMatrix<MA>::IndexType IndexType;
185
186 //
187 // Test the input parameters
188 //
189 ASSERT(A.firstRow()==1);
190 ASSERT(A.firstCol()==1);
191 ASSERT((piv.inc()>0 && piv.firstIndex()==1)
192 || (piv.inc()<0 && piv.firstIndex()==A.numRows()));
193
194 # ifdef CHECK_CXXLAPACK
195 //
196 // Make copies of output arguments
197 //
198 typename GeMatrix<MA>::NoView _A = A;
199 typename DenseVector<VP>::NoView _piv = piv;
200 # endif
201
202 //
203 // Call implementation
204 //
205 IndexType info = trf_generic(A, piv);
206
207 # ifdef CHECK_CXXLAPACK
208 //
209 // Compare results
210 //
211 IndexType _info = trf_native(_A, _piv);
212
213 bool failed = false;
214 if (! isIdentical(A, _A, " A", "_A")) {
215 std::cerr << "CXXLAPACK: A = " << A << std::endl;
216 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
217 failed = true;
218 }
219
220 if (! isIdentical(piv, _piv, " piv", "_piv")) {
221 std::cerr << "CXXLAPACK: piv = " << piv << std::endl;
222 std::cerr << "F77LAPACK: _piv = " << _piv << std::endl;
223 failed = true;
224 }
225
226 if (! isIdentical(info, _info, " info", "_info")) {
227 std::cerr << "CXXLAPACK: info = " << info << std::endl;
228 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
229 failed = true;
230 }
231
232 if (failed) {
233 ASSERT(0);
234 }
235
236 # endif
237
238 return info;
239 }
240
241 //-- forwarding ----------------------------------------------------------------
242 template <typename MA, typename VP>
243 typename MA::IndexType
244 trf(MA &&A, VP &&piv)
245 {
246 typedef typename MA::IndexType IndexType;
247
248 CHECKPOINT_ENTER;
249 IndexType info = trf(A, piv);
250 CHECKPOINT_LEAVE;
251
252 return info;
253 }
254
255 } } // namespace lapack, flens
256
257 #endif // FLENS_LAPACK_GESV_TRF_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
36 *
37 * -- LAPACK routine (version 3.2) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_GESV_TRF_TCC
44 #define FLENS_LAPACK_GESV_TRF_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA, typename VP>
57 typename GeMatrix<MA>::IndexType
58 trf_generic(GeMatrix<MA> &A, DenseVector<VP> &piv)
59 {
60 using std::max;
61 using std::min;
62
63 typedef typename GeMatrix<MA>::ElementType T;
64 typedef typename GeMatrix<MA>::IndexType IndexType;
65
66 const Underscore<IndexType> _;
67
68 const IndexType m = A.numRows();
69 const IndexType n = A.numCols();
70
71 IndexType info = 0;
72 //
73 // Quick return if possible
74 //
75 if ((m==0) || (n==0)) {
76 return info;
77 }
78 //
79 // Determine the block size for this environment.
80 //
81 IndexType bs = ilaenv<T>(1, "GETRF", "", m, n, -1, -1);
82
83 if ((bs<=1) || (bs>=min(m,n))) {
84 //
85 // Use unblocked code.
86 //
87 info = tf2(A, piv);
88 } else {
89 //
90 // Use blocked code.
91 //
92 for (IndexType j=1; j<=min(m,n); j+=bs) {
93 IndexType jb = min(min(m,n)-j+1, bs);
94 //
95 // Row and column partitioning of A
96 //
97 const auto rows1 = _( j, j+jb-1);
98 const auto rows2 = _( j+jb, m);
99
100 const auto rows12 = _( j, m);
101
102 const auto cols0 = _( 1, j-1);
103 const auto cols1 = _( j, j+jb-1);
104 const auto cols2 = _( j+jb, n);
105 //
106 // Factor diagonal and subdiagonal blocks and test for exact
107 // singularity.
108 //
109 IndexType _info = tf2(A(rows12, cols1), piv(rows12));
110 //
111 // Adjust INFO and the pivot indices.
112 //
113 if ((info==0) && (_info>0)) {
114 info = _info + j - 1;
115 }
116 for (IndexType i=j; i<=min(m,j+jb-1); ++i) {
117 piv(i) += j-1;
118 }
119 //
120 // Apply interchanges to columns 1:J-1.
121 //
122 laswp(A(_,cols0), piv(rows1, j));
123
124 if (j+jb<=n) {
125 //
126 // Apply interchanges to columns J+JB:N.
127 //
128 laswp(A(_,cols2), piv(rows1, j));
129 //
130 // Compute block row of U.
131 //
132 blas::sm(Left, NoTrans, T(1),
133 A(rows1, cols1).lowerUnit(),
134 A(rows1, cols2));
135
136 if (j+jb<=m) {
137 //
138 // Update trailing submatrix.
139 //
140 blas::mm(NoTrans, NoTrans,
141 T(-1), A(rows2, cols1), A(rows1, cols2),
142 T( 1), A(rows2, cols2));
143 }
144 }
145 }
146 }
147 return info;
148 }
149
150 //== interface for native lapack ===============================================
151
152 #ifdef CHECK_CXXLAPACK
153
154 template <typename MA, typename VP>
155 typename GeMatrix<MA>::IndexType
156 trf_native(GeMatrix<MA> &_A, DenseVector<VP> &piv)
157 {
158 typedef typename GeMatrix<MA>::ElementType T;
159
160 const INTEGER M = _A.numRows();
161 const INTEGER N = _A.numCols();
162 T *A = _A.data();
163 const INTEGER LDA = _A.leadingDimension();
164 INTEGER *IPIV = piv.data();
165 INTEGER INFO;
166
167 if (IsSame<T, DOUBLE>::value) {
168 LAPACK_IMPL(dgetrf)(&M, &N, A, &LDA, IPIV, &INFO);
169 } else {
170 ASSERT(0);
171 }
172
173 return INFO;
174 }
175
176 #endif // CHECK_CXXLAPACK
177
178 //== public interface ==========================================================
179
180 template <typename MA, typename VP>
181 typename GeMatrix<MA>::IndexType
182 trf(GeMatrix<MA> &A, DenseVector<VP> &piv)
183 {
184 typedef typename GeMatrix<MA>::IndexType IndexType;
185
186 //
187 // Test the input parameters
188 //
189 ASSERT(A.firstRow()==1);
190 ASSERT(A.firstCol()==1);
191 ASSERT((piv.inc()>0 && piv.firstIndex()==1)
192 || (piv.inc()<0 && piv.firstIndex()==A.numRows()));
193
194 # ifdef CHECK_CXXLAPACK
195 //
196 // Make copies of output arguments
197 //
198 typename GeMatrix<MA>::NoView _A = A;
199 typename DenseVector<VP>::NoView _piv = piv;
200 # endif
201
202 //
203 // Call implementation
204 //
205 IndexType info = trf_generic(A, piv);
206
207 # ifdef CHECK_CXXLAPACK
208 //
209 // Compare results
210 //
211 IndexType _info = trf_native(_A, _piv);
212
213 bool failed = false;
214 if (! isIdentical(A, _A, " A", "_A")) {
215 std::cerr << "CXXLAPACK: A = " << A << std::endl;
216 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
217 failed = true;
218 }
219
220 if (! isIdentical(piv, _piv, " piv", "_piv")) {
221 std::cerr << "CXXLAPACK: piv = " << piv << std::endl;
222 std::cerr << "F77LAPACK: _piv = " << _piv << std::endl;
223 failed = true;
224 }
225
226 if (! isIdentical(info, _info, " info", "_info")) {
227 std::cerr << "CXXLAPACK: info = " << info << std::endl;
228 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
229 failed = true;
230 }
231
232 if (failed) {
233 ASSERT(0);
234 }
235
236 # endif
237
238 return info;
239 }
240
241 //-- forwarding ----------------------------------------------------------------
242 template <typename MA, typename VP>
243 typename MA::IndexType
244 trf(MA &&A, VP &&piv)
245 {
246 typedef typename MA::IndexType IndexType;
247
248 CHECKPOINT_ENTER;
249 IndexType info = trf(A, piv);
250 CHECKPOINT_LEAVE;
251
252 return info;
253 }
254
255 } } // namespace lapack, flens
256
257 #endif // FLENS_LAPACK_GESV_TRF_TCC