1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
36 *
37 * -- LAPACK routine (version 3.2) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_GESV_TRF_TCC
44 #define FLENS_LAPACK_GESV_TRF_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA, typename VP>
57 typename GeMatrix<MA>::IndexType
58 trf_generic(GeMatrix<MA> &A, DenseVector<VP> &piv)
59 {
60 using std::max;
61 using std::min;
62
63 typedef typename GeMatrix<MA>::ElementType T;
64 typedef typename GeMatrix<MA>::IndexType IndexType;
65
66 const Underscore<IndexType> _;
67
68 const IndexType m = A.numRows();
69 const IndexType n = A.numCols();
70
71 IndexType info = 0;
72 //
73 // Quick return if possible
74 //
75 if ((m==0) || (n==0)) {
76 return info;
77 }
78 //
79 // Determine the block size for this environment.
80 //
81 IndexType bs = ilaenv<T>(1, "GETRF", "", m, n, -1, -1);
82
83 if ((bs<=1) || (bs>=min(m,n))) {
84 //
85 // Use unblocked code.
86 //
87 info = tf2(A, piv);
88 } else {
89 //
90 // Use blocked code.
91 //
92 for (IndexType j=1; j<=min(m,n); j+=bs) {
93 IndexType jb = min(min(m,n)-j+1, bs);
94 //
95 // Row and column partitioning of A
96 //
97 const auto rows1 = _( j, j+jb-1);
98 const auto rows2 = _( j+jb, m);
99
100 const auto rows12 = _( j, m);
101
102 const auto cols0 = _( 1, j-1);
103 const auto cols1 = _( j, j+jb-1);
104 const auto cols2 = _( j+jb, n);
105 //
106 // Factor diagonal and subdiagonal blocks and test for exact
107 // singularity.
108 //
109 IndexType _info = tf2(A(rows12, cols1), piv(rows12));
110 //
111 // Adjust INFO and the pivot indices.
112 //
113 if ((info==0) && (_info>0)) {
114 info = _info + j - 1;
115 }
116 for (IndexType i=j; i<=min(m,j+jb-1); ++i) {
117 piv(i) += j-1;
118 }
119 //
120 // Apply interchanges to columns 1:J-1.
121 //
122 laswp(A(_,cols0), piv(rows1, j));
123
124 if (j+jb<=n) {
125 //
126 // Apply interchanges to columns J+JB:N.
127 //
128 laswp(A(_,cols2), piv(rows1, j));
129 //
130 // Compute block row of U.
131 //
132 blas::sm(Left, NoTrans, T(1),
133 A(rows1, cols1).lowerUnit(),
134 A(rows1, cols2));
135
136 if (j+jb<=m) {
137 //
138 // Update trailing submatrix.
139 //
140 blas::mm(NoTrans, NoTrans,
141 T(-1), A(rows2, cols1), A(rows1, cols2),
142 T( 1), A(rows2, cols2));
143 }
144 }
145 }
146 }
147 return info;
148 }
149
150 //== interface for native lapack ===============================================
151
152 #ifdef CHECK_CXXLAPACK
153
154 template <typename MA, typename VP>
155 typename GeMatrix<MA>::IndexType
156 trf_native(GeMatrix<MA> &A, DenseVector<VP> &piv)
157 {
158 return cxxlapack::getrf(A.numRows(), A.numCols(),
159 A.data(), A.leadingDimension(),
160 piv.data());
161 }
162
163 #endif // CHECK_CXXLAPACK
164
165 //== public interface ==========================================================
166
167 template <typename MA, typename VP>
168 typename GeMatrix<MA>::IndexType
169 trf(GeMatrix<MA> &A, DenseVector<VP> &piv)
170 {
171 typedef typename GeMatrix<MA>::IndexType IndexType;
172
173 //
174 // Test the input parameters
175 //
176 ASSERT(A.firstRow()==1);
177 ASSERT(A.firstCol()==1);
178 ASSERT((piv.inc()>0 && piv.firstIndex()==1)
179 || (piv.inc()<0 && piv.firstIndex()==A.numRows()));
180
181 # ifdef CHECK_CXXLAPACK
182 //
183 // Make copies of output arguments
184 //
185 typename GeMatrix<MA>::NoView _A = A;
186 typename DenseVector<VP>::NoView _piv = piv;
187 # endif
188
189 //
190 // Call implementation
191 //
192 IndexType info = trf_generic(A, piv);
193
194 # ifdef CHECK_CXXLAPACK
195 //
196 // Compare results
197 //
198 IndexType _info = trf_native(_A, _piv);
199
200 bool failed = false;
201 if (! isIdentical(A, _A, " A", "_A")) {
202 std::cerr << "CXXLAPACK: A = " << A << std::endl;
203 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
204 failed = true;
205 }
206
207 if (! isIdentical(piv, _piv, " piv", "_piv")) {
208 std::cerr << "CXXLAPACK: piv = " << piv << std::endl;
209 std::cerr << "F77LAPACK: _piv = " << _piv << std::endl;
210 failed = true;
211 }
212
213 if (! isIdentical(info, _info, " info", "_info")) {
214 std::cerr << "CXXLAPACK: info = " << info << std::endl;
215 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
216 failed = true;
217 }
218
219 if (failed) {
220 ASSERT(0);
221 }
222
223 # endif
224
225 return info;
226 }
227
228 //-- forwarding ----------------------------------------------------------------
229 template <typename MA, typename VP>
230 typename MA::IndexType
231 trf(MA &&A, VP &&piv)
232 {
233 typedef typename MA::IndexType IndexType;
234
235 CHECKPOINT_ENTER;
236 IndexType info = trf(A, piv);
237 CHECKPOINT_LEAVE;
238
239 return info;
240 }
241
242 } } // namespace lapack, flens
243
244 #endif // FLENS_LAPACK_GESV_TRF_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
36 *
37 * -- LAPACK routine (version 3.2) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_GESV_TRF_TCC
44 #define FLENS_LAPACK_GESV_TRF_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA, typename VP>
57 typename GeMatrix<MA>::IndexType
58 trf_generic(GeMatrix<MA> &A, DenseVector<VP> &piv)
59 {
60 using std::max;
61 using std::min;
62
63 typedef typename GeMatrix<MA>::ElementType T;
64 typedef typename GeMatrix<MA>::IndexType IndexType;
65
66 const Underscore<IndexType> _;
67
68 const IndexType m = A.numRows();
69 const IndexType n = A.numCols();
70
71 IndexType info = 0;
72 //
73 // Quick return if possible
74 //
75 if ((m==0) || (n==0)) {
76 return info;
77 }
78 //
79 // Determine the block size for this environment.
80 //
81 IndexType bs = ilaenv<T>(1, "GETRF", "", m, n, -1, -1);
82
83 if ((bs<=1) || (bs>=min(m,n))) {
84 //
85 // Use unblocked code.
86 //
87 info = tf2(A, piv);
88 } else {
89 //
90 // Use blocked code.
91 //
92 for (IndexType j=1; j<=min(m,n); j+=bs) {
93 IndexType jb = min(min(m,n)-j+1, bs);
94 //
95 // Row and column partitioning of A
96 //
97 const auto rows1 = _( j, j+jb-1);
98 const auto rows2 = _( j+jb, m);
99
100 const auto rows12 = _( j, m);
101
102 const auto cols0 = _( 1, j-1);
103 const auto cols1 = _( j, j+jb-1);
104 const auto cols2 = _( j+jb, n);
105 //
106 // Factor diagonal and subdiagonal blocks and test for exact
107 // singularity.
108 //
109 IndexType _info = tf2(A(rows12, cols1), piv(rows12));
110 //
111 // Adjust INFO and the pivot indices.
112 //
113 if ((info==0) && (_info>0)) {
114 info = _info + j - 1;
115 }
116 for (IndexType i=j; i<=min(m,j+jb-1); ++i) {
117 piv(i) += j-1;
118 }
119 //
120 // Apply interchanges to columns 1:J-1.
121 //
122 laswp(A(_,cols0), piv(rows1, j));
123
124 if (j+jb<=n) {
125 //
126 // Apply interchanges to columns J+JB:N.
127 //
128 laswp(A(_,cols2), piv(rows1, j));
129 //
130 // Compute block row of U.
131 //
132 blas::sm(Left, NoTrans, T(1),
133 A(rows1, cols1).lowerUnit(),
134 A(rows1, cols2));
135
136 if (j+jb<=m) {
137 //
138 // Update trailing submatrix.
139 //
140 blas::mm(NoTrans, NoTrans,
141 T(-1), A(rows2, cols1), A(rows1, cols2),
142 T( 1), A(rows2, cols2));
143 }
144 }
145 }
146 }
147 return info;
148 }
149
150 //== interface for native lapack ===============================================
151
152 #ifdef CHECK_CXXLAPACK
153
154 template <typename MA, typename VP>
155 typename GeMatrix<MA>::IndexType
156 trf_native(GeMatrix<MA> &A, DenseVector<VP> &piv)
157 {
158 return cxxlapack::getrf(A.numRows(), A.numCols(),
159 A.data(), A.leadingDimension(),
160 piv.data());
161 }
162
163 #endif // CHECK_CXXLAPACK
164
165 //== public interface ==========================================================
166
167 template <typename MA, typename VP>
168 typename GeMatrix<MA>::IndexType
169 trf(GeMatrix<MA> &A, DenseVector<VP> &piv)
170 {
171 typedef typename GeMatrix<MA>::IndexType IndexType;
172
173 //
174 // Test the input parameters
175 //
176 ASSERT(A.firstRow()==1);
177 ASSERT(A.firstCol()==1);
178 ASSERT((piv.inc()>0 && piv.firstIndex()==1)
179 || (piv.inc()<0 && piv.firstIndex()==A.numRows()));
180
181 # ifdef CHECK_CXXLAPACK
182 //
183 // Make copies of output arguments
184 //
185 typename GeMatrix<MA>::NoView _A = A;
186 typename DenseVector<VP>::NoView _piv = piv;
187 # endif
188
189 //
190 // Call implementation
191 //
192 IndexType info = trf_generic(A, piv);
193
194 # ifdef CHECK_CXXLAPACK
195 //
196 // Compare results
197 //
198 IndexType _info = trf_native(_A, _piv);
199
200 bool failed = false;
201 if (! isIdentical(A, _A, " A", "_A")) {
202 std::cerr << "CXXLAPACK: A = " << A << std::endl;
203 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
204 failed = true;
205 }
206
207 if (! isIdentical(piv, _piv, " piv", "_piv")) {
208 std::cerr << "CXXLAPACK: piv = " << piv << std::endl;
209 std::cerr << "F77LAPACK: _piv = " << _piv << std::endl;
210 failed = true;
211 }
212
213 if (! isIdentical(info, _info, " info", "_info")) {
214 std::cerr << "CXXLAPACK: info = " << info << std::endl;
215 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
216 failed = true;
217 }
218
219 if (failed) {
220 ASSERT(0);
221 }
222
223 # endif
224
225 return info;
226 }
227
228 //-- forwarding ----------------------------------------------------------------
229 template <typename MA, typename VP>
230 typename MA::IndexType
231 trf(MA &&A, VP &&piv)
232 {
233 typedef typename MA::IndexType IndexType;
234
235 CHECKPOINT_ENTER;
236 IndexType info = trf(A, piv);
237 CHECKPOINT_LEAVE;
238
239 return info;
240 }
241
242 } } // namespace lapack, flens
243
244 #endif // FLENS_LAPACK_GESV_TRF_TCC