1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
36 *
37 * -- LAPACK routine (version 3.2) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_QR_QRF_TCC
44 #define FLENS_LAPACK_QR_QRF_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52
53 template <typename MA, typename VTAU, typename VWORK>
54 void
55 qrf_generic(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
56 {
57 using std::max;
58 using std::min;
59
60 typedef typename GeMatrix<MA>::ElementType T;
61 typedef typename GeMatrix<MA>::IndexType IndexType;
62
63 const Underscore<IndexType> _;
64
65 const IndexType m = A.numRows();
66 const IndexType n = A.numCols();
67 const IndexType k = min(m, n);
68 //
69 // Perform and apply workspace query
70 //
71 IndexType nb = ilaenv<T>(1, "GEQRF", "", m, n);
72
73 const IndexType lWorkOpt = n*nb;
74 if (work.length()==0) {
75 work.resize(max(lWorkOpt,IndexType(1)));
76 work(1)=lWorkOpt;
77 }
78 //
79 // Quick return if possible
80 //
81 if (k==0) {
82 work(1) = 1;
83 return;
84 }
85
86 IndexType nbMin = 2;
87 IndexType nx = 0;
88 IndexType iws = n;
89 IndexType ldWork = -1;
90
91 if ((nb>1) && (nb<k)) {
92 //
93 // Determine when to cross over from blocked to unblocked code.
94 //
95 nx = max(IndexType(0), IndexType(ilaenv<T>(3, "GEQRF", "", m, n)));
96 if (nx<k) {
97 //
98 // Determine if workspace is large enough for blocked code.
99 //
100 ldWork = n;
101 iws = ldWork*nb;
102 if (work.length()<iws) {
103 //
104 // Not enough workspace to use optimal NB: reduce NB and
105 // determine the minimum value of NB.
106 //
107 nb = work.length() / ldWork;
108 nbMin = max(IndexType(2),
109 IndexType(ilaenv<T>(2, "GEQRF", "", m, n)));
110 }
111 }
112 }
113
114 IndexType i;
115 if ((nb>=nbMin) && (nb<k) && (nx<k)) {
116 typename GeMatrix<MA>::View Work(n, nb, work);
117 //
118 // Use blocked code initially
119 //
120 for (i=1; i<=k-nx; i+=nb) {
121 const IndexType ib = min(k-i+1, nb);
122 //
123 // Compute the QR factorization of the current block
124 // A(i:m,i:i+ib-1)
125 //
126 qr2(A(_(i,m),_(i,i+ib-1)), tau(_(i,i+ib-1)), work(_(1,ib)));
127 if (i+ib<=n) {
128 //
129 // Form the triangular factor of the block reflector
130 // H = H(i) H(i+1) . . . H(i+ib-1)
131 //
132 auto Tr = Work(_(1,ib),_(1,ib)).upper();
133 larft(Forward, ColumnWise,
134 m-i+1,
135 A(_(i,m),_(i,i+ib-1)),
136 tau(_(i,i+ib-1)),
137 Tr);
138 //
139 // Apply H' to A(i:m,i+ib:n) from the left
140 //
141 larfb(Left, Trans, Forward, ColumnWise,
142 A(_(i,m),_(i,i+ib-1)),
143 Tr,
144 A(_(i,m),_(i+ib,n)),
145 Work(_(ib+1,n),_(1,ib)));
146 }
147 }
148 } else {
149 i = 1;
150 }
151
152 //
153 // Use unblocked code to factor the last or only block.
154 //
155 if (i<=k) {
156 qr2(A(_(i,m),_(i,n)), tau(_(i,k)), work(_(1,n-i+1)));
157 }
158 work(1) = iws;
159 }
160
161 //== interface for native lapack ===============================================
162
163 #ifdef CHECK_CXXLAPACK
164
165 template <typename MA, typename VTAU, typename VWORK>
166 void
167 qrf_native(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
168 {
169 typedef typename GeMatrix<MA>::ElementType T;
170
171 const INTEGER M = A.numRows();
172 const INTEGER N = A.numCols();
173 const INTEGER LDA = A.leadingDimension();
174 INTEGER LWORK = work.length();
175 INTEGER INFO;
176
177 if (IsSame<T, DOUBLE>::value) {
178 LAPACK_IMPL(dgeqrf)(&M, &N, A.data(), &LDA,
179 tau.data(),
180 work.data(), &LWORK,
181 &INFO);
182 assert(INFO==0);
183 } else {
184 ASSERT(0);
185 }
186 }
187
188 #endif // CHECK_CXXLAPACK
189
190 //== public interface ==========================================================
191
192 template <typename MA, typename VTAU, typename VWORK>
193 void
194 qrf(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
195 {
196 using std::min;
197 typedef typename GeMatrix<MA>::ElementType ElementType;
198 typedef typename GeMatrix<MA>::IndexType IndexType;
199
200 # ifndef NDEBUG
201 //
202 // Test the input parameters
203 //
204 ASSERT(A.firstRow()==1);
205 ASSERT(A.firstCol()==1);
206 ASSERT(tau.firstIndex()==1);
207 ASSERT(work.firstIndex()==1);
208
209 const IndexType m = A.numRows();
210 const IndexType n = A.numCols();
211 const IndexType k = min(m,n);
212
213 ASSERT(tau.length()==k);
214 ASSERT(work.length()>=n || work.length()==IndexType(0));
215 # endif
216
217 # ifdef CHECK_CXXLAPACK
218 //
219 // Make copies of output arguments
220 //
221 typename GeMatrix<MA>::NoView A_org = A;
222 typename DenseVector<VTAU>::NoView tau_org = tau;
223 typename DenseVector<VWORK>::NoView work_org = work;
224 # endif
225
226 //
227 // Call implementation
228 //
229 qrf_generic(A, tau, work);
230
231 # ifdef CHECK_CXXLAPACK
232 //
233 // Restore output arguments
234 //
235 typename GeMatrix<MA>::NoView A_generic = A;
236 typename DenseVector<VTAU>::NoView tau_generic = tau;
237 typename DenseVector<VWORK>::NoView work_generic = work;
238
239 A = A_org;
240 tau = tau_org;
241
242 // if the generic implementation resized work due to a work size query
243 // we must not restore the work array
244 if (work_org.length()>0) {
245 work = work_org;
246 } else {
247 work = 0;
248 }
249 //
250 // Compare results
251 //
252 qrf_native(A, tau, work);
253
254 bool failed = false;
255 if (! isIdentical(A_generic, A, "A_generic", "A")) {
256 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
257 std::cerr << "F77LAPACK: A = " << A << std::endl;
258 failed = true;
259 }
260
261 if (! isIdentical(tau_generic, tau, "tau_generic", "tau")) {
262 std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
263 std::cerr << "F77LAPACK: tau = " << tau << std::endl;
264 failed = true;
265 }
266
267 if (! isIdentical(work_generic, work, "work_generic", "work")) {
268 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
269 std::cerr << "F77LAPACK: work = " << work << std::endl;
270 failed = true;
271 }
272
273 if (failed) {
274 ASSERT(0);
275 }
276 # endif
277 }
278
279 //-- forwarding ----------------------------------------------------------------
280 template <typename MA, typename VTAU, typename VWORK>
281 void
282 qrf(MA &&A, VTAU &&tau, VWORK &&work)
283 {
284 CHECKPOINT_ENTER;
285 qrf(A, tau, work);
286 CHECKPOINT_LEAVE;
287 }
288
289 } } // namespace lapack, flens
290
291 #endif // FLENS_LAPACK_QR_QRF_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
36 *
37 * -- LAPACK routine (version 3.2) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_QR_QRF_TCC
44 #define FLENS_LAPACK_QR_QRF_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52
53 template <typename MA, typename VTAU, typename VWORK>
54 void
55 qrf_generic(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
56 {
57 using std::max;
58 using std::min;
59
60 typedef typename GeMatrix<MA>::ElementType T;
61 typedef typename GeMatrix<MA>::IndexType IndexType;
62
63 const Underscore<IndexType> _;
64
65 const IndexType m = A.numRows();
66 const IndexType n = A.numCols();
67 const IndexType k = min(m, n);
68 //
69 // Perform and apply workspace query
70 //
71 IndexType nb = ilaenv<T>(1, "GEQRF", "", m, n);
72
73 const IndexType lWorkOpt = n*nb;
74 if (work.length()==0) {
75 work.resize(max(lWorkOpt,IndexType(1)));
76 work(1)=lWorkOpt;
77 }
78 //
79 // Quick return if possible
80 //
81 if (k==0) {
82 work(1) = 1;
83 return;
84 }
85
86 IndexType nbMin = 2;
87 IndexType nx = 0;
88 IndexType iws = n;
89 IndexType ldWork = -1;
90
91 if ((nb>1) && (nb<k)) {
92 //
93 // Determine when to cross over from blocked to unblocked code.
94 //
95 nx = max(IndexType(0), IndexType(ilaenv<T>(3, "GEQRF", "", m, n)));
96 if (nx<k) {
97 //
98 // Determine if workspace is large enough for blocked code.
99 //
100 ldWork = n;
101 iws = ldWork*nb;
102 if (work.length()<iws) {
103 //
104 // Not enough workspace to use optimal NB: reduce NB and
105 // determine the minimum value of NB.
106 //
107 nb = work.length() / ldWork;
108 nbMin = max(IndexType(2),
109 IndexType(ilaenv<T>(2, "GEQRF", "", m, n)));
110 }
111 }
112 }
113
114 IndexType i;
115 if ((nb>=nbMin) && (nb<k) && (nx<k)) {
116 typename GeMatrix<MA>::View Work(n, nb, work);
117 //
118 // Use blocked code initially
119 //
120 for (i=1; i<=k-nx; i+=nb) {
121 const IndexType ib = min(k-i+1, nb);
122 //
123 // Compute the QR factorization of the current block
124 // A(i:m,i:i+ib-1)
125 //
126 qr2(A(_(i,m),_(i,i+ib-1)), tau(_(i,i+ib-1)), work(_(1,ib)));
127 if (i+ib<=n) {
128 //
129 // Form the triangular factor of the block reflector
130 // H = H(i) H(i+1) . . . H(i+ib-1)
131 //
132 auto Tr = Work(_(1,ib),_(1,ib)).upper();
133 larft(Forward, ColumnWise,
134 m-i+1,
135 A(_(i,m),_(i,i+ib-1)),
136 tau(_(i,i+ib-1)),
137 Tr);
138 //
139 // Apply H' to A(i:m,i+ib:n) from the left
140 //
141 larfb(Left, Trans, Forward, ColumnWise,
142 A(_(i,m),_(i,i+ib-1)),
143 Tr,
144 A(_(i,m),_(i+ib,n)),
145 Work(_(ib+1,n),_(1,ib)));
146 }
147 }
148 } else {
149 i = 1;
150 }
151
152 //
153 // Use unblocked code to factor the last or only block.
154 //
155 if (i<=k) {
156 qr2(A(_(i,m),_(i,n)), tau(_(i,k)), work(_(1,n-i+1)));
157 }
158 work(1) = iws;
159 }
160
161 //== interface for native lapack ===============================================
162
163 #ifdef CHECK_CXXLAPACK
164
165 template <typename MA, typename VTAU, typename VWORK>
166 void
167 qrf_native(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
168 {
169 typedef typename GeMatrix<MA>::ElementType T;
170
171 const INTEGER M = A.numRows();
172 const INTEGER N = A.numCols();
173 const INTEGER LDA = A.leadingDimension();
174 INTEGER LWORK = work.length();
175 INTEGER INFO;
176
177 if (IsSame<T, DOUBLE>::value) {
178 LAPACK_IMPL(dgeqrf)(&M, &N, A.data(), &LDA,
179 tau.data(),
180 work.data(), &LWORK,
181 &INFO);
182 assert(INFO==0);
183 } else {
184 ASSERT(0);
185 }
186 }
187
188 #endif // CHECK_CXXLAPACK
189
190 //== public interface ==========================================================
191
192 template <typename MA, typename VTAU, typename VWORK>
193 void
194 qrf(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
195 {
196 using std::min;
197 typedef typename GeMatrix<MA>::ElementType ElementType;
198 typedef typename GeMatrix<MA>::IndexType IndexType;
199
200 # ifndef NDEBUG
201 //
202 // Test the input parameters
203 //
204 ASSERT(A.firstRow()==1);
205 ASSERT(A.firstCol()==1);
206 ASSERT(tau.firstIndex()==1);
207 ASSERT(work.firstIndex()==1);
208
209 const IndexType m = A.numRows();
210 const IndexType n = A.numCols();
211 const IndexType k = min(m,n);
212
213 ASSERT(tau.length()==k);
214 ASSERT(work.length()>=n || work.length()==IndexType(0));
215 # endif
216
217 # ifdef CHECK_CXXLAPACK
218 //
219 // Make copies of output arguments
220 //
221 typename GeMatrix<MA>::NoView A_org = A;
222 typename DenseVector<VTAU>::NoView tau_org = tau;
223 typename DenseVector<VWORK>::NoView work_org = work;
224 # endif
225
226 //
227 // Call implementation
228 //
229 qrf_generic(A, tau, work);
230
231 # ifdef CHECK_CXXLAPACK
232 //
233 // Restore output arguments
234 //
235 typename GeMatrix<MA>::NoView A_generic = A;
236 typename DenseVector<VTAU>::NoView tau_generic = tau;
237 typename DenseVector<VWORK>::NoView work_generic = work;
238
239 A = A_org;
240 tau = tau_org;
241
242 // if the generic implementation resized work due to a work size query
243 // we must not restore the work array
244 if (work_org.length()>0) {
245 work = work_org;
246 } else {
247 work = 0;
248 }
249 //
250 // Compare results
251 //
252 qrf_native(A, tau, work);
253
254 bool failed = false;
255 if (! isIdentical(A_generic, A, "A_generic", "A")) {
256 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
257 std::cerr << "F77LAPACK: A = " << A << std::endl;
258 failed = true;
259 }
260
261 if (! isIdentical(tau_generic, tau, "tau_generic", "tau")) {
262 std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
263 std::cerr << "F77LAPACK: tau = " << tau << std::endl;
264 failed = true;
265 }
266
267 if (! isIdentical(work_generic, work, "work_generic", "work")) {
268 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
269 std::cerr << "F77LAPACK: work = " << work << std::endl;
270 failed = true;
271 }
272
273 if (failed) {
274 ASSERT(0);
275 }
276 # endif
277 }
278
279 //-- forwarding ----------------------------------------------------------------
280 template <typename MA, typename VTAU, typename VWORK>
281 void
282 qrf(MA &&A, VTAU &&tau, VWORK &&work)
283 {
284 CHECKPOINT_ENTER;
285 qrf(A, tau, work);
286 CHECKPOINT_LEAVE;
287 }
288
289 } } // namespace lapack, flens
290
291 #endif // FLENS_LAPACK_QR_QRF_TCC