1 /*
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
36 *
37 * -- LAPACK routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011 --
41 */
42
43 #ifndef FLENS_LAPACK_LQ_LQF_TCC
44 #define FLENS_LAPACK_LQ_LQF_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52 template <typename MA, typename VTAU, typename VWORK>
53 void
54 lqf_generic(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
55 {
56 using std::max;
57 using std::min;
58
59 typedef typename GeMatrix<MA>::ElementType T;
60 typedef typename GeMatrix<MA>::IndexType IndexType;
61
62 const Underscore<IndexType> _;
63
64 const IndexType m = A.numRows();
65 const IndexType n = A.numCols();
66 const IndexType k = min(m, n);
67
68 //
69 // Perform and apply workspace query
70 //
71 IndexType nb = ilaenv<T>(1, "GELQF", "", m, n);
72
73 const IndexType lWorkOpt = m*nb;
74 if (work.length()==0) {
75 work.resize(max(lWorkOpt,IndexType(1)));
76 work(1)=lWorkOpt;
77 }
78
79 //
80 // Quick return if possible
81 //
82 if (k==0) {
83 work(1) = 1;
84 return;
85 }
86
87 IndexType nbMin = 2;
88 IndexType nx = 0;
89 IndexType iws = m;
90 IndexType ldWork = -1;
91
92 if ((nb>1) && (nb<k)) {
93 //
94 // Determine when to cross over from blocked to unblocked code.
95 //
96 nx = max(IndexType(0), IndexType(ilaenv<T>(3, "GELQF", "", m, n)));
97 if (nx<k) {
98 //
99 // Determine if workspace is large enough for blocked code.
100 //
101 ldWork = m;
102 iws = ldWork*nb;
103 if (work.length()<iws) {
104 //
105 // Not enough workspace to use optimal NB: reduce NB and
106 // determine the minimum value of NB.
107 //
108 nb = work.length() / ldWork;
109 nbMin = max(IndexType(2),
110 IndexType(ilaenv<T>(2, "GELQF", 0, m, n)));
111 }
112 }
113 }
114
115 IndexType i;
116 if (nb>=nbMin && nb<k && nx<k) {
117 typename GeMatrix<MA>::View Work(m, nb, work);
118 //
119 // Use blocked code initially
120 //
121 for (i=1; i<=k-nx; i+=nb) {
122 const IndexType ib = min(k-i+1, nb);
123 //
124 // Compute the LQ factorization of the current block
125 // A(i:i+ib-1,i:n)
126 //
127 lq2(A(_(i,i+ib-1),_(i,n)), tau(_(i,i+ib-1)), work);
128 if (i+ib<=m) {
129 //
130 // Form the triangular factor of the block reflector
131 // H = H(i) H(i+1) . . . H(i+ib-1)
132 //
133 auto Tr = Work(_(1,ib),_(1,ib)).upper();
134 larft(Forward, RowWise,
135 n-i+1,
136 A(_(i,i+ib-1),_(i,n)),
137 tau(_(i,i+ib-1)),
138 Tr);
139 //
140 // Apply H' to A(i:m,i+ib:n) from the left
141 //
142 larfb(Right, NoTrans, Forward, RowWise,
143 A(_(i,i+ib-1),_(i,n)),
144 Tr,
145 A(_(i+ib,m),_(i,n)),
146 Work(_(ib+1,m),_(1,ib)));
147 }
148 }
149 } else {
150 i = 1;
151 }
152
153 //
154 // Use unblocked code to factor the last or only block.
155 //
156 if (i<=k) {
157 lq2(A(_(i,m),_(i,n)), tau(_(i,k)), work(_(1,m-i+1)));
158 }
159 work(1) = iws;
160 }
161
162 //== interface for native lapack ===============================================
163
164 #ifdef CHECK_CXXLAPACK
165
166 template <typename MA, typename VTAU, typename VWORK>
167 void
168 lqf_native(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
169 {
170 typedef typename GeMatrix<MA>::ElementType T;
171
172 const INTEGER M = A.numRows();
173 const INTEGER N = A.numCols();
174 const INTEGER LDA = A.leadingDimension();
175 INTEGER LWORK = work.length();
176 INTEGER INFO;
177
178 if (IsSame<T, DOUBLE>::value) {
179 LAPACK_IMPL(dgelqf)(&M, &N, A.data(), &LDA,
180 tau.data(),
181 work.data(), &LWORK,
182 &INFO);
183 assert(INFO==0);
184 } else {
185 ASSERT(0);
186 }
187 }
188
189 #endif // CHECK_CXXLAPACK
190
191 //== public interface ==========================================================
192
193 template <typename MA, typename VTAU, typename VWORK>
194 void
195 lqf(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
196 {
197 using std::min;
198 typedef typename GeMatrix<MA>::ElementType ElementType;
199 typedef typename GeMatrix<MA>::IndexType IndexType;
200 //
201 // Test the input parameters
202 //
203 # ifndef NDEBUG
204 ASSERT(A.firstRow()==1);
205 ASSERT(A.firstCol()==1);
206 ASSERT(tau.firstIndex()==1);
207 ASSERT(work.firstIndex()==1);
208
209 const IndexType m = A.numRows();
210 const IndexType n = A.numCols();
211 const IndexType k = min(m,n);
212
213 ASSERT(tau.length()==k);
214 ASSERT(work.length()>=m || work.length()==IndexType(0));
215 # endif
216
217 # ifdef CHECK_CXXLAPACK
218 //
219 // Make copies of output arguments
220 //
221 typename GeMatrix<MA>::NoView A_org = A;
222 typename DenseVector<VTAU>::NoView tau_org = tau;
223 typename DenseVector<VTAU>::NoView work_org = work;
224 # endif
225
226 //
227 // Call implementation
228 //
229 lqf_generic(A, tau, work);
230
231 # ifdef CHECK_CXXLAPACK
232 //
233 // Restore output parameters
234 //
235 typename GeMatrix<MA>::NoView A_generic = A;
236 typename DenseVector<VTAU>::NoView tau_generic = tau;
237 typename DenseVector<VTAU>::NoView work_generic = work;
238
239 A = A_org;
240 tau = tau_org;
241
242 // if the generic implementation resized work due to a work size query
243 // we must not restore the work array
244 if (work_org.length()>0) {
245 work = work_org;
246 } else {
247 work = 0;
248 }
249 //
250 // Compare results
251 //
252 lqf_native(A, tau, work);
253
254 bool failed = false;
255 if (! isIdentical(A_generic, A, "A_generic", "A")) {
256 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
257 std::cerr << "F77LAPACK: A = " << A << std::endl;
258 failed = true;
259 }
260
261 if (! isIdentical(tau_generic, tau, "tau_generic", "tau")) {
262 std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
263 std::cerr << "F77LAPACK: tau = " << tau << std::endl;
264 failed = true;
265 }
266
267 if (! isIdentical(work_generic, work, "work_generic", "work")) {
268 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
269 std::cerr << "F77LAPACK: work = " << work << std::endl;
270 failed = true;
271 }
272
273 if (failed) {
274 ASSERT(0);
275 }
276 # endif
277 }
278
279 //-- forwarding ----------------------------------------------------------------
280 template <typename MA, typename VTAU, typename VWORK>
281 void
282 lqf(MA &&A, VTAU &&tau, VWORK &&work)
283 {
284 CHECKPOINT_ENTER;
285 lqf(A, tau, work);
286 CHECKPOINT_LEAVE;
287 }
288
289 } } // namespace lapack, flens
290
291 #endif // FLENS_LAPACK_LQ_LQF_TCC
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
36 *
37 * -- LAPACK routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011 --
41 */
42
43 #ifndef FLENS_LAPACK_LQ_LQF_TCC
44 #define FLENS_LAPACK_LQ_LQF_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52 template <typename MA, typename VTAU, typename VWORK>
53 void
54 lqf_generic(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
55 {
56 using std::max;
57 using std::min;
58
59 typedef typename GeMatrix<MA>::ElementType T;
60 typedef typename GeMatrix<MA>::IndexType IndexType;
61
62 const Underscore<IndexType> _;
63
64 const IndexType m = A.numRows();
65 const IndexType n = A.numCols();
66 const IndexType k = min(m, n);
67
68 //
69 // Perform and apply workspace query
70 //
71 IndexType nb = ilaenv<T>(1, "GELQF", "", m, n);
72
73 const IndexType lWorkOpt = m*nb;
74 if (work.length()==0) {
75 work.resize(max(lWorkOpt,IndexType(1)));
76 work(1)=lWorkOpt;
77 }
78
79 //
80 // Quick return if possible
81 //
82 if (k==0) {
83 work(1) = 1;
84 return;
85 }
86
87 IndexType nbMin = 2;
88 IndexType nx = 0;
89 IndexType iws = m;
90 IndexType ldWork = -1;
91
92 if ((nb>1) && (nb<k)) {
93 //
94 // Determine when to cross over from blocked to unblocked code.
95 //
96 nx = max(IndexType(0), IndexType(ilaenv<T>(3, "GELQF", "", m, n)));
97 if (nx<k) {
98 //
99 // Determine if workspace is large enough for blocked code.
100 //
101 ldWork = m;
102 iws = ldWork*nb;
103 if (work.length()<iws) {
104 //
105 // Not enough workspace to use optimal NB: reduce NB and
106 // determine the minimum value of NB.
107 //
108 nb = work.length() / ldWork;
109 nbMin = max(IndexType(2),
110 IndexType(ilaenv<T>(2, "GELQF", 0, m, n)));
111 }
112 }
113 }
114
115 IndexType i;
116 if (nb>=nbMin && nb<k && nx<k) {
117 typename GeMatrix<MA>::View Work(m, nb, work);
118 //
119 // Use blocked code initially
120 //
121 for (i=1; i<=k-nx; i+=nb) {
122 const IndexType ib = min(k-i+1, nb);
123 //
124 // Compute the LQ factorization of the current block
125 // A(i:i+ib-1,i:n)
126 //
127 lq2(A(_(i,i+ib-1),_(i,n)), tau(_(i,i+ib-1)), work);
128 if (i+ib<=m) {
129 //
130 // Form the triangular factor of the block reflector
131 // H = H(i) H(i+1) . . . H(i+ib-1)
132 //
133 auto Tr = Work(_(1,ib),_(1,ib)).upper();
134 larft(Forward, RowWise,
135 n-i+1,
136 A(_(i,i+ib-1),_(i,n)),
137 tau(_(i,i+ib-1)),
138 Tr);
139 //
140 // Apply H' to A(i:m,i+ib:n) from the left
141 //
142 larfb(Right, NoTrans, Forward, RowWise,
143 A(_(i,i+ib-1),_(i,n)),
144 Tr,
145 A(_(i+ib,m),_(i,n)),
146 Work(_(ib+1,m),_(1,ib)));
147 }
148 }
149 } else {
150 i = 1;
151 }
152
153 //
154 // Use unblocked code to factor the last or only block.
155 //
156 if (i<=k) {
157 lq2(A(_(i,m),_(i,n)), tau(_(i,k)), work(_(1,m-i+1)));
158 }
159 work(1) = iws;
160 }
161
162 //== interface for native lapack ===============================================
163
164 #ifdef CHECK_CXXLAPACK
165
166 template <typename MA, typename VTAU, typename VWORK>
167 void
168 lqf_native(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
169 {
170 typedef typename GeMatrix<MA>::ElementType T;
171
172 const INTEGER M = A.numRows();
173 const INTEGER N = A.numCols();
174 const INTEGER LDA = A.leadingDimension();
175 INTEGER LWORK = work.length();
176 INTEGER INFO;
177
178 if (IsSame<T, DOUBLE>::value) {
179 LAPACK_IMPL(dgelqf)(&M, &N, A.data(), &LDA,
180 tau.data(),
181 work.data(), &LWORK,
182 &INFO);
183 assert(INFO==0);
184 } else {
185 ASSERT(0);
186 }
187 }
188
189 #endif // CHECK_CXXLAPACK
190
191 //== public interface ==========================================================
192
193 template <typename MA, typename VTAU, typename VWORK>
194 void
195 lqf(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
196 {
197 using std::min;
198 typedef typename GeMatrix<MA>::ElementType ElementType;
199 typedef typename GeMatrix<MA>::IndexType IndexType;
200 //
201 // Test the input parameters
202 //
203 # ifndef NDEBUG
204 ASSERT(A.firstRow()==1);
205 ASSERT(A.firstCol()==1);
206 ASSERT(tau.firstIndex()==1);
207 ASSERT(work.firstIndex()==1);
208
209 const IndexType m = A.numRows();
210 const IndexType n = A.numCols();
211 const IndexType k = min(m,n);
212
213 ASSERT(tau.length()==k);
214 ASSERT(work.length()>=m || work.length()==IndexType(0));
215 # endif
216
217 # ifdef CHECK_CXXLAPACK
218 //
219 // Make copies of output arguments
220 //
221 typename GeMatrix<MA>::NoView A_org = A;
222 typename DenseVector<VTAU>::NoView tau_org = tau;
223 typename DenseVector<VTAU>::NoView work_org = work;
224 # endif
225
226 //
227 // Call implementation
228 //
229 lqf_generic(A, tau, work);
230
231 # ifdef CHECK_CXXLAPACK
232 //
233 // Restore output parameters
234 //
235 typename GeMatrix<MA>::NoView A_generic = A;
236 typename DenseVector<VTAU>::NoView tau_generic = tau;
237 typename DenseVector<VTAU>::NoView work_generic = work;
238
239 A = A_org;
240 tau = tau_org;
241
242 // if the generic implementation resized work due to a work size query
243 // we must not restore the work array
244 if (work_org.length()>0) {
245 work = work_org;
246 } else {
247 work = 0;
248 }
249 //
250 // Compare results
251 //
252 lqf_native(A, tau, work);
253
254 bool failed = false;
255 if (! isIdentical(A_generic, A, "A_generic", "A")) {
256 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
257 std::cerr << "F77LAPACK: A = " << A << std::endl;
258 failed = true;
259 }
260
261 if (! isIdentical(tau_generic, tau, "tau_generic", "tau")) {
262 std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
263 std::cerr << "F77LAPACK: tau = " << tau << std::endl;
264 failed = true;
265 }
266
267 if (! isIdentical(work_generic, work, "work_generic", "work")) {
268 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
269 std::cerr << "F77LAPACK: work = " << work << std::endl;
270 failed = true;
271 }
272
273 if (failed) {
274 ASSERT(0);
275 }
276 # endif
277 }
278
279 //-- forwarding ----------------------------------------------------------------
280 template <typename MA, typename VTAU, typename VWORK>
281 void
282 lqf(MA &&A, VTAU &&tau, VWORK &&work)
283 {
284 CHECKPOINT_ENTER;
285 lqf(A, tau, work);
286 CHECKPOINT_LEAVE;
287 }
288
289 } } // namespace lapack, flens
290
291 #endif // FLENS_LAPACK_LQ_LQF_TCC