1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
35 *
36 * -- LAPACK routine (version 3.2) --
37 * -- LAPACK is a software package provided by Univ. of Tennessee, --
38 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
39 * November 2006
40 */
41
42 #ifndef FLENS_LAPACK_QR_ORGQR_TCC
43 #define FLENS_LAPACK_QR_ORGQR_TCC 1
44
45 #include <flens/blas/blas.h>
46 #include <flens/lapack/lapack.h>
47
48 namespace flens { namespace lapack {
49
50 //== generic lapack implementation =============================================
51
52 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
53 void
54 orgqr_generic(IndexType k,
55 GeMatrix<MA> &A,
56 const DenseVector<VTAU> &tau,
57 DenseVector<VWORK> &work)
58 {
59 using std::max;
60 using std::min;
61
62 typedef typename GeMatrix<MA>::ElementType T;
63 const T Zero(0);
64
65 const Underscore<IndexType> _;
66 const IndexType m = A.numRows();
67 const IndexType n = A.numCols();
68 //
69 // Perform and apply workspace query
70 //
71 IndexType nb = ilaenv<T>(1, "ORGQR", "", m, n, k);
72 const IndexType lWorkOpt = max(IndexType(1), n) * nb;
73 if (work.length()==0) {
74 work.resize(max(lWorkOpt,IndexType(1)));
75 work(1)=lWorkOpt;
76 }
77 //
78 // Quick return if possible
79 //
80 if (n<=0) {
81 work(1) = 1;
82 return;
83 }
84
85 IndexType nbMin = 2;
86 IndexType nx = 0;
87 IndexType iws = n;
88 IndexType ldWork = 1;
89
90 if ((nb>1) && (nb<k)) {
91 //
92 // Determine when to cross over from blocked to unblocked code.
93 //
94 nx = max(IndexType(0), IndexType(ilaenv<T>(3, "ORGQR", "", m, n, k)));
95 if (nx<k) {
96 //
97 // Determine if workspace is large enough for blocked code.
98 //
99 ldWork = n;
100 iws = ldWork *nb;
101 if (work.length()<iws) {
102 //
103 // Not enough workspace to use optimal NB: reduce NB and
104 // determine the minimum value of NB.
105 //
106 nb = work.length() / ldWork;
107 nbMin = max(IndexType(2),
108 IndexType(ilaenv<T>(2, "ORGQR", "", m, n, k)));
109 }
110 }
111 }
112
113 IndexType ki = -1,
114 kk = -1;
115
116 if ((nb>=nbMin) && (nb<k) && (nx<k)) {
117 //
118 // Use blocked code after the last block.
119 // The first kk columns are handled by the block method.
120 //
121 ki = ((k-nx-1)/nb)*nb;
122 kk = min(k, ki+nb);
123 //
124 // Set A(1:kk,kk+1:n) to zero.
125 //
126 A(_(1,kk),_(kk+1,n)) = Zero;
127 } else {
128 kk = 0;
129 }
130
131 //
132 // Use unblocked code for the last or only block.
133 //
134 if (kk<n) {
135 org2r(k-kk, A(_(kk+1,m),_(kk+1,n)), tau(_(kk+1, k)), work(_(1,n-kk)));
136 }
137
138 if (kk>0) {
139 typename GeMatrix<MA>::View Work(n, nb, work);
140 //
141 // Use blocked code
142 //
143 for (IndexType i=ki+1; i>=1; i-=nb) {
144 const IndexType ib = min(nb, k-i+1);
145 if (i+ib<=n) {
146 //
147 // Form the triangular factor of the block reflector
148 // H = H(i) H(i+1) . . . H(i+ib-1)
149 //
150 auto Tr = Work(_(1,ib),_(1,ib)).upper();
151 larft(Forward, ColumnWise,
152 m-i+1,
153 A(_(i,m),_(i,i+ib-1)),
154 tau(_(i,i+ib-1)),
155 Tr);
156 //
157 // Apply H to A(i:m,i+ib:n) from the left
158 //
159 larfb(Left, NoTrans, Forward, ColumnWise,
160 A(_(i,m),_(i,i+ib-1)),
161 Tr,
162 A(_(i,m),_(i+ib,n)),
163 Work(_(ib+1,n),_(1,ib)));
164 }
165 //
166 // Apply H to rows i:m of current block
167 //
168 org2r(ib, A(_(i,m),_(i,i+ib-1)), tau(_(i,i+ib-1)), work(_(1,ib)));
169 //
170 // Set rows 1:i-1 of current block to zero
171 //
172 A(_(1,i-1),_(i,i+ib-1)) = Zero;
173 }
174 }
175 work(1) = iws;
176 }
177
178 //== interface for native lapack ===============================================
179
180 #ifdef CHECK_CXXLAPACK
181
182 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
183 void
184 orgqr_native(IndexType k,
185 GeMatrix<MA> &A,
186 const DenseVector<VTAU> &tau,
187 DenseVector<VWORK> &work)
188 {
189 typedef typename GeMatrix<MA>::ElementType T;
190
191 const INTEGER M = A.numRows();
192 const INTEGER N = A.numCols();
193 const INTEGER K = k;
194 const INTEGER LDA = A.leadingDimension();
195 const INTEGER LWORK = work.length();
196 INTEGER INFO;
197
198 if (IsSame<T,DOUBLE>::value) {
199 LAPACK_IMPL(dorgqr)(&M,
200 &N,
201 &K,
202 A.data(),
203 &LDA,
204 tau.data(),
205 work.data(),
206 &LWORK,
207 &INFO);
208 } else {
209 ASSERT(0);
210 }
211 ASSERT(INFO==0);
212 }
213
214 #endif // CHECK_CXXLAPACK
215
216 //== public interface ==========================================================
217
218 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
219 void
220 orgqr(IndexType k,
221 GeMatrix<MA> &A,
222 const DenseVector<VTAU> &tau,
223 DenseVector<VWORK> &work)
224 {
225 typedef typename GeMatrix<MA>::ElementType ElementType;
226 //
227 // Test the input parameters
228 //
229 # ifndef NDEBUG
230 ASSERT(A.firstRow()==IndexType(1));
231 ASSERT(A.firstCol()==IndexType(1));
232 ASSERT(tau.firstIndex()==IndexType(1));
233 ASSERT(tau.length()==k);
234 ASSERT((work.length()==0) || (work.length()>=A.numCols()));
235
236 const IndexType m = A.numRows();
237 const IndexType n = A.numCols();
238
239 ASSERT(n<=m);
240 ASSERT(k<=n);
241 ASSERT(0<=k);
242 # endif
243
244 //
245 // Make copies of output arguments
246 //
247 # ifdef CHECK_CXXLAPACK
248 typename GeMatrix<MA>::NoView A_org = A;
249 typename DenseVector<VWORK>::NoView work_org = work;
250 # endif
251
252 //
253 // Call implementation
254 //
255 orgqr_generic(k, A, tau, work);
256
257 # ifdef CHECK_CXXLAPACK
258 //
259 // Restore output arguments
260 //
261 typename GeMatrix<MA>::NoView A_generic = A;
262 typename DenseVector<VWORK>::NoView work_generic = work;
263
264 A = A_org;
265
266 if (work_org.length()!=0) {
267 work = work_org;
268 } else {
269 work = ElementType(0);
270 }
271 //
272 // Compare results
273 //
274 orgqr_native(k, A, tau, work);
275
276 bool failed = false;
277 if (! isIdentical(A_generic, A, "A_generic", "A")) {
278 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
279 std::cerr << "F77LAPACK: A = " << A << std::endl;
280 failed = true;
281 }
282
283 if (! isIdentical(work_generic, work, "work_generic", "work")) {
284 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
285 std::cerr << "F77LAPACK: work = " << work << std::endl;
286 failed = true;
287 }
288
289 if (failed) {
290 std::cerr << "error in: orgqr.tcc" << std::endl;
291 ASSERT(0);
292 } else {
293 // std::cerr << "passed: orgqr.tcc" << std::endl;
294 }
295 # endif
296 }
297
298 //-- forwarding ----------------------------------------------------------------
299 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
300 void
301 orgqr(IndexType k,
302 MA &&A,
303 const VTAU &tau,
304 VWORK &&work)
305 {
306 orgqr(k, A, tau, work);
307 }
308
309 } } // namespace lapack, flens
310
311 #endif // FLENS_LAPACK_QR_ORGQR_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
35 *
36 * -- LAPACK routine (version 3.2) --
37 * -- LAPACK is a software package provided by Univ. of Tennessee, --
38 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
39 * November 2006
40 */
41
42 #ifndef FLENS_LAPACK_QR_ORGQR_TCC
43 #define FLENS_LAPACK_QR_ORGQR_TCC 1
44
45 #include <flens/blas/blas.h>
46 #include <flens/lapack/lapack.h>
47
48 namespace flens { namespace lapack {
49
50 //== generic lapack implementation =============================================
51
52 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
53 void
54 orgqr_generic(IndexType k,
55 GeMatrix<MA> &A,
56 const DenseVector<VTAU> &tau,
57 DenseVector<VWORK> &work)
58 {
59 using std::max;
60 using std::min;
61
62 typedef typename GeMatrix<MA>::ElementType T;
63 const T Zero(0);
64
65 const Underscore<IndexType> _;
66 const IndexType m = A.numRows();
67 const IndexType n = A.numCols();
68 //
69 // Perform and apply workspace query
70 //
71 IndexType nb = ilaenv<T>(1, "ORGQR", "", m, n, k);
72 const IndexType lWorkOpt = max(IndexType(1), n) * nb;
73 if (work.length()==0) {
74 work.resize(max(lWorkOpt,IndexType(1)));
75 work(1)=lWorkOpt;
76 }
77 //
78 // Quick return if possible
79 //
80 if (n<=0) {
81 work(1) = 1;
82 return;
83 }
84
85 IndexType nbMin = 2;
86 IndexType nx = 0;
87 IndexType iws = n;
88 IndexType ldWork = 1;
89
90 if ((nb>1) && (nb<k)) {
91 //
92 // Determine when to cross over from blocked to unblocked code.
93 //
94 nx = max(IndexType(0), IndexType(ilaenv<T>(3, "ORGQR", "", m, n, k)));
95 if (nx<k) {
96 //
97 // Determine if workspace is large enough for blocked code.
98 //
99 ldWork = n;
100 iws = ldWork *nb;
101 if (work.length()<iws) {
102 //
103 // Not enough workspace to use optimal NB: reduce NB and
104 // determine the minimum value of NB.
105 //
106 nb = work.length() / ldWork;
107 nbMin = max(IndexType(2),
108 IndexType(ilaenv<T>(2, "ORGQR", "", m, n, k)));
109 }
110 }
111 }
112
113 IndexType ki = -1,
114 kk = -1;
115
116 if ((nb>=nbMin) && (nb<k) && (nx<k)) {
117 //
118 // Use blocked code after the last block.
119 // The first kk columns are handled by the block method.
120 //
121 ki = ((k-nx-1)/nb)*nb;
122 kk = min(k, ki+nb);
123 //
124 // Set A(1:kk,kk+1:n) to zero.
125 //
126 A(_(1,kk),_(kk+1,n)) = Zero;
127 } else {
128 kk = 0;
129 }
130
131 //
132 // Use unblocked code for the last or only block.
133 //
134 if (kk<n) {
135 org2r(k-kk, A(_(kk+1,m),_(kk+1,n)), tau(_(kk+1, k)), work(_(1,n-kk)));
136 }
137
138 if (kk>0) {
139 typename GeMatrix<MA>::View Work(n, nb, work);
140 //
141 // Use blocked code
142 //
143 for (IndexType i=ki+1; i>=1; i-=nb) {
144 const IndexType ib = min(nb, k-i+1);
145 if (i+ib<=n) {
146 //
147 // Form the triangular factor of the block reflector
148 // H = H(i) H(i+1) . . . H(i+ib-1)
149 //
150 auto Tr = Work(_(1,ib),_(1,ib)).upper();
151 larft(Forward, ColumnWise,
152 m-i+1,
153 A(_(i,m),_(i,i+ib-1)),
154 tau(_(i,i+ib-1)),
155 Tr);
156 //
157 // Apply H to A(i:m,i+ib:n) from the left
158 //
159 larfb(Left, NoTrans, Forward, ColumnWise,
160 A(_(i,m),_(i,i+ib-1)),
161 Tr,
162 A(_(i,m),_(i+ib,n)),
163 Work(_(ib+1,n),_(1,ib)));
164 }
165 //
166 // Apply H to rows i:m of current block
167 //
168 org2r(ib, A(_(i,m),_(i,i+ib-1)), tau(_(i,i+ib-1)), work(_(1,ib)));
169 //
170 // Set rows 1:i-1 of current block to zero
171 //
172 A(_(1,i-1),_(i,i+ib-1)) = Zero;
173 }
174 }
175 work(1) = iws;
176 }
177
178 //== interface for native lapack ===============================================
179
180 #ifdef CHECK_CXXLAPACK
181
182 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
183 void
184 orgqr_native(IndexType k,
185 GeMatrix<MA> &A,
186 const DenseVector<VTAU> &tau,
187 DenseVector<VWORK> &work)
188 {
189 typedef typename GeMatrix<MA>::ElementType T;
190
191 const INTEGER M = A.numRows();
192 const INTEGER N = A.numCols();
193 const INTEGER K = k;
194 const INTEGER LDA = A.leadingDimension();
195 const INTEGER LWORK = work.length();
196 INTEGER INFO;
197
198 if (IsSame<T,DOUBLE>::value) {
199 LAPACK_IMPL(dorgqr)(&M,
200 &N,
201 &K,
202 A.data(),
203 &LDA,
204 tau.data(),
205 work.data(),
206 &LWORK,
207 &INFO);
208 } else {
209 ASSERT(0);
210 }
211 ASSERT(INFO==0);
212 }
213
214 #endif // CHECK_CXXLAPACK
215
216 //== public interface ==========================================================
217
218 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
219 void
220 orgqr(IndexType k,
221 GeMatrix<MA> &A,
222 const DenseVector<VTAU> &tau,
223 DenseVector<VWORK> &work)
224 {
225 typedef typename GeMatrix<MA>::ElementType ElementType;
226 //
227 // Test the input parameters
228 //
229 # ifndef NDEBUG
230 ASSERT(A.firstRow()==IndexType(1));
231 ASSERT(A.firstCol()==IndexType(1));
232 ASSERT(tau.firstIndex()==IndexType(1));
233 ASSERT(tau.length()==k);
234 ASSERT((work.length()==0) || (work.length()>=A.numCols()));
235
236 const IndexType m = A.numRows();
237 const IndexType n = A.numCols();
238
239 ASSERT(n<=m);
240 ASSERT(k<=n);
241 ASSERT(0<=k);
242 # endif
243
244 //
245 // Make copies of output arguments
246 //
247 # ifdef CHECK_CXXLAPACK
248 typename GeMatrix<MA>::NoView A_org = A;
249 typename DenseVector<VWORK>::NoView work_org = work;
250 # endif
251
252 //
253 // Call implementation
254 //
255 orgqr_generic(k, A, tau, work);
256
257 # ifdef CHECK_CXXLAPACK
258 //
259 // Restore output arguments
260 //
261 typename GeMatrix<MA>::NoView A_generic = A;
262 typename DenseVector<VWORK>::NoView work_generic = work;
263
264 A = A_org;
265
266 if (work_org.length()!=0) {
267 work = work_org;
268 } else {
269 work = ElementType(0);
270 }
271 //
272 // Compare results
273 //
274 orgqr_native(k, A, tau, work);
275
276 bool failed = false;
277 if (! isIdentical(A_generic, A, "A_generic", "A")) {
278 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
279 std::cerr << "F77LAPACK: A = " << A << std::endl;
280 failed = true;
281 }
282
283 if (! isIdentical(work_generic, work, "work_generic", "work")) {
284 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
285 std::cerr << "F77LAPACK: work = " << work << std::endl;
286 failed = true;
287 }
288
289 if (failed) {
290 std::cerr << "error in: orgqr.tcc" << std::endl;
291 ASSERT(0);
292 } else {
293 // std::cerr << "passed: orgqr.tcc" << std::endl;
294 }
295 # endif
296 }
297
298 //-- forwarding ----------------------------------------------------------------
299 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
300 void
301 orgqr(IndexType k,
302 MA &&A,
303 const VTAU &tau,
304 VWORK &&work)
305 {
306 orgqr(k, A, tau, work);
307 }
308
309 } } // namespace lapack, flens
310
311 #endif // FLENS_LAPACK_QR_ORGQR_TCC