1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
36 *
37 * -- LAPACK routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2009 --
41 */
42
43 #ifndef FLENS_LAPACK_EIG_HRD_TCC
44 #define FLENS_LAPACK_EIG_HRD_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52
53 template <typename IndexType, typename MA>
54 IndexType
55 hrd_generic_wsq(IndexType iLo, IndexType iHi, const GeMatrix<MA> &A)
56 {
57 using std::min;
58
59 typedef typename GeMatrix<MA>::ElementType T;
60
61 //
62 // Perform and apply workspace query
63 //
64 const IndexType nbMax = 64;
65 const IndexType n = A.numCols();
66
67 return n*min(nbMax, IndexType(ilaenv<T>(1,"GEHRD", "", n, iLo, iHi)));
68 }
69
70 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
71 void
72 hrd_generic(IndexType iLo,
73 IndexType iHi,
74 GeMatrix<MA> &A,
75 DenseVector<VTAU> &tau,
76 DenseVector<VWORK> &work)
77 {
78 using std::max;
79 using std::min;
80
81 typedef typename GeMatrix<MA>::ElementType T;
82 typedef typename GeMatrix<MA>::View GeView;
83 typedef typename GeView::Engine GeViewEngine;
84
85 const Underscore<IndexType> _;
86
87 //
88 // Paramters, e.g. maximum block size and buffer on stack for TrMatrix Tr.
89 //
90 const T One(1);
91 const IndexType nbMax = 64;
92 const IndexType ldt = nbMax + 1;
93 T trBuffer[nbMax*ldt];
94 GeView _Tr = GeViewEngine(ldt, nbMax, trBuffer, ldt);
95 //
96 // Perform and apply workspace query
97 //
98 const IndexType n = A.numCols();
99 IndexType nb = min(nbMax,
100 IndexType(ilaenv<T>(1,"GEHRD", "", n, iLo, iHi)));
101
102 const IndexType lWorkOpt = n*nb;
103 if (work.length()==0) {
104 work.resize(lWorkOpt);
105 work(1) = lWorkOpt;
106 }
107 //
108 // Set elements 1:iLo-1 and iHi:n-1 of tau to zero
109 //
110 tau(_(1,iLo-1)) = 0;
111 tau(_(max(IndexType(1),iHi),n-1)) = 0;
112 //
113 // Quick return if possible
114 //
115 const IndexType nh = iHi - iLo + 1;
116 if (nh<=1) {
117 work(1) = 1;
118 return;
119 }
120 //
121 // Determine the block size
122 //
123 IndexType nbMin = 2;
124 IndexType iws = 1;
125 IndexType nx = 0;
126
127 if (nb>1 && nb<nh) {
128 //
129 // Determine when to cross over from blocked to unblocked code
130 // (last block is always handled by unblocked code)
131 //
132 nx = max(nb, IndexType(ilaenv<T>(3, "GEHRD", "", n, iLo, iHi)));
133 if (nx<nh) {
134 //
135 // Determine if workspace is large enough for blocked code
136 //
137 iws = n*nb;
138 if (work.length()<iws) {
139 //
140 // Not enough workspace to use optimal NB: determine the
141 // minimum value of NB, and reduce NB or force use of
142 // unblocked code
143 //
144 nbMin = max(2, ilaenv<T>(2, "GEHRD", "", n, iLo, iHi));
145 if (work.length()>=n*nbMin) {
146 nb = work.length() / n;
147 } else {
148 nb = 1;
149 }
150 }
151 }
152 }
153 IndexType ldWork = n;
154 IndexType i = iLo;
155 if (nb<nbMin || nb>=nh) {
156 //
157 // Use unblocked code below
158 //
159 } else {
160 //
161 // Use blocked code
162 //
163 for (i=iLo; i<=iHi-1-nx; i+=nb) {
164 const IndexType ib = min(nb, iHi-i);
165 auto Tr = _Tr(_(1,ib),_(1,ib)).upper();
166 GeView Y(iHi, ib, work, ldWork);
167 //
168 // Reduce columns i:i+ib-1 to Hessenberg form, returning the
169 // matrices V and T of the block reflector H = I - V*T*V**T
170 // which performs the reduction, and also the matrix Y = A*V*T
171 //
172 lahr2(i, ib, A(_(1,iHi),_(i,iHi)), tau(_(i,i+ib-1)), Tr, Y);
173 //
174 // Partition V and the parts of matrix A that get modified
175 //
176 auto V1 = A(_(i+1, i+ib-1),_(i,i+ib-2));
177 auto V2 = A(_(i+ib, iHi),_(i,i+ib-1));
178 //
179 // Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
180 // right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set
181 // to 1
182 //
183 T ei = A(i+ib,i+ib-1);
184 A(i+ib,i+ib-1) = One;
185 blas::mm(NoTrans, Trans, -One, Y, V2, One, A(_(1,iHi),_(i+ib,iHi)));
186 A(i+ib,i+ib-1) = ei;
187 //
188 // Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
189 // right
190 //
191 blas::mm(Right, Trans, One, V1.lowerUnit(), Y(_(1,i),_(1,ib-1)));
192 A(_(1,i),_(i+1,i+ib-1)) -= Y(_(1,i),_(1,ib-1));
193 //
194 // Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
195 // left
196 //
197 GeView Work(n-i-ib+1, ib, work, ldWork);
198 larfb(Left, Trans, Forward, ColumnWise,
199 A(_(i+1,iHi),_(i,i+ib-1)),
200 Tr,
201 A(_(i+1,iHi),_(i+ib,n)),
202 Work);
203 }
204 }
205 //
206 // Use unblocked code to reduce the rest of the matrix
207 //
208 hd2(i, iHi, A, tau, work);
209 work(1) = iws;
210 }
211
212 //== interface for native lapack ===============================================
213
214 #ifdef CHECK_CXXLAPACK
215
216 template <typename IndexType, typename MA>
217 IndexType
218 hrd_native_wsq(IndexType iLo,
219 IndexType iHi,
220 const GeMatrix<MA> &A)
221 {
222 typedef typename GeMatrix<MA>::ElementType T;
223
224 const INTEGER N = A.numRows();
225 const INTEGER ILO = iLo;
226 const INTEGER IHI = iHi;
227 const INTEGER LDA = A.leadingDimension();
228 T WORK;
229 T DUMMY;
230 const INTEGER LWORK = -1;
231 INTEGER INFO;
232
233 if (IsSame<T, DOUBLE>::value) {
234 LAPACK_IMPL(dgehrd)(&N,
235 &ILO,
236 &IHI,
237 &DUMMY,
238 &LDA,
239 &DUMMY,
240 &WORK,
241 &LWORK,
242 &INFO);
243 } else {
244 ASSERT(0);
245 }
246 ASSERT(INFO==0);
247 return WORK;
248 }
249
250 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
251 void
252 hrd_native(IndexType iLo,
253 IndexType iHi,
254 GeMatrix<MA> &A,
255 DenseVector<VTAU> &tau,
256 DenseVector<VWORK> &work)
257 {
258 typedef typename GeMatrix<MA>::ElementType T;
259
260 const INTEGER N = A.numRows();
261 const INTEGER ILO = iLo;
262 const INTEGER IHI = iHi;
263 const INTEGER LDA = A.leadingDimension();
264 const INTEGER LWORK = work.length();
265 INTEGER INFO;
266
267 if (IsSame<T, DOUBLE>::value) {
268 LAPACK_IMPL(dgehrd)(&N,
269 &ILO,
270 &IHI,
271 A.data(),
272 &LDA,
273 tau.data(),
274 work.data(),
275 &LWORK,
276 &INFO);
277 } else {
278 ASSERT(0);
279 }
280
281 ASSERT(INFO==0);
282 }
283
284 #endif // CHECK_CXXLAPACK
285
286 //== public interface ==========================================================
287
288 template <typename IndexType, typename MA>
289 IndexType
290 hrd_wsq(IndexType iLo,
291 IndexType iHi,
292 const GeMatrix<MA> &A)
293 {
294 LAPACK_DEBUG_OUT("hrd_wsq");
295
296 //
297 // Test the input parameters
298 //
299 # ifndef NDEBUG
300 ASSERT(A.firstRow()==1);
301 ASSERT(A.firstCol()==1);
302 ASSERT(A.numRows()==A.numCols());
303 # endif
304
305 //
306 // Call implementation
307 //
308 IndexType info = hrd_generic_wsq(iLo, iHi, A);
309
310 # ifdef CHECK_CXXLAPACK
311 //
312 // Compare results
313 //
314 IndexType _info = hrd_native_wsq(iLo, iHi, A);
315
316 if (! isIdentical(info, _info, " info", "_info")) {
317 ASSERT(0);
318 }
319 # endif
320
321 return info;
322 }
323
324 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
325 void
326 hrd(IndexType iLo,
327 IndexType iHi,
328 GeMatrix<MA> &A,
329 DenseVector<VTAU> &tau,
330 DenseVector<VWORK> &work)
331 {
332 LAPACK_DEBUG_OUT("hrd");
333
334 using std::max;
335 //
336 // Test the input parameters
337 //
338 # ifndef NDEBUG
339 ASSERT(A.firstRow()==1);
340 ASSERT(A.firstCol()==1);
341 ASSERT(A.numRows()==A.numCols());
342 ASSERT(tau.firstIndex()==1);
343
344 const IndexType n = A.numCols();
345
346 if (n==0) {
347 ASSERT(iLo==1);
348 ASSERT(iHi==0);
349 } else {
350 ASSERT(1<=iLo);
351 ASSERT(iLo<=iHi);
352 ASSERT(iHi<=n);
353 }
354
355 ASSERT(tau.length()==(n-1));
356 const bool lQuery = (work.length()==0);
357 ASSERT(lQuery || (work.length()>=max(IndexType(1),n)));
358 # endif
359
360 //
361 // Make copies of output arguments
362 //
363 # ifdef CHECK_CXXLAPACK
364 typename GeMatrix<MA>::NoView _A = A;
365 typename DenseVector<VTAU>::NoView _tau = tau;
366 typename DenseVector<VWORK>::NoView _work = work;
367 # endif
368
369 //
370 // Call implementation
371 //
372 hrd_generic(iLo, iHi, A, tau, work);
373
374 # ifdef CHECK_CXXLAPACK
375 //
376 // Compare results
377 //
378 if (_work.length()==0) {
379 _work.resize(work.length());
380 }
381 hrd_native(iLo, iHi, _A, _tau, _work);
382
383 bool failed = false;
384 if (! isIdentical(A, _A, " A", "_A")) {
385 std::cerr << "CXXLAPACK: A = " << A << std::endl;
386 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
387 failed = true;
388 }
389
390 if (! isIdentical(tau, _tau, " tau", "_tau")) {
391 std::cerr << "CXXLAPACK: tau = " << tau << std::endl;
392 std::cerr << "F77LAPACK: _tau = " << _tau << std::endl;
393 failed = true;
394 }
395
396 if (! isIdentical(work, _work, " work", "_work")) {
397 std::cerr << "CXXLAPACK: work = " << work << std::endl;
398 std::cerr << "F77LAPACK: _work = " << _work << std::endl;
399 failed = true;
400 }
401
402 if (failed) {
403 std::cerr << "error in: hrd.tcc" << std::endl;
404 ASSERT(0);
405 } else {
406 // std::cerr << "passed: hrd.tcc" << std::endl;
407 }
408
409 # endif
410 }
411
412 //-- forwarding ----------------------------------------------------------------
413 template <typename IndexType, typename MA>
414 IndexType
415 hrd_wsq(IndexType iLo,
416 IndexType iHi,
417 const MA &&A)
418 {
419 CHECKPOINT_ENTER;
420 const IndexType info = hrd_wsq(iLo, iHi, A);
421 CHECKPOINT_LEAVE;
422
423 return info;
424 }
425
426 template <typename IndexType, typename MA,
427 typename VTAU, typename VWORK>
428 void
429 hrd(IndexType iLo,
430 IndexType iHi,
431 MA &&A,
432 VTAU &&tau,
433 VWORK &&work)
434 {
435 CHECKPOINT_ENTER;
436 hrd(iLo, iHi, A, tau, work);
437 CHECKPOINT_LEAVE;
438 }
439
440 } } // namespace lapack, flens
441
442 #endif // FLENS_LAPACK_EIG_HRD_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
36 *
37 * -- LAPACK routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2009 --
41 */
42
43 #ifndef FLENS_LAPACK_EIG_HRD_TCC
44 #define FLENS_LAPACK_EIG_HRD_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52
53 template <typename IndexType, typename MA>
54 IndexType
55 hrd_generic_wsq(IndexType iLo, IndexType iHi, const GeMatrix<MA> &A)
56 {
57 using std::min;
58
59 typedef typename GeMatrix<MA>::ElementType T;
60
61 //
62 // Perform and apply workspace query
63 //
64 const IndexType nbMax = 64;
65 const IndexType n = A.numCols();
66
67 return n*min(nbMax, IndexType(ilaenv<T>(1,"GEHRD", "", n, iLo, iHi)));
68 }
69
70 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
71 void
72 hrd_generic(IndexType iLo,
73 IndexType iHi,
74 GeMatrix<MA> &A,
75 DenseVector<VTAU> &tau,
76 DenseVector<VWORK> &work)
77 {
78 using std::max;
79 using std::min;
80
81 typedef typename GeMatrix<MA>::ElementType T;
82 typedef typename GeMatrix<MA>::View GeView;
83 typedef typename GeView::Engine GeViewEngine;
84
85 const Underscore<IndexType> _;
86
87 //
88 // Paramters, e.g. maximum block size and buffer on stack for TrMatrix Tr.
89 //
90 const T One(1);
91 const IndexType nbMax = 64;
92 const IndexType ldt = nbMax + 1;
93 T trBuffer[nbMax*ldt];
94 GeView _Tr = GeViewEngine(ldt, nbMax, trBuffer, ldt);
95 //
96 // Perform and apply workspace query
97 //
98 const IndexType n = A.numCols();
99 IndexType nb = min(nbMax,
100 IndexType(ilaenv<T>(1,"GEHRD", "", n, iLo, iHi)));
101
102 const IndexType lWorkOpt = n*nb;
103 if (work.length()==0) {
104 work.resize(lWorkOpt);
105 work(1) = lWorkOpt;
106 }
107 //
108 // Set elements 1:iLo-1 and iHi:n-1 of tau to zero
109 //
110 tau(_(1,iLo-1)) = 0;
111 tau(_(max(IndexType(1),iHi),n-1)) = 0;
112 //
113 // Quick return if possible
114 //
115 const IndexType nh = iHi - iLo + 1;
116 if (nh<=1) {
117 work(1) = 1;
118 return;
119 }
120 //
121 // Determine the block size
122 //
123 IndexType nbMin = 2;
124 IndexType iws = 1;
125 IndexType nx = 0;
126
127 if (nb>1 && nb<nh) {
128 //
129 // Determine when to cross over from blocked to unblocked code
130 // (last block is always handled by unblocked code)
131 //
132 nx = max(nb, IndexType(ilaenv<T>(3, "GEHRD", "", n, iLo, iHi)));
133 if (nx<nh) {
134 //
135 // Determine if workspace is large enough for blocked code
136 //
137 iws = n*nb;
138 if (work.length()<iws) {
139 //
140 // Not enough workspace to use optimal NB: determine the
141 // minimum value of NB, and reduce NB or force use of
142 // unblocked code
143 //
144 nbMin = max(2, ilaenv<T>(2, "GEHRD", "", n, iLo, iHi));
145 if (work.length()>=n*nbMin) {
146 nb = work.length() / n;
147 } else {
148 nb = 1;
149 }
150 }
151 }
152 }
153 IndexType ldWork = n;
154 IndexType i = iLo;
155 if (nb<nbMin || nb>=nh) {
156 //
157 // Use unblocked code below
158 //
159 } else {
160 //
161 // Use blocked code
162 //
163 for (i=iLo; i<=iHi-1-nx; i+=nb) {
164 const IndexType ib = min(nb, iHi-i);
165 auto Tr = _Tr(_(1,ib),_(1,ib)).upper();
166 GeView Y(iHi, ib, work, ldWork);
167 //
168 // Reduce columns i:i+ib-1 to Hessenberg form, returning the
169 // matrices V and T of the block reflector H = I - V*T*V**T
170 // which performs the reduction, and also the matrix Y = A*V*T
171 //
172 lahr2(i, ib, A(_(1,iHi),_(i,iHi)), tau(_(i,i+ib-1)), Tr, Y);
173 //
174 // Partition V and the parts of matrix A that get modified
175 //
176 auto V1 = A(_(i+1, i+ib-1),_(i,i+ib-2));
177 auto V2 = A(_(i+ib, iHi),_(i,i+ib-1));
178 //
179 // Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
180 // right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set
181 // to 1
182 //
183 T ei = A(i+ib,i+ib-1);
184 A(i+ib,i+ib-1) = One;
185 blas::mm(NoTrans, Trans, -One, Y, V2, One, A(_(1,iHi),_(i+ib,iHi)));
186 A(i+ib,i+ib-1) = ei;
187 //
188 // Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
189 // right
190 //
191 blas::mm(Right, Trans, One, V1.lowerUnit(), Y(_(1,i),_(1,ib-1)));
192 A(_(1,i),_(i+1,i+ib-1)) -= Y(_(1,i),_(1,ib-1));
193 //
194 // Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
195 // left
196 //
197 GeView Work(n-i-ib+1, ib, work, ldWork);
198 larfb(Left, Trans, Forward, ColumnWise,
199 A(_(i+1,iHi),_(i,i+ib-1)),
200 Tr,
201 A(_(i+1,iHi),_(i+ib,n)),
202 Work);
203 }
204 }
205 //
206 // Use unblocked code to reduce the rest of the matrix
207 //
208 hd2(i, iHi, A, tau, work);
209 work(1) = iws;
210 }
211
212 //== interface for native lapack ===============================================
213
214 #ifdef CHECK_CXXLAPACK
215
216 template <typename IndexType, typename MA>
217 IndexType
218 hrd_native_wsq(IndexType iLo,
219 IndexType iHi,
220 const GeMatrix<MA> &A)
221 {
222 typedef typename GeMatrix<MA>::ElementType T;
223
224 const INTEGER N = A.numRows();
225 const INTEGER ILO = iLo;
226 const INTEGER IHI = iHi;
227 const INTEGER LDA = A.leadingDimension();
228 T WORK;
229 T DUMMY;
230 const INTEGER LWORK = -1;
231 INTEGER INFO;
232
233 if (IsSame<T, DOUBLE>::value) {
234 LAPACK_IMPL(dgehrd)(&N,
235 &ILO,
236 &IHI,
237 &DUMMY,
238 &LDA,
239 &DUMMY,
240 &WORK,
241 &LWORK,
242 &INFO);
243 } else {
244 ASSERT(0);
245 }
246 ASSERT(INFO==0);
247 return WORK;
248 }
249
250 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
251 void
252 hrd_native(IndexType iLo,
253 IndexType iHi,
254 GeMatrix<MA> &A,
255 DenseVector<VTAU> &tau,
256 DenseVector<VWORK> &work)
257 {
258 typedef typename GeMatrix<MA>::ElementType T;
259
260 const INTEGER N = A.numRows();
261 const INTEGER ILO = iLo;
262 const INTEGER IHI = iHi;
263 const INTEGER LDA = A.leadingDimension();
264 const INTEGER LWORK = work.length();
265 INTEGER INFO;
266
267 if (IsSame<T, DOUBLE>::value) {
268 LAPACK_IMPL(dgehrd)(&N,
269 &ILO,
270 &IHI,
271 A.data(),
272 &LDA,
273 tau.data(),
274 work.data(),
275 &LWORK,
276 &INFO);
277 } else {
278 ASSERT(0);
279 }
280
281 ASSERT(INFO==0);
282 }
283
284 #endif // CHECK_CXXLAPACK
285
286 //== public interface ==========================================================
287
288 template <typename IndexType, typename MA>
289 IndexType
290 hrd_wsq(IndexType iLo,
291 IndexType iHi,
292 const GeMatrix<MA> &A)
293 {
294 LAPACK_DEBUG_OUT("hrd_wsq");
295
296 //
297 // Test the input parameters
298 //
299 # ifndef NDEBUG
300 ASSERT(A.firstRow()==1);
301 ASSERT(A.firstCol()==1);
302 ASSERT(A.numRows()==A.numCols());
303 # endif
304
305 //
306 // Call implementation
307 //
308 IndexType info = hrd_generic_wsq(iLo, iHi, A);
309
310 # ifdef CHECK_CXXLAPACK
311 //
312 // Compare results
313 //
314 IndexType _info = hrd_native_wsq(iLo, iHi, A);
315
316 if (! isIdentical(info, _info, " info", "_info")) {
317 ASSERT(0);
318 }
319 # endif
320
321 return info;
322 }
323
324 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
325 void
326 hrd(IndexType iLo,
327 IndexType iHi,
328 GeMatrix<MA> &A,
329 DenseVector<VTAU> &tau,
330 DenseVector<VWORK> &work)
331 {
332 LAPACK_DEBUG_OUT("hrd");
333
334 using std::max;
335 //
336 // Test the input parameters
337 //
338 # ifndef NDEBUG
339 ASSERT(A.firstRow()==1);
340 ASSERT(A.firstCol()==1);
341 ASSERT(A.numRows()==A.numCols());
342 ASSERT(tau.firstIndex()==1);
343
344 const IndexType n = A.numCols();
345
346 if (n==0) {
347 ASSERT(iLo==1);
348 ASSERT(iHi==0);
349 } else {
350 ASSERT(1<=iLo);
351 ASSERT(iLo<=iHi);
352 ASSERT(iHi<=n);
353 }
354
355 ASSERT(tau.length()==(n-1));
356 const bool lQuery = (work.length()==0);
357 ASSERT(lQuery || (work.length()>=max(IndexType(1),n)));
358 # endif
359
360 //
361 // Make copies of output arguments
362 //
363 # ifdef CHECK_CXXLAPACK
364 typename GeMatrix<MA>::NoView _A = A;
365 typename DenseVector<VTAU>::NoView _tau = tau;
366 typename DenseVector<VWORK>::NoView _work = work;
367 # endif
368
369 //
370 // Call implementation
371 //
372 hrd_generic(iLo, iHi, A, tau, work);
373
374 # ifdef CHECK_CXXLAPACK
375 //
376 // Compare results
377 //
378 if (_work.length()==0) {
379 _work.resize(work.length());
380 }
381 hrd_native(iLo, iHi, _A, _tau, _work);
382
383 bool failed = false;
384 if (! isIdentical(A, _A, " A", "_A")) {
385 std::cerr << "CXXLAPACK: A = " << A << std::endl;
386 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
387 failed = true;
388 }
389
390 if (! isIdentical(tau, _tau, " tau", "_tau")) {
391 std::cerr << "CXXLAPACK: tau = " << tau << std::endl;
392 std::cerr << "F77LAPACK: _tau = " << _tau << std::endl;
393 failed = true;
394 }
395
396 if (! isIdentical(work, _work, " work", "_work")) {
397 std::cerr << "CXXLAPACK: work = " << work << std::endl;
398 std::cerr << "F77LAPACK: _work = " << _work << std::endl;
399 failed = true;
400 }
401
402 if (failed) {
403 std::cerr << "error in: hrd.tcc" << std::endl;
404 ASSERT(0);
405 } else {
406 // std::cerr << "passed: hrd.tcc" << std::endl;
407 }
408
409 # endif
410 }
411
412 //-- forwarding ----------------------------------------------------------------
413 template <typename IndexType, typename MA>
414 IndexType
415 hrd_wsq(IndexType iLo,
416 IndexType iHi,
417 const MA &&A)
418 {
419 CHECKPOINT_ENTER;
420 const IndexType info = hrd_wsq(iLo, iHi, A);
421 CHECKPOINT_LEAVE;
422
423 return info;
424 }
425
426 template <typename IndexType, typename MA,
427 typename VTAU, typename VWORK>
428 void
429 hrd(IndexType iLo,
430 IndexType iHi,
431 MA &&A,
432 VTAU &&tau,
433 VWORK &&work)
434 {
435 CHECKPOINT_ENTER;
436 hrd(iLo, iHi, A, tau, work);
437 CHECKPOINT_LEAVE;
438 }
439
440 } } // namespace lapack, flens
441
442 #endif // FLENS_LAPACK_EIG_HRD_TCC