1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK,
36 $ INFO )
37 *
38 * -- LAPACK auxiliary routine (version 3.2.2) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * June 2010
42 */
43
44 #ifndef FLENS_LAPACK_EIG_LAEXC_TCC
45 #define FLENS_LAPACK_EIG_LAEXC_TCC 1
46
47 #include <flens/aux/aux.h>
48 #include <flens/blas/blas.h>
49 #include <flens/lapack/lapack.h>
50
51 namespace flens { namespace lapack {
52
53 //== generic lapack implementation =============================================
54
55 template <typename MT, typename MQ, typename IndexType, typename VWORK>
56 IndexType
57 laexc_generic(bool computeQ,
58 GeMatrix<MT> &T,
59 GeMatrix<MQ> &Q,
60 IndexType j1,
61 IndexType n1,
62 IndexType n2,
63 DenseVector<VWORK> &work)
64 {
65 using std::abs;
66
67 typedef typename GeMatrix<MT>::View GeMatrixView;
68 typedef typename GeMatrix<MT>::VectorView DenseVectorView;
69 typedef typename GeMatrix<MT>::ElementType ElementType;
70
71 const ElementType Zero(0), One(1), Ten(10);
72
73 const IndexType n = T.numRows();
74
75 const Underscore<IndexType> _;
76
77 //
78 // .. Local Arrays ..
79 //
80 ElementType _dData[16], _xData[4];
81 GeMatrixView D = typename GeMatrixView::Engine(4, 4, _dData, 4);
82 GeMatrixView X = typename GeMatrixView::Engine(2, 2, _xData, 2);
83
84 ElementType _uData[3], _u1Data[3], _u2Data[3];
85 DenseVectorView u = typename DenseVectorView::Engine(3, _uData);
86 DenseVectorView u1 = typename DenseVectorView::Engine(3, _u1Data);
87 DenseVectorView u2 = typename DenseVectorView::Engine(3, _u2Data);
88 //
89 // Quick return if possible
90 //
91 if (n==0 || n1==0 || n2==0) {
92 return 0;
93 }
94 if (j1+n1>n) {
95 return 0;
96 }
97
98 const IndexType j2 = j1 + 1;
99 const IndexType j3 = j1 + 2;
100 const IndexType j4 = j1 + 3;
101
102 ElementType t11, t22, t33;
103
104 if (n1==1 && n2==1) {
105 //
106 // Swap two 1-by-1 blocks.
107 //
108 t11 = T(j1,j1);
109 t22 = T(j2,j2);
110 //
111 // Determine the transformation to perform the interchange.
112 //
113 ElementType cs, sn, temp;
114 lartg(T(j1,j2), t22-t11, cs, sn, temp);
115 //
116 // Apply transformation to the matrix T.
117 //
118 if (j3<=n) {
119 blas::rot(T(j1,_(j3,n)), T(j2,_(j3,n)), cs, sn);
120 }
121 blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);
122
123 T(j1,j1) = t22;
124 T(j2,j2) = t11;
125
126 if (computeQ) {
127 //
128 // Accumulate transformation in the matrix Q.
129 //
130 blas::rot(Q(_,j1), Q(_,j2), cs, sn);
131 }
132
133 } else {
134 //
135 // Swapping involves at least one 2-by-2 block.
136 //
137 // Copy the diagonal block of order N1+N2 to the local array D
138 // and compute its norm.
139 //
140 const IndexType nd = n1 + n2;
141 auto _D = D(_(1,nd),_(1,nd));
142
143 _D = T(_(j1,j1+nd-1),_(j1,j1+nd-1));
144 ElementType normD = lan(MaximumNorm, _D);
145
146 ElementType cs, sn, wr1, wr2, wi1, wi2;
147
148 ElementType scale, normX, tau, tau1, tau2;
149 //
150 // Compute machine-dependent threshold for test for accepting
151 // swap.
152 //
153 const ElementType eps = lamch<ElementType>(Precision);
154 const ElementType smallNum = lamch<ElementType>(SafeMin) / eps;
155 const ElementType thresh = max(Ten*eps*normD, smallNum);
156 //
157 // Solve T11*X - X*T22 = scale*T12 for X.
158 //
159 const auto T11 = D(_(1,n1),_(1,n1));
160 const auto T12 = D(_(1,n1),_(n1+1,nd));
161 const auto T22 = D(_(n1+1,nd),_(n1+1,nd));
162
163 auto _X = X(_(1,n1),_(1,n2));
164
165 lasy2(false, false, IndexType(-1), T11, T22, T12, scale, _X, normX);
166 //
167 // Swap the adjacent diagonal blocks.
168 //
169 const IndexType k = n1 + n1 + n2 - 3;
170
171 switch (k) {
172 //
173 // N1 = 1, N2 = 2: generate elementary reflector H so that:
174 //
175 // ( scale, X11, X12 ) H = ( 0, 0, * )
176 //
177 case 1:
178 u(1) = scale;
179 u(2) = X(1,1);
180 u(3) = X(1,2);
181 larfg(IndexType(3), u(3), u(_(1,2)), tau);
182 u(3) = One;
183 t11 = T(j1,j1);
184 //
185 // Perform swap provisionally on diagonal block in D.
186 //
187 larfx(Left, u, tau, _D, work(_(1,3)));
188 larfx(Right, u, tau, _D, work(_(1,3)));
189 //
190 // Test whether to reject swap.
191 //
192 if (max(abs(D(3,1)), abs(D(3,2)), abs(D(3,3)-t11))>thresh) {
193 //
194 // Return 1 if swap was rejected.
195 //
196 return 1;
197 }
198 //
199 // Accept swap: apply transformation to the entire matrix T.
200 //
201 larfx(Left, u, tau, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));
202 larfx(Right, u, tau, T(_(1,j2),_(j1,j1+3-1)), work(_(1,j2)));
203
204 T(j3,j1) = Zero;
205 T(j3,j2) = Zero;
206 T(j3,j3) = t11;
207
208 if (computeQ) {
209 //
210 // Accumulate transformation in the matrix Q.
211 //
212 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);
213 }
214 break;
215
216 case 2:
217 //
218 // N1 = 2, N2 = 1: generate elementary reflector H so that:
219 //
220 // H ( -X11 ) = ( * )
221 // ( -X21 ) = ( 0 )
222 // ( scale ) = ( 0 )
223 //
224 u(1) = -X(1,1);
225 u(2) = -X(2,1);
226 u(3) = scale;
227 larfg(IndexType(3), u(1), u(_(2,3)), tau);
228 u(1) = One;
229 t33 = T(j3,j3);
230 //
231 // Perform swap provisionally on diagonal block in D.
232 //
233 larfx(Left, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));
234 larfx(Right, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));
235 //
236 // Test whether to reject swap.
237 //
238 if (max(abs(D(2,1)), abs(D(3,1)), abs(D(1,1)-t33))>thresh) {
239 //
240 // Return 1 if swap was rejected.
241 //
242 return 1;
243 }
244 //
245 // Accept swap: apply transformation to the entire matrix T.
246 //
247 larfx(Right, u, tau, T(_(1,j3),_(j1, j1+3-1)), work(_(1,j3)));
248 larfx(Left, u, tau, T(_(j1,j1+3-1),_(j2,n)), work(_(1,n-j1)));
249
250 T(j1,j1) = t33;
251 T(j2,j1) = Zero;
252 T(j3,j1) = Zero;
253
254 if (computeQ) {
255 //
256 // Accumulate transformation in the matrix Q.
257 //
258 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);
259 }
260 break;
261
262 case 3:
263 //
264 // N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so
265 // that:
266 //
267 // H(2) H(1) ( -X11 -X12 ) = ( * * )
268 // ( -X21 -X22 ) ( 0 * )
269 // ( scale 0 ) ( 0 0 )
270 // ( 0 scale ) ( 0 0 )
271 //
272 u1(1) = -X(1,1);
273 u1(2) = -X(2,1);
274 u1(3) = scale;
275 larfg(IndexType(3), u1(1), u1(_(2,3)), tau1);
276 u1(1) = One;
277
278 const ElementType temp = -tau1*(X(1,2)+u1(2)*X(2,2));
279 u2(1) = -temp*u1(2) - X(2,2);
280 u2(2) = -temp*u1(3);
281 u2(3) = scale;
282 larfg(IndexType(3), u2(1), u2(_(2,3)), tau2);
283 u2(1) = One;
284 //
285 // Perform swap provisionally on diagonal block in D.
286 //
287 larfx(Left, u1, tau1, D(_(1,3),_(1,4)), work(_(1,4)));
288 larfx(Right, u1, tau1, D(_(1,4),_(1,3)), work(_(1,4)));
289 larfx(Left, u2, tau2, D(_(2,4),_(1,4)), work(_(1,4)));
290 larfx(Right, u2, tau2, D(_(1,4),_(2,4)), work(_(1,4)));
291 //
292 // Test whether to reject swap.
293 //
294 if (max(abs(D(3,1)), abs(D(3,2)), abs(D(4,1)), abs(D(4,2)))>thresh)
295 {
296 //
297 // Return 1 if swap was rejected.
298 //
299 return 1;
300 }
301 //
302 // Accept swap: apply transformation to the entire matrix T.
303 //
304 larfx(Left, u1, tau1, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));
305 larfx(Right, u1, tau1, T(_(1,j4),_(j1,j1+3-1)), work(_(1,j4)));
306 larfx(Left, u2, tau2, T(_(j2,j2+3-1),_(j1,n)), work(_(1,n-j1+1)));
307 larfx(Right, u2, tau2, T(_(1,j4),_(j2,j2+3-1)), work(_(1,j4)));
308
309 T(j3,j1) = Zero;
310 T(j3,j2) = Zero;
311 T(j4,j1) = Zero;
312 T(j4,j2) = Zero;
313
314 if (computeQ) {
315 //
316 // Accumulate transformation in the matrix Q.
317 //
318 larfx(Right, u1, tau1, Q(_,_(j1,j1+3-1)), work);
319 larfx(Right, u2, tau2, Q(_,_(j2,j2+3-1)), work);
320 }
321 }
322
323 if (n2==2) {
324 //
325 // Standardize new 2-by-2 block T11
326 //
327 lanv2(T(j1,j1), T(j1,j2), T(j2,j1), T(j2,j2),
328 wr1, wi1, wr2, wi2, cs, sn);
329 blas::rot(T(j1,_(j1+2,n)), T(j2,_(j1+2,n)), cs, sn);
330 blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);
331 if (computeQ) {
332 blas::rot(Q(_,j1), Q(_,j2), cs, sn);
333 }
334 }
335
336 if (n1==2) {
337 //
338 // Standardize new 2-by-2 block T22
339 //
340 const IndexType j3 = j1 + n2;
341 const IndexType j4 = j3 + 1;
342 lanv2(T(j3,j3), T(j3,j4), T(j4,j3), T(j4,j4),
343 wr1, wi1, wr2, wi2, cs, sn);
344 if (j3+2<=n) {
345 blas::rot(T(j3,_(j3+2,n)), T(j4,_(j3+2,n)), cs, sn);
346 }
347 blas::rot(T(_(1,j3-1),j3), T(_(1,j3-1),j4), cs, sn);
348 if (computeQ) {
349 blas::rot(Q(_,j3), Q(_,j4), cs, sn);
350 }
351 }
352 }
353 return 0;
354 }
355
356 //== interface for native lapack ===============================================
357
358 #ifdef CHECK_CXXLAPACK
359
360 template <typename MT, typename MQ, typename IndexType, typename VWORK>
361 IndexType
362 laexc_native(bool computeQ,
363 GeMatrix<MT> &T,
364 GeMatrix<MQ> &Q,
365 IndexType j1,
366 IndexType n1,
367 IndexType n2,
368 DenseVector<VWORK> &work)
369 {
370 typedef typename GeMatrix<MT>::ElementType ElementType;
371
372 const LOGICAL WANTQ = computeQ;
373 const INTEGER N = T.numRows();
374 const INTEGER LDT = T.leadingDimension();
375 const INTEGER LDQ = Q.leadingDimension();
376 const INTEGER J1 = j1;
377 const INTEGER N1 = n1;
378 const INTEGER N2 = n2;
379 INTEGER INFO;
380
381 if (IsSame<ElementType,DOUBLE>::value) {
382 LAPACK_IMPL(dlaexc)(&WANTQ,
383 &N,
384 T.data(),
385 &LDT,
386 Q.data(),
387 &LDQ,
388 &J1,
389 &N1,
390 &N2,
391 work.data(),
392 &INFO);
393 } else {
394 ASSERT(0);
395 }
396 ASSERT(INFO>=0);
397
398 return INFO;
399 }
400
401 #endif // CHECK_CXXLAPACK
402
403 //== public interface ==========================================================
404
405 template <typename MT, typename MQ, typename IndexType, typename VWORK>
406 IndexType
407 laexc(bool computeQ,
408 GeMatrix<MT> &T,
409 GeMatrix<MQ> &Q,
410 IndexType j1,
411 IndexType n1,
412 IndexType n2,
413 DenseVector<VWORK> &work)
414 {
415 LAPACK_DEBUG_OUT("BEGIN: laexc");
416
417 //
418 // Test the input parameters
419 //
420 # ifndef NDEBUG
421 ASSERT(T.firstRow()==1);
422 ASSERT(T.firstCol()==1);
423 ASSERT(T.numRows()==T.numCols());
424
425 const IndexType n = T.numRows();
426
427 if (computeQ) {
428 ASSERT(Q.firstRow()==1);
429 ASSERT(Q.firstCol()==1);
430 ASSERT(Q.numRows()==Q.numCols());
431 ASSERT(Q.numRows()==n);
432 }
433
434 ASSERT(j1>=1);
435 ASSERT((n1==0) || (n1==1) || (n1==2));
436 ASSERT((n2==0) || (n2==1) || (n2==2));
437
438 ASSERT(work.firstIndex()==1);
439 ASSERT(work.length()==n);
440 # endif
441
442 # ifdef CHECK_CXXLAPACK
443 //
444 // Make copies of output arguments
445 //
446 typename GeMatrix<MT>::NoView T_org = T;
447 typename GeMatrix<MQ>::NoView Q_org = Q;
448 typename DenseVector<VWORK>::NoView work_org = work;
449 # endif
450
451 //
452 // Call implementation
453 //
454 IndexType info = laexc_generic(computeQ, T, Q, j1, n1, n2, work);
455
456 # ifdef CHECK_CXXLAPACK
457 //
458 // Make copies of results computed by the generic implementation
459 //
460 typename GeMatrix<MT>::NoView T_generic = T;
461 typename GeMatrix<MQ>::NoView Q_generic = Q;
462 typename DenseVector<VWORK>::NoView work_generic = work;
463
464 //
465 // restore output arguments
466 //
467 T = T_org;
468 Q = Q_org;
469 work = work_org;
470
471 //
472 // Compare generic results with results from the native implementation
473 //
474
475 IndexType _info = laexc_native(computeQ, T, Q, j1, n1, n2, work);
476
477 bool failed = false;
478 if (! isIdentical(T_generic, T, "T_generic", "T")) {
479 std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
480 std::cerr << "F77LAPACK: T = " << T << std::endl;
481 failed = true;
482 }
483
484 if (! isIdentical(Q_generic, Q, "Q_generic", "Q")) {
485 std::cerr << "CXXLAPACK: Q_generic = " << Q_generic << std::endl;
486 std::cerr << "F77LAPACK: Q = " << Q << std::endl;
487 failed = true;
488 }
489
490 if (! isIdentical(work_generic, work, "work_generic", "work")) {
491 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
492 std::cerr << "F77LAPACK: work = " << work << std::endl;
493 failed = true;
494 }
495
496 if (! isIdentical(info, _info, " info", "_info")) {
497 std::cerr << "CXXLAPACK: info = " << info << std::endl;
498 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
499 failed = true;
500 }
501
502 if (failed) {
503 ASSERT(0);
504 }
505 # endif
506
507 LAPACK_DEBUG_OUT("END: laexc");
508
509 return info;
510 }
511
512 //-- forwarding ----------------------------------------------------------------
513 template <typename MT, typename MQ, typename IndexType, typename VWORK>
514 IndexType
515 laexc(bool computeQ,
516 MT &&T,
517 MQ &&Q,
518 IndexType j1,
519 IndexType n1,
520 IndexType n2,
521 VWORK &&work)
522 {
523 CHECKPOINT_ENTER;
524 const IndexType info = laexc(computeQ, T, Q, j1, n1, n2, work);
525 CHECKPOINT_LEAVE;
526
527 return info;
528 }
529
530 } } // namespace lapack, flens
531
532 #endif // FLENS_LAPACK_EIG_LAEVC_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK,
36 $ INFO )
37 *
38 * -- LAPACK auxiliary routine (version 3.2.2) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * June 2010
42 */
43
44 #ifndef FLENS_LAPACK_EIG_LAEXC_TCC
45 #define FLENS_LAPACK_EIG_LAEXC_TCC 1
46
47 #include <flens/aux/aux.h>
48 #include <flens/blas/blas.h>
49 #include <flens/lapack/lapack.h>
50
51 namespace flens { namespace lapack {
52
53 //== generic lapack implementation =============================================
54
55 template <typename MT, typename MQ, typename IndexType, typename VWORK>
56 IndexType
57 laexc_generic(bool computeQ,
58 GeMatrix<MT> &T,
59 GeMatrix<MQ> &Q,
60 IndexType j1,
61 IndexType n1,
62 IndexType n2,
63 DenseVector<VWORK> &work)
64 {
65 using std::abs;
66
67 typedef typename GeMatrix<MT>::View GeMatrixView;
68 typedef typename GeMatrix<MT>::VectorView DenseVectorView;
69 typedef typename GeMatrix<MT>::ElementType ElementType;
70
71 const ElementType Zero(0), One(1), Ten(10);
72
73 const IndexType n = T.numRows();
74
75 const Underscore<IndexType> _;
76
77 //
78 // .. Local Arrays ..
79 //
80 ElementType _dData[16], _xData[4];
81 GeMatrixView D = typename GeMatrixView::Engine(4, 4, _dData, 4);
82 GeMatrixView X = typename GeMatrixView::Engine(2, 2, _xData, 2);
83
84 ElementType _uData[3], _u1Data[3], _u2Data[3];
85 DenseVectorView u = typename DenseVectorView::Engine(3, _uData);
86 DenseVectorView u1 = typename DenseVectorView::Engine(3, _u1Data);
87 DenseVectorView u2 = typename DenseVectorView::Engine(3, _u2Data);
88 //
89 // Quick return if possible
90 //
91 if (n==0 || n1==0 || n2==0) {
92 return 0;
93 }
94 if (j1+n1>n) {
95 return 0;
96 }
97
98 const IndexType j2 = j1 + 1;
99 const IndexType j3 = j1 + 2;
100 const IndexType j4 = j1 + 3;
101
102 ElementType t11, t22, t33;
103
104 if (n1==1 && n2==1) {
105 //
106 // Swap two 1-by-1 blocks.
107 //
108 t11 = T(j1,j1);
109 t22 = T(j2,j2);
110 //
111 // Determine the transformation to perform the interchange.
112 //
113 ElementType cs, sn, temp;
114 lartg(T(j1,j2), t22-t11, cs, sn, temp);
115 //
116 // Apply transformation to the matrix T.
117 //
118 if (j3<=n) {
119 blas::rot(T(j1,_(j3,n)), T(j2,_(j3,n)), cs, sn);
120 }
121 blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);
122
123 T(j1,j1) = t22;
124 T(j2,j2) = t11;
125
126 if (computeQ) {
127 //
128 // Accumulate transformation in the matrix Q.
129 //
130 blas::rot(Q(_,j1), Q(_,j2), cs, sn);
131 }
132
133 } else {
134 //
135 // Swapping involves at least one 2-by-2 block.
136 //
137 // Copy the diagonal block of order N1+N2 to the local array D
138 // and compute its norm.
139 //
140 const IndexType nd = n1 + n2;
141 auto _D = D(_(1,nd),_(1,nd));
142
143 _D = T(_(j1,j1+nd-1),_(j1,j1+nd-1));
144 ElementType normD = lan(MaximumNorm, _D);
145
146 ElementType cs, sn, wr1, wr2, wi1, wi2;
147
148 ElementType scale, normX, tau, tau1, tau2;
149 //
150 // Compute machine-dependent threshold for test for accepting
151 // swap.
152 //
153 const ElementType eps = lamch<ElementType>(Precision);
154 const ElementType smallNum = lamch<ElementType>(SafeMin) / eps;
155 const ElementType thresh = max(Ten*eps*normD, smallNum);
156 //
157 // Solve T11*X - X*T22 = scale*T12 for X.
158 //
159 const auto T11 = D(_(1,n1),_(1,n1));
160 const auto T12 = D(_(1,n1),_(n1+1,nd));
161 const auto T22 = D(_(n1+1,nd),_(n1+1,nd));
162
163 auto _X = X(_(1,n1),_(1,n2));
164
165 lasy2(false, false, IndexType(-1), T11, T22, T12, scale, _X, normX);
166 //
167 // Swap the adjacent diagonal blocks.
168 //
169 const IndexType k = n1 + n1 + n2 - 3;
170
171 switch (k) {
172 //
173 // N1 = 1, N2 = 2: generate elementary reflector H so that:
174 //
175 // ( scale, X11, X12 ) H = ( 0, 0, * )
176 //
177 case 1:
178 u(1) = scale;
179 u(2) = X(1,1);
180 u(3) = X(1,2);
181 larfg(IndexType(3), u(3), u(_(1,2)), tau);
182 u(3) = One;
183 t11 = T(j1,j1);
184 //
185 // Perform swap provisionally on diagonal block in D.
186 //
187 larfx(Left, u, tau, _D, work(_(1,3)));
188 larfx(Right, u, tau, _D, work(_(1,3)));
189 //
190 // Test whether to reject swap.
191 //
192 if (max(abs(D(3,1)), abs(D(3,2)), abs(D(3,3)-t11))>thresh) {
193 //
194 // Return 1 if swap was rejected.
195 //
196 return 1;
197 }
198 //
199 // Accept swap: apply transformation to the entire matrix T.
200 //
201 larfx(Left, u, tau, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));
202 larfx(Right, u, tau, T(_(1,j2),_(j1,j1+3-1)), work(_(1,j2)));
203
204 T(j3,j1) = Zero;
205 T(j3,j2) = Zero;
206 T(j3,j3) = t11;
207
208 if (computeQ) {
209 //
210 // Accumulate transformation in the matrix Q.
211 //
212 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);
213 }
214 break;
215
216 case 2:
217 //
218 // N1 = 2, N2 = 1: generate elementary reflector H so that:
219 //
220 // H ( -X11 ) = ( * )
221 // ( -X21 ) = ( 0 )
222 // ( scale ) = ( 0 )
223 //
224 u(1) = -X(1,1);
225 u(2) = -X(2,1);
226 u(3) = scale;
227 larfg(IndexType(3), u(1), u(_(2,3)), tau);
228 u(1) = One;
229 t33 = T(j3,j3);
230 //
231 // Perform swap provisionally on diagonal block in D.
232 //
233 larfx(Left, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));
234 larfx(Right, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));
235 //
236 // Test whether to reject swap.
237 //
238 if (max(abs(D(2,1)), abs(D(3,1)), abs(D(1,1)-t33))>thresh) {
239 //
240 // Return 1 if swap was rejected.
241 //
242 return 1;
243 }
244 //
245 // Accept swap: apply transformation to the entire matrix T.
246 //
247 larfx(Right, u, tau, T(_(1,j3),_(j1, j1+3-1)), work(_(1,j3)));
248 larfx(Left, u, tau, T(_(j1,j1+3-1),_(j2,n)), work(_(1,n-j1)));
249
250 T(j1,j1) = t33;
251 T(j2,j1) = Zero;
252 T(j3,j1) = Zero;
253
254 if (computeQ) {
255 //
256 // Accumulate transformation in the matrix Q.
257 //
258 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);
259 }
260 break;
261
262 case 3:
263 //
264 // N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so
265 // that:
266 //
267 // H(2) H(1) ( -X11 -X12 ) = ( * * )
268 // ( -X21 -X22 ) ( 0 * )
269 // ( scale 0 ) ( 0 0 )
270 // ( 0 scale ) ( 0 0 )
271 //
272 u1(1) = -X(1,1);
273 u1(2) = -X(2,1);
274 u1(3) = scale;
275 larfg(IndexType(3), u1(1), u1(_(2,3)), tau1);
276 u1(1) = One;
277
278 const ElementType temp = -tau1*(X(1,2)+u1(2)*X(2,2));
279 u2(1) = -temp*u1(2) - X(2,2);
280 u2(2) = -temp*u1(3);
281 u2(3) = scale;
282 larfg(IndexType(3), u2(1), u2(_(2,3)), tau2);
283 u2(1) = One;
284 //
285 // Perform swap provisionally on diagonal block in D.
286 //
287 larfx(Left, u1, tau1, D(_(1,3),_(1,4)), work(_(1,4)));
288 larfx(Right, u1, tau1, D(_(1,4),_(1,3)), work(_(1,4)));
289 larfx(Left, u2, tau2, D(_(2,4),_(1,4)), work(_(1,4)));
290 larfx(Right, u2, tau2, D(_(1,4),_(2,4)), work(_(1,4)));
291 //
292 // Test whether to reject swap.
293 //
294 if (max(abs(D(3,1)), abs(D(3,2)), abs(D(4,1)), abs(D(4,2)))>thresh)
295 {
296 //
297 // Return 1 if swap was rejected.
298 //
299 return 1;
300 }
301 //
302 // Accept swap: apply transformation to the entire matrix T.
303 //
304 larfx(Left, u1, tau1, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));
305 larfx(Right, u1, tau1, T(_(1,j4),_(j1,j1+3-1)), work(_(1,j4)));
306 larfx(Left, u2, tau2, T(_(j2,j2+3-1),_(j1,n)), work(_(1,n-j1+1)));
307 larfx(Right, u2, tau2, T(_(1,j4),_(j2,j2+3-1)), work(_(1,j4)));
308
309 T(j3,j1) = Zero;
310 T(j3,j2) = Zero;
311 T(j4,j1) = Zero;
312 T(j4,j2) = Zero;
313
314 if (computeQ) {
315 //
316 // Accumulate transformation in the matrix Q.
317 //
318 larfx(Right, u1, tau1, Q(_,_(j1,j1+3-1)), work);
319 larfx(Right, u2, tau2, Q(_,_(j2,j2+3-1)), work);
320 }
321 }
322
323 if (n2==2) {
324 //
325 // Standardize new 2-by-2 block T11
326 //
327 lanv2(T(j1,j1), T(j1,j2), T(j2,j1), T(j2,j2),
328 wr1, wi1, wr2, wi2, cs, sn);
329 blas::rot(T(j1,_(j1+2,n)), T(j2,_(j1+2,n)), cs, sn);
330 blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);
331 if (computeQ) {
332 blas::rot(Q(_,j1), Q(_,j2), cs, sn);
333 }
334 }
335
336 if (n1==2) {
337 //
338 // Standardize new 2-by-2 block T22
339 //
340 const IndexType j3 = j1 + n2;
341 const IndexType j4 = j3 + 1;
342 lanv2(T(j3,j3), T(j3,j4), T(j4,j3), T(j4,j4),
343 wr1, wi1, wr2, wi2, cs, sn);
344 if (j3+2<=n) {
345 blas::rot(T(j3,_(j3+2,n)), T(j4,_(j3+2,n)), cs, sn);
346 }
347 blas::rot(T(_(1,j3-1),j3), T(_(1,j3-1),j4), cs, sn);
348 if (computeQ) {
349 blas::rot(Q(_,j3), Q(_,j4), cs, sn);
350 }
351 }
352 }
353 return 0;
354 }
355
356 //== interface for native lapack ===============================================
357
358 #ifdef CHECK_CXXLAPACK
359
360 template <typename MT, typename MQ, typename IndexType, typename VWORK>
361 IndexType
362 laexc_native(bool computeQ,
363 GeMatrix<MT> &T,
364 GeMatrix<MQ> &Q,
365 IndexType j1,
366 IndexType n1,
367 IndexType n2,
368 DenseVector<VWORK> &work)
369 {
370 typedef typename GeMatrix<MT>::ElementType ElementType;
371
372 const LOGICAL WANTQ = computeQ;
373 const INTEGER N = T.numRows();
374 const INTEGER LDT = T.leadingDimension();
375 const INTEGER LDQ = Q.leadingDimension();
376 const INTEGER J1 = j1;
377 const INTEGER N1 = n1;
378 const INTEGER N2 = n2;
379 INTEGER INFO;
380
381 if (IsSame<ElementType,DOUBLE>::value) {
382 LAPACK_IMPL(dlaexc)(&WANTQ,
383 &N,
384 T.data(),
385 &LDT,
386 Q.data(),
387 &LDQ,
388 &J1,
389 &N1,
390 &N2,
391 work.data(),
392 &INFO);
393 } else {
394 ASSERT(0);
395 }
396 ASSERT(INFO>=0);
397
398 return INFO;
399 }
400
401 #endif // CHECK_CXXLAPACK
402
403 //== public interface ==========================================================
404
405 template <typename MT, typename MQ, typename IndexType, typename VWORK>
406 IndexType
407 laexc(bool computeQ,
408 GeMatrix<MT> &T,
409 GeMatrix<MQ> &Q,
410 IndexType j1,
411 IndexType n1,
412 IndexType n2,
413 DenseVector<VWORK> &work)
414 {
415 LAPACK_DEBUG_OUT("BEGIN: laexc");
416
417 //
418 // Test the input parameters
419 //
420 # ifndef NDEBUG
421 ASSERT(T.firstRow()==1);
422 ASSERT(T.firstCol()==1);
423 ASSERT(T.numRows()==T.numCols());
424
425 const IndexType n = T.numRows();
426
427 if (computeQ) {
428 ASSERT(Q.firstRow()==1);
429 ASSERT(Q.firstCol()==1);
430 ASSERT(Q.numRows()==Q.numCols());
431 ASSERT(Q.numRows()==n);
432 }
433
434 ASSERT(j1>=1);
435 ASSERT((n1==0) || (n1==1) || (n1==2));
436 ASSERT((n2==0) || (n2==1) || (n2==2));
437
438 ASSERT(work.firstIndex()==1);
439 ASSERT(work.length()==n);
440 # endif
441
442 # ifdef CHECK_CXXLAPACK
443 //
444 // Make copies of output arguments
445 //
446 typename GeMatrix<MT>::NoView T_org = T;
447 typename GeMatrix<MQ>::NoView Q_org = Q;
448 typename DenseVector<VWORK>::NoView work_org = work;
449 # endif
450
451 //
452 // Call implementation
453 //
454 IndexType info = laexc_generic(computeQ, T, Q, j1, n1, n2, work);
455
456 # ifdef CHECK_CXXLAPACK
457 //
458 // Make copies of results computed by the generic implementation
459 //
460 typename GeMatrix<MT>::NoView T_generic = T;
461 typename GeMatrix<MQ>::NoView Q_generic = Q;
462 typename DenseVector<VWORK>::NoView work_generic = work;
463
464 //
465 // restore output arguments
466 //
467 T = T_org;
468 Q = Q_org;
469 work = work_org;
470
471 //
472 // Compare generic results with results from the native implementation
473 //
474
475 IndexType _info = laexc_native(computeQ, T, Q, j1, n1, n2, work);
476
477 bool failed = false;
478 if (! isIdentical(T_generic, T, "T_generic", "T")) {
479 std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
480 std::cerr << "F77LAPACK: T = " << T << std::endl;
481 failed = true;
482 }
483
484 if (! isIdentical(Q_generic, Q, "Q_generic", "Q")) {
485 std::cerr << "CXXLAPACK: Q_generic = " << Q_generic << std::endl;
486 std::cerr << "F77LAPACK: Q = " << Q << std::endl;
487 failed = true;
488 }
489
490 if (! isIdentical(work_generic, work, "work_generic", "work")) {
491 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
492 std::cerr << "F77LAPACK: work = " << work << std::endl;
493 failed = true;
494 }
495
496 if (! isIdentical(info, _info, " info", "_info")) {
497 std::cerr << "CXXLAPACK: info = " << info << std::endl;
498 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
499 failed = true;
500 }
501
502 if (failed) {
503 ASSERT(0);
504 }
505 # endif
506
507 LAPACK_DEBUG_OUT("END: laexc");
508
509 return info;
510 }
511
512 //-- forwarding ----------------------------------------------------------------
513 template <typename MT, typename MQ, typename IndexType, typename VWORK>
514 IndexType
515 laexc(bool computeQ,
516 MT &&T,
517 MQ &&Q,
518 IndexType j1,
519 IndexType n1,
520 IndexType n2,
521 VWORK &&work)
522 {
523 CHECKPOINT_ENTER;
524 const IndexType info = laexc(computeQ, T, Q, j1, n1, n2, work);
525 CHECKPOINT_LEAVE;
526
527 return info;
528 }
529
530 } } // namespace lapack, flens
531
532 #endif // FLENS_LAPACK_EIG_LAEVC_TCC