1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK,
 36       $                   INFO )
 37  *
 38  *  -- LAPACK auxiliary routine (version 3.2.2) --
 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 41  *     June 2010
 42  */
 43 
 44 #ifndef FLENS_LAPACK_EIG_LAEXC_TCC
 45 #define FLENS_LAPACK_EIG_LAEXC_TCC 1
 46 
 47 #include <flens/aux/aux.h>
 48 #include <flens/blas/blas.h>
 49 #include <flens/lapack/lapack.h>
 50 
 51 namespace flens { namespace lapack {
 52 
 53 //== generic lapack implementation =============================================
 54 
 55 template <typename MT, typename MQ, typename IndexType, typename VWORK>
 56 IndexType
 57 laexc_generic(bool                          computeQ,
 58               GeMatrix<MT>                  &T,
 59               GeMatrix<MQ>                  &Q,
 60               IndexType                     j1,
 61               IndexType                     n1,
 62               IndexType                     n2,
 63               DenseVector<VWORK>            &work)
 64 {
 65     using std::abs;
 66 
 67     typedef typename GeMatrix<MT>::View         GeMatrixView;
 68     typedef typename GeMatrix<MT>::VectorView   DenseVectorView;
 69     typedef typename GeMatrix<MT>::ElementType  ElementType;
 70 
 71     const ElementType Zero(0), One(1), Ten(10);
 72 
 73     const IndexType n = T.numRows();
 74 
 75     const Underscore<IndexType>  _;
 76 
 77 //
 78 //    .. Local Arrays ..
 79 //
 80     ElementType _dData[16], _xData[4];
 81     GeMatrixView  D = typename GeMatrixView::Engine(44, _dData, 4);
 82     GeMatrixView  X = typename GeMatrixView::Engine(22, _xData, 2);
 83 
 84     ElementType _uData[3], _u1Data[3], _u2Data[3];
 85     DenseVectorView u  = typename DenseVectorView::Engine(3, _uData);
 86     DenseVectorView u1 = typename DenseVectorView::Engine(3, _u1Data);
 87     DenseVectorView u2 = typename DenseVectorView::Engine(3, _u2Data);
 88 //
 89 //  Quick return if possible
 90 //
 91     if (n==0 || n1==0 || n2==0) {
 92         return 0;
 93     }
 94     if (j1+n1>n) {
 95         return 0;
 96     }
 97 
 98     const IndexType j2 = j1 + 1;
 99     const IndexType j3 = j1 + 2;
100     const IndexType j4 = j1 + 3;
101 
102     ElementType t11, t22, t33;
103 
104     if (n1==1 && n2==1) {
105 //
106 //      Swap two 1-by-1 blocks.
107 //
108         t11 = T(j1,j1);
109         t22 = T(j2,j2);
110 //
111 //      Determine the transformation to perform the interchange.
112 //
113         ElementType cs, sn, temp;
114         lartg(T(j1,j2), t22-t11, cs, sn, temp);
115 //
116 //      Apply transformation to the matrix T.
117 //
118         if (j3<=n) {
119             blas::rot(T(j1,_(j3,n)), T(j2,_(j3,n)), cs, sn);
120         }
121         blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);
122 
123         T(j1,j1) = t22;
124         T(j2,j2) = t11;
125 
126         if (computeQ) {
127 //
128 //          Accumulate transformation in the matrix Q.
129 //
130             blas::rot(Q(_,j1), Q(_,j2), cs, sn);
131         }
132 
133     } else {
134 //
135 //      Swapping involves at least one 2-by-2 block.
136 //
137 //      Copy the diagonal block of order N1+N2 to the local array D
138 //      and compute its norm.
139 //
140         const IndexType nd = n1 + n2;
141         auto _D = D(_(1,nd),_(1,nd));
142 
143         _D = T(_(j1,j1+nd-1),_(j1,j1+nd-1));
144         ElementType normD = lan(MaximumNorm, _D);
145 
146         ElementType cs, sn, wr1, wr2, wi1, wi2;
147 
148         ElementType scale, normX, tau, tau1, tau2;
149 //
150 //      Compute machine-dependent threshold for test for accepting
151 //      swap.
152 //
153         const ElementType eps = lamch<ElementType>(Precision);
154         const ElementType smallNum = lamch<ElementType>(SafeMin) / eps;
155         const ElementType thresh = max(Ten*eps*normD, smallNum);
156 //
157 //      Solve T11*X - X*T22 = scale*T12 for X.
158 //
159         const auto T11 = D(_(1,n1),_(1,n1));
160         const auto T12 = D(_(1,n1),_(n1+1,nd));
161         const auto T22 = D(_(n1+1,nd),_(n1+1,nd));
162 
163         auto _X = X(_(1,n1),_(1,n2));
164 
165         lasy2(falsefalse, IndexType(-1), T11, T22, T12, scale, _X, normX);
166 //
167 //      Swap the adjacent diagonal blocks.
168 //
169         const IndexType k = n1 + n1 + n2 - 3;
170 
171         switch (k) {
172 //
173 //          N1 = 1, N2 = 2: generate elementary reflector H so that:
174 //
175 //          ( scale, X11, X12 ) H = ( 0, 0, * )
176 //
177         case 1:
178             u(1) = scale;
179             u(2) = X(1,1);
180             u(3) = X(1,2);
181             larfg(IndexType(3), u(3), u(_(1,2)), tau);
182             u(3) = One;
183             t11 = T(j1,j1);
184 //
185 //          Perform swap provisionally on diagonal block in D.
186 //
187             larfx(Left, u, tau, _D, work(_(1,3)));
188             larfx(Right, u, tau, _D, work(_(1,3)));
189 //
190 //          Test whether to reject swap.
191 //
192             if (max(abs(D(3,1)), abs(D(3,2)), abs(D(3,3)-t11))>thresh) {
193 //
194 //              Return 1 if swap was rejected.
195 //
196                 return 1;
197             }
198 //
199 //          Accept swap: apply transformation to the entire matrix T.
200 //
201             larfx(Left, u, tau, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));
202             larfx(Right, u, tau, T(_(1,j2),_(j1,j1+3-1)), work(_(1,j2)));
203 
204             T(j3,j1) = Zero;
205             T(j3,j2) = Zero;
206             T(j3,j3) = t11;
207 
208             if (computeQ) {
209 //
210 //              Accumulate transformation in the matrix Q.
211 //
212                 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);
213             }
214             break;
215 
216         case 2:
217 //
218 //          N1 = 2, N2 = 1: generate elementary reflector H so that:
219 //
220 //          H (  -X11 ) = ( * )
221 //            (  -X21 ) = ( 0 )
222 //            ( scale ) = ( 0 )
223 //
224             u(1) = -X(1,1);
225             u(2) = -X(2,1);
226             u(3) = scale;
227             larfg(IndexType(3), u(1), u(_(2,3)), tau);
228             u(1) = One;
229             t33 = T(j3,j3);
230 //
231 //          Perform swap provisionally on diagonal block in D.
232 //
233             larfx(Left, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));
234             larfx(Right, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));
235 //
236 //          Test whether to reject swap.
237 //
238             if (max(abs(D(2,1)), abs(D(3,1)), abs(D(1,1)-t33))>thresh) {
239 //
240 //              Return 1 if swap was rejected.
241 //
242                 return 1;
243             }
244 //
245 //          Accept swap: apply transformation to the entire matrix T.
246 //
247             larfx(Right, u, tau, T(_(1,j3),_(j1, j1+3-1)), work(_(1,j3)));
248             larfx(Left, u, tau, T(_(j1,j1+3-1),_(j2,n)), work(_(1,n-j1)));
249 
250             T(j1,j1) = t33;
251             T(j2,j1) = Zero;
252             T(j3,j1) = Zero;
253 
254             if (computeQ) {
255 //
256 //              Accumulate transformation in the matrix Q.
257 //
258                 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);
259             }
260             break;
261 
262         case 3:
263 //
264 //          N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so
265 //          that:
266 //
267 //          H(2) H(1) (  -X11  -X12 ) = (  *  * )
268 //                    (  -X21  -X22 )   (  0  * )
269 //                    ( scale    0  )   (  0  0 )
270 //                    (    0  scale )   (  0  0 )
271 //
272             u1(1) = -X(1,1);
273             u1(2) = -X(2,1);
274             u1(3) = scale;
275             larfg(IndexType(3), u1(1), u1(_(2,3)), tau1);
276             u1(1) = One;
277 
278             const ElementType temp = -tau1*(X(1,2)+u1(2)*X(2,2));
279             u2(1) = -temp*u1(2) - X(2,2);
280             u2(2) = -temp*u1(3);
281             u2(3) = scale;
282             larfg(IndexType(3), u2(1), u2(_(2,3)), tau2);
283             u2(1) = One;
284 //
285 //          Perform swap provisionally on diagonal block in D.
286 //
287             larfx(Left,  u1, tau1, D(_(1,3),_(1,4)), work(_(1,4)));
288             larfx(Right, u1, tau1, D(_(1,4),_(1,3)), work(_(1,4)));
289             larfx(Left,  u2, tau2, D(_(2,4),_(1,4)), work(_(1,4)));
290             larfx(Right, u2, tau2, D(_(1,4),_(2,4)), work(_(1,4)));
291 //
292 //          Test whether to reject swap.
293 //
294             if (max(abs(D(3,1)), abs(D(3,2)), abs(D(4,1)), abs(D(4,2)))>thresh)
295             {
296 //
297 //              Return 1 if swap was rejected.
298 //
299                 return 1;
300             }
301 //
302 //          Accept swap: apply transformation to the entire matrix T.
303 //
304             larfx(Left,  u1, tau1, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));
305             larfx(Right, u1, tau1, T(_(1,j4),_(j1,j1+3-1)), work(_(1,j4)));
306             larfx(Left,  u2, tau2, T(_(j2,j2+3-1),_(j1,n)), work(_(1,n-j1+1)));
307             larfx(Right, u2, tau2, T(_(1,j4),_(j2,j2+3-1)), work(_(1,j4)));
308 
309             T(j3,j1) = Zero;
310             T(j3,j2) = Zero;
311             T(j4,j1) = Zero;
312             T(j4,j2) = Zero;
313 
314             if (computeQ) {
315 //
316 //              Accumulate transformation in the matrix Q.
317 //
318                 larfx(Right, u1, tau1, Q(_,_(j1,j1+3-1)), work);
319                 larfx(Right, u2, tau2, Q(_,_(j2,j2+3-1)), work);
320             }
321         }
322 
323         if (n2==2) {
324 //
325 //          Standardize new 2-by-2 block T11
326 //
327             lanv2(T(j1,j1), T(j1,j2), T(j2,j1), T(j2,j2),
328                   wr1, wi1, wr2, wi2, cs, sn);
329             blas::rot(T(j1,_(j1+2,n)), T(j2,_(j1+2,n)), cs, sn);
330             blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);
331             if (computeQ) {
332                 blas::rot(Q(_,j1), Q(_,j2), cs, sn);
333             }
334         }
335 
336         if (n1==2) {
337 //
338 //          Standardize new 2-by-2 block T22
339 //
340             const IndexType j3 = j1 + n2;
341             const IndexType j4 = j3 + 1;
342             lanv2(T(j3,j3), T(j3,j4), T(j4,j3), T(j4,j4),
343                   wr1, wi1, wr2, wi2, cs, sn);
344             if (j3+2<=n) {
345                 blas::rot(T(j3,_(j3+2,n)), T(j4,_(j3+2,n)), cs, sn);
346             }
347             blas::rot(T(_(1,j3-1),j3), T(_(1,j3-1),j4), cs, sn);
348             if (computeQ) {
349                 blas::rot(Q(_,j3), Q(_,j4), cs, sn);
350             }
351         }
352     }
353     return 0;
354 }
355 
356 //== interface for native lapack ===============================================
357 
358 #ifdef CHECK_CXXLAPACK
359 
360 template <typename MT, typename MQ, typename IndexType, typename VWORK>
361 IndexType
362 laexc_native(bool                          computeQ,
363              GeMatrix<MT>                  &T,
364              GeMatrix<MQ>                  &Q,
365              IndexType                     j1,
366              IndexType                     n1,
367              IndexType                     n2,
368              DenseVector<VWORK>            &work)
369 {
370     typedef typename GeMatrix<MT>::ElementType ElementType;
371 
372     const LOGICAL   WANTQ  = computeQ;
373     const INTEGER   N      = T.numRows();
374     const INTEGER   LDT    = T.leadingDimension();
375     const INTEGER   LDQ    = Q.leadingDimension();
376     const INTEGER   J1     = j1;
377     const INTEGER   N1     = n1;
378     const INTEGER   N2     = n2;
379     INTEGER         INFO;
380 
381     if (IsSame<ElementType,DOUBLE>::value) {
382         LAPACK_IMPL(dlaexc)(&WANTQ,
383                             &N,
384                             T.data(),
385                             &LDT,
386                             Q.data(),
387                             &LDQ,
388                             &J1,
389                             &N1,
390                             &N2,
391                             work.data(),
392                             &INFO);
393     } else {
394         ASSERT(0);
395     }
396     ASSERT(INFO>=0);
397 
398     return INFO;
399 }
400 
401 #endif // CHECK_CXXLAPACK
402 
403 //== public interface ==========================================================
404 
405 template <typename MT, typename MQ, typename IndexType, typename VWORK>
406 IndexType
407 laexc(bool                          computeQ,
408       GeMatrix<MT>                  &T,
409       GeMatrix<MQ>                  &Q,
410       IndexType                     j1,
411       IndexType                     n1,
412       IndexType                     n2,
413       DenseVector<VWORK>            &work)
414 {
415     LAPACK_DEBUG_OUT("BEGIN: laexc");
416 
417 //
418 //  Test the input parameters
419 //
420 #   ifndef NDEBUG
421     ASSERT(T.firstRow()==1);
422     ASSERT(T.firstCol()==1);
423     ASSERT(T.numRows()==T.numCols());
424 
425     const IndexType n = T.numRows();
426 
427     if (computeQ) {
428         ASSERT(Q.firstRow()==1);
429         ASSERT(Q.firstCol()==1);
430         ASSERT(Q.numRows()==Q.numCols());
431         ASSERT(Q.numRows()==n);
432     }
433 
434     ASSERT(j1>=1);
435     ASSERT((n1==0) || (n1==1) || (n1==2));
436     ASSERT((n2==0) || (n2==1) || (n2==2));
437 
438     ASSERT(work.firstIndex()==1);
439     ASSERT(work.length()==n);
440 #   endif
441 
442 #   ifdef CHECK_CXXLAPACK
443 //
444 //  Make copies of output arguments
445 //
446     typename GeMatrix<MT>::NoView           T_org      = T;
447     typename GeMatrix<MQ>::NoView           Q_org      = Q;
448     typename DenseVector<VWORK>::NoView     work_org   = work;
449 #   endif
450 
451 //
452 //  Call implementation
453 //
454     IndexType info = laexc_generic(computeQ, T, Q, j1, n1, n2, work);
455 
456 #   ifdef CHECK_CXXLAPACK
457 //
458 //  Make copies of results computed by the generic implementation
459 //
460     typename GeMatrix<MT>::NoView           T_generic       = T;
461     typename GeMatrix<MQ>::NoView           Q_generic       = Q;
462     typename DenseVector<VWORK>::NoView     work_generic    = work;
463 
464 //
465 //  restore output arguments
466 //
467     T = T_org;
468     Q = Q_org;
469     work = work_org;
470 
471 //
472 //  Compare generic results with results from the native implementation
473 //
474 
475     IndexType _info = laexc_native(computeQ, T, Q, j1, n1, n2, work);
476 
477     bool failed = false;
478     if (! isIdentical(T_generic, T, "T_generic""T")) {
479         std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
480         std::cerr << "F77LAPACK: T = " << T << std::endl;
481         failed = true;
482     }
483 
484     if (! isIdentical(Q_generic, Q, "Q_generic""Q")) {
485         std::cerr << "CXXLAPACK: Q_generic = " << Q_generic << std::endl;
486         std::cerr << "F77LAPACK: Q = " << Q << std::endl;
487         failed = true;
488     }
489 
490     if (! isIdentical(work_generic, work, "work_generic""work")) {
491         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
492         std::cerr << "F77LAPACK: work = " << work << std::endl;
493         failed = true;
494     }
495 
496     if (! isIdentical(info, _info, " info""_info")) {
497         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
498         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
499         failed = true;
500     }
501 
502     if (failed) {
503         ASSERT(0);
504     }
505 #   endif
506 
507     LAPACK_DEBUG_OUT("END: laexc");
508 
509     return info;
510 }
511 
512 //-- forwarding ----------------------------------------------------------------
513 template <typename MT, typename MQ, typename IndexType, typename VWORK>
514 IndexType
515 laexc(bool              computeQ,
516       MT                &&T,
517       MQ                &&Q,
518       IndexType         j1,
519       IndexType         n1,
520       IndexType         n2,
521       VWORK             &&work)
522 {
523     CHECKPOINT_ENTER;
524     const IndexType info = laexc(computeQ, T, Q, j1, n1, n2, work);
525     CHECKPOINT_LEAVE;
526 
527     return info;
528 }
529 
530 } } // namespace lapack, flens
531 
532 #endif // FLENS_LAPACK_EIG_LAEVC_TCC