1 /*
   2  *   Copyright (c) 2011, Michael Lehn
   3  *
   4  *   All rights reserved.
   5  *
   6  *   Redistribution and use in source and binary forms, with or without
   7  *   modification, are permitted provided that the following conditions
   8  *   are met:
   9  *
  10  *   1) Redistributions of source code must retain the above copyright
  11  *      notice, this list of conditions and the following disclaimer.
  12  *   2) Redistributions in binary form must reproduce the above copyright
  13  *      notice, this list of conditions and the following disclaimer in
  14  *      the documentation and/or other materials provided with the
  15  *      distribution.
  16  *   3) Neither the name of the FLENS development group nor the names of
  17  *      its contributors may be used to endorse or promote products derived
  18  *      from this software without specific prior written permission.
  19  *
  20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32 
  33 /* Based on
  34  *
  35        SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
  36       $                   LDC, SCALE, INFO )
  37  *
  38  *  -- LAPACK routine (version 3.3.1) --
  39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  41  *  -- April 2011                                                      --
  42  *
  43  */
  44 
  45 #ifndef FLENS_LAPACK_EIG_TRSYL_TCC
  46 #define FLENS_LAPACK_EIG_TRSYL_TCC 1
  47 
  48 #include <cmath>
  49 
  50 namespace flens { namespace lapack {
  51 
  52 
  53 //== generic lapack implementation =============================================
  54 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
  55 typename GeMatrix<MC>::IndexType
  56 trsyl_generic(Transpose             transA,
  57               Transpose             transB,
  58               ISGN                  iSign,
  59               const GeMatrix<MA>    &A,
  60               const GeMatrix<MB>    &B,
  61               GeMatrix<MC>          &C,
  62               SCALE                 &scale)
  63 {
  64     using std::abs;
  65 
  66     typedef typename GeMatrix<MC>::ElementType  T;
  67     typedef typename GeMatrix<MC>::IndexType    IndexType;
  68 
  69     const Underscore<IndexType> _;
  70     const IndexType m = C.numRows();
  71     const IndexType n = C.numCols();
  72 
  73     const T Zero(0), One(1);
  74 //
  75 //  .. Local Arrays ..
  76 //
  77     T   _vecData[4], _xData[4];
  78     GeMatrixView<T>
  79         VEC    = typename GeMatrixView<T>::Engine(22, _vecData, 2),
  80         X      = typename GeMatrixView<T>::Engine(22, _xData, 2);
  81 
  82 //
  83 //  Decode and Test input parameters
  84 //
  85     const bool noTransA = (transA==NoTrans);
  86     const bool noTransB = (transB==NoTrans);
  87 
  88     IndexType info = 0;
  89 //
  90 //  Quick return if possible
  91 //
  92     scale = One;
  93     if (m==0 || n==0) {
  94         return info;
  95     }
  96 //
  97 //  Set constants to control overflow
  98 //
  99     const T eps = lamch<T>(Precision);
 100     T smallNum = lamch<T>(SafeMin);
 101     T bigNum = One / smallNum;
 102     labad(smallNum, bigNum);
 103     smallNum = smallNum*T(m*n) / eps;
 104     bigNum = One / smallNum;
 105 
 106     const T sMin = max(smallNum,
 107                        eps*lan(MaximumNorm, A),
 108                        eps*lan(MaximumNorm, B));
 109 
 110     const T sign = iSign;
 111 
 112     IndexType   lNext, l1, l2, kNext, k1, k2;
 113 
 114     T           sumL, sumR, scaleC, A11, DA11, DB, xNorm;
 115 
 116     if (noTransA && noTransB) {
 117 //
 118 //      Solve    A*X + ISGN*X*B = scale*C.
 119 //
 120 //      The (K,L)th block of X is determined starting from
 121 //      bottom-left corner column by column by
 122 //
 123 //       A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
 124 //
 125 //      Where
 126 //                 M                         L-1
 127 //       R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
 128 //               I=K+1                       J=1
 129 //
 130 //      Start column loop (index = L)
 131 //      L1 (L2) : column index of the first (first) row of X(K,L).
 132 //
 133         lNext = 1;
 134         for (IndexType l=1; l<=n; ++l) {
 135             if (l<lNext) {
 136                 continue;
 137             }
 138             if (l==n) {
 139                 l1 = l2 = l;
 140             } else {
 141                 if (B(l+1,l)!=Zero) {
 142                     l1 = l;
 143                     l2 = l + 1;
 144                     lNext = l + 2;
 145                 } else {
 146                     l1 = l2 = l;
 147                     lNext = l + 1;
 148                 }
 149             }
 150 //
 151 //          Start row loop (index = K)
 152 //          K1 (K2): row index of the first (last) row of X(K,L).
 153 //
 154             kNext = m;
 155             for (IndexType k=m; k>=1; --k) {
 156                 if (k>kNext) {
 157                     continue;
 158                 }
 159                 if (k==1) {
 160                     k1 = k2 = k;
 161                 } else {
 162                     if (A(k,k-1)!=Zero) {
 163                         k1 = k - 1;
 164                         k2 = k;
 165                         kNext = k - 2;
 166                     } else {
 167                         k1 = k2 = k;
 168                         kNext = k - 1;
 169                     }
 170                 }
 171 
 172                 if (l1==l2 && k1==k2) {
 173 
 174                     sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
 175                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 176                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 177                     scaleC = One;
 178 
 179                     A11 = A(k1,k1) + sign*B(l1,l1);
 180                     DA11 = abs(A11);
 181                     if (DA11<=sMin) {
 182                         A11 = sMin;
 183                         DA11 = sMin;
 184                         info = 1;
 185                     }
 186                     DB = abs(VEC(1,1));
 187                     if (DA11<One && DB>One) {
 188                         if (DB>bigNum*DA11 ) {
 189                             scaleC = One / DB;
 190                         }
 191                     }
 192                     X(1,1) = (VEC(1,1)*scaleC) / A11;
 193 
 194                     if (scaleC!=One) {
 195                         C     *=  scaleC;
 196                         scale *= scaleC;
 197                     }
 198                     C(k1,l1) = X(1,1);
 199 
 200                 } else if (l1==l2 && k1!=k2) {
 201 
 202                     sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
 203                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 204                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 205 
 206                     sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
 207                     sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
 208                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 209 
 210                     IndexType iErr = laln2(false1, sMin, One,
 211                                            A(_(k1,k1+1),_(k1,k1+1)),
 212                                            One, One, VEC(_,_(1,1)),
 213                                            -sign*B(l1,l1), Zero,
 214                                            X(_,_(1,1)), scaleC, xNorm);
 215                     if (iErr!=0) {
 216                         info = 1;
 217                     }
 218 
 219                     if (scaleC!=One) {
 220                         C     *=  scaleC;
 221                         scale *= scaleC;
 222                     }
 223                     C(k1,l1) = X(1,1);
 224                     C(k2,l1) = X(2,1);
 225 
 226                 } else if (l1!=l2 && k1==k2) {
 227 
 228                     sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
 229                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 230                     VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
 231 
 232                     sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l2);
 233                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
 234                     VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
 235 
 236                     IndexType iErr = laln2(true1, sMin, One,
 237                                            B(_(l1,l1+1),_(l1,l1+1)),
 238                                            One, One, VEC(_,_(1,1)),
 239                                            -sign*A(k1,k1), Zero,
 240                                            X(_,_(1,1)), scaleC, xNorm);
 241                     if (iErr!=0) {
 242                         info = 1;
 243                     }
 244 
 245                     if (scaleC!=One) {
 246                         C     *=  scaleC;
 247                         scale *= scaleC;
 248                     }
 249 
 250                     C(k1,l1) = X(1,1);
 251                     C(k1,l2) = X(2,1);
 252 
 253                 } else if (l1!=l2 && k1!=k2) {
 254 
 255                     sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
 256                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 257                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 258 
 259                     sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l2);
 260                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
 261                     VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
 262 
 263                     sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
 264                     sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
 265                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 266 
 267                     sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l2);
 268                     sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l2);
 269                     VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
 270 
 271                     IndexType iErr = lasy2(falsefalse, iSign,
 272                                            A(_(k1,k1+1),_(k1,k1+1)),
 273                                            B(_(l1,l1+1),_(l1,l1+1)),
 274                                            VEC, scaleC, X, xNorm);
 275 
 276                     if (iErr!=0) {
 277                         info = 1;
 278                     }
 279 
 280                     if (scaleC!=One) {
 281                         C     *=  scaleC;
 282                         scale *= scaleC;
 283                     }
 284                     C(k1,l1) = X(1,1);
 285                     C(k1,l2) = X(1,2);
 286                     C(k2,l1) = X(2,1);
 287                     C(k2,l2) = X(2,2);
 288                 }
 289 
 290             }
 291 
 292         }
 293 
 294     } else if(!noTransA && noTransB) {
 295 //
 296 //      Solve    A**T *X + ISGN*X*B = scale*C.
 297 //
 298 //      The (K,L)th block of X is determined starting from
 299 //      upper-left corner column by column by
 300 //
 301 //        A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
 302 //
 303 //      Where
 304 //                  K-1                          L-1
 305 //         R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
 306 //                  I=1                          J=1
 307 //
 308 //      Start column loop (index = L)
 309 //      L1 (L2): column index of the first (last) row of X(K,L)
 310 //
 311         lNext = 1;
 312         for (IndexType l=1; l<=n; ++l) {
 313             if (l<lNext) {
 314                 continue;
 315             }
 316             if (l==n){
 317                 l1 = l2 = l;
 318             } else {
 319                 if (B(l+1,l)!=Zero) {
 320                     l1 = l;
 321                     l2 = l + 1;
 322                     lNext = l + 2;
 323                 } else {
 324                     l1 = l2 = l;
 325                     lNext = l + 1;
 326                 }
 327             }
 328 //
 329 //          Start row loop (index = K)
 330 //          K1 (K2): row index of the first (last) row of X(K,L)
 331 //
 332             kNext = 1;
 333             for (IndexType k=1; k<=m; ++k) {
 334                 if (k<kNext) {
 335                     continue;
 336                 }
 337                 if (k==m) {
 338                     k1 = k2 = k;
 339                 } else {
 340                     if (A(k+1,k)!=Zero) {
 341                         k1 = k;
 342                         k2 = k + 1;
 343                         kNext = k + 2;
 344                     } else {
 345                         k1 = k2 = k;
 346                         kNext = k + 1;
 347                     }
 348                 }
 349 
 350                 if (l1==l2 && k1==k2) {
 351                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 352                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 353                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 354                     scaleC = One;
 355 
 356                     A11 = A(k1,k1) + sign*B(l1,l1);
 357                     DA11 = abs(A11);
 358                     if (DA11<=sMin) {
 359                         A11 = sMin;
 360                         DA11 = sMin;
 361                         info = 1;
 362                     }
 363                     DB = abs(VEC(1,1));
 364                     if (DA11<One && DB>One) {
 365                         if (DB>bigNum*DA11) {
 366                             scaleC = One/DB;
 367                         }
 368                     }
 369                     X(1,1) = (VEC(1,1)*scaleC) / A11;
 370 
 371                     if (scaleC!=One) {
 372                         C     *= scaleC;
 373                         scale *= scaleC;
 374                     }
 375                     C(k1,l1) = X(1,1);
 376 
 377                 } else if (l1==l2 && k1!=k2) {
 378 
 379                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 380                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 381                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 382 
 383                     sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
 384                     sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
 385                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 386 
 387                     IndexType iErr = laln2(true1, sMin, One,
 388                                            A(_(k1,k1+1),_(k1,k1+1)),
 389                                            One, One, VEC(_,_(1,1)),
 390                                            -sign*B(l1,l1), Zero,
 391                                            X(_,_(1,1)), scaleC, xNorm);
 392                     if (iErr!=0) {
 393                         info = 1;
 394                     }
 395 
 396                     if (scaleC!=One) {
 397                         C     *= scaleC;
 398                         scale *= scaleC;
 399                     }
 400                     C(k1,l1) = X(1,1);
 401                     C(k2,l1) = X(2,1);
 402 
 403                 } else if (l1!=l2 && k1==k2) {
 404 
 405                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 406                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 407                     VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
 408 
 409                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
 410                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
 411                     VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
 412 
 413                     IndexType iErr = laln2(true1, sMin, One,
 414                                            B(_(l1,l1+1),_(l1,l1+1)),
 415                                            One, One, VEC(_,_(1,1)),
 416                                            -sign*A(k1,k1), Zero,
 417                                            X(_,_(1,1)), scaleC, xNorm);
 418                     if (iErr!=0) {
 419                         info = 1;
 420                     }
 421 
 422                     if (scaleC!=One) {
 423                         C     *= scaleC;
 424                         scale *= scaleC;
 425                     }
 426                     C(k1,l1) = X(1,1);
 427                     C(k1,l2) = X(2,1);
 428 
 429                 } else if (l1!=l2 && k1!=k2) {
 430 
 431                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 432                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
 433                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 434 
 435                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
 436                     sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
 437                     VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
 438 
 439                     sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
 440                     sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
 441                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 442 
 443                     sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l2);
 444                     sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l2);
 445                     VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
 446 
 447                     IndexType iErr = lasy2(truefalse, iSign,
 448                                            A(_(k1,k1+1),_(k1,k1+1)),
 449                                            B(_(l1,l1+1),_(l1,l1+1)),
 450                                            VEC, scaleC, X, xNorm);
 451 
 452                     if (iErr!=0) {
 453                         info = 1;
 454                     }
 455 
 456                     if (scaleC!=One) {
 457                         C     *= scaleC;
 458                         scale *= scaleC;
 459                     }
 460                     C(k1,l1) = X(1,1);
 461                     C(k1,l2) = X(1,2);
 462                     C(k2,l1) = X(2,1);
 463                     C(k2,l2) = X(2,2);
 464                 }
 465 
 466             }
 467         }
 468 
 469     } else if (!noTransA && !noTransB) {
 470 //
 471 //      Solve    A**T*X + ISGN*X*B**T = scale*C.
 472 //
 473 //      The (K,L)th block of X is determined starting from
 474 //      top-right corner column by column by
 475 //
 476 //         A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
 477 //
 478 //      Where
 479 //                   K-1                            N
 480 //          R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
 481 //                   I=1                          J=L+1
 482 //
 483 //      Start column loop (index = L)
 484 //      L1 (L2): column index of the first (last) row of X(K,L)
 485 //
 486         lNext = n;
 487         for (IndexType l=n; l>=1; --l) {
 488             if (l>lNext) {
 489                 continue;
 490             }
 491             if (l==1) {
 492                 l1 = l2 = l;
 493             } else {
 494                 if (B(l,l-1)!=Zero) {
 495                     l1 = l-1;
 496                     l2 = l;
 497                     lNext = l - 2;
 498                 } else {
 499                     l1 = l2 = l;
 500                     lNext = l - 1;
 501                 }
 502             }
 503 //
 504 //          Start row loop (index = K)
 505 //          K1 (K2): row index of the first (last) row of X(K,L)
 506 //
 507             kNext = 1;
 508             for (IndexType k=1; k<=m; ++k) {
 509                 if (k<kNext) {
 510                     continue;
 511                 }
 512                 if (k==m) {
 513                     k1 = k2 = k;
 514                 } else {
 515                     if (A(k+1,k)!=Zero) {
 516                         k1 = k;
 517                         k2 = k + 1;
 518                         kNext = k + 2;
 519                     } else {
 520                         k1 = k2 = k;
 521                         kNext = k + 1;
 522                     }
 523                 }
 524 
 525                 if (l1==l2 && k1==k2) {
 526                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 527                     sumR = C(k1,_(l1+1,n)) * B(l1,_(l1+1,n));
 528                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 529                     scaleC = One;
 530 
 531                     A11 = A(k1,k1) + sign*B(l1,l1);
 532                     DA11 = abs(A11);
 533                     if (DA11<=sMin) {
 534                         A11 = sMin;
 535                         DA11 = sMin;
 536                         info = 1;
 537                     }
 538                     DB = abs(VEC(1,1));
 539                     if (DA11<One && DB>One) {
 540                         if (DB>bigNum*DA11) {
 541                             scaleC = One / DB;
 542                         }
 543                     }
 544                     X(1,1) = (VEC(1,1)*scaleC) / A11;
 545 
 546                     if (scaleC!=One) {
 547                         C     *= scaleC;
 548                         scale *= scaleC;
 549                     }
 550                     C(k1,l1) = X(1,1);
 551 
 552                 } else if (l1==l2 && k1!=k2) {
 553 
 554                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 555                     sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
 556                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 557 
 558                     sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
 559                     sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
 560                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 561 
 562                     IndexType iErr = laln2(true1, sMin, One,
 563                                            A(_(k1,k1+1),_(k1,k1+1)),
 564                                            One, One, VEC(_,_(1,1)),
 565                                            -sign*B(l1,l1), Zero,
 566                                            X(_,_(1,1)), scaleC, xNorm);
 567 
 568                     if (iErr!=0) {
 569                         info = 1;
 570                     }
 571 
 572                     if (scaleC!=One) {
 573                         C     *= scaleC;
 574                         scale *= scaleC;
 575                     }
 576                     C(k1,l1) = X(1,1);
 577                     C(k2,l1) = X(2,1);
 578 
 579                 } else if (l1!=l2 && k1==k2) {
 580 
 581                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 582                     sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
 583                     VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
 584 
 585                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
 586                     sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
 587                     VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
 588 
 589                     IndexType iErr = laln2(false1, sMin, One,
 590                                            B(_(l1,l1+1),_(l1,l1+1)),
 591                                            One, One, VEC(_,_(1,1)),
 592                                            -sign*A(k1,k1), Zero,
 593                                            X(_,_(1,1)), scaleC, xNorm);
 594 
 595                     if (iErr!=0) {
 596                         info = 1;
 597                     }
 598 
 599                     if (scaleC!=One) {
 600                         C     *= scaleC;
 601                         scale *= scaleC;
 602                     }
 603                     C(k1,l1) = X(1,1);
 604                     C(k1,l2) = X(2,1);
 605 
 606                 } else if (l1!=l2 && k1!=k2) {
 607 
 608                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
 609                     sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
 610                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 611 
 612                     sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
 613                     sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
 614                     VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
 615 
 616                     sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
 617                     sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
 618                     VEC(2,1) = C(k2,l1) - ( sumL+sign*sumR);
 619 
 620                     sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l2);
 621                     sumR = C(k2,_(l2+1,n)) * B(l2,_(l2+1,n));
 622                     VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
 623 
 624                     IndexType iErr = lasy2(truetrue, iSign,
 625                                            A(_(k1,k1+1),_(k1,k1+1)),
 626                                            B(_(l1,l1+1),_(l1,l1+1)),
 627                                            VEC, scaleC, X, xNorm);
 628 
 629                     if (iErr!=0) {
 630                         info = 1;
 631                     }
 632 
 633                     if (scaleC!=One) {
 634                         C     *= scaleC;
 635                         scale *= scaleC;
 636                     }
 637                     C(k1,l1) = X(1,1);
 638                     C(k1,l2) = X(1,2);
 639                     C(k2,l1) = X(2,1);
 640                     C(k2,l2) = X(2,2);
 641                 }
 642 
 643             }
 644         }
 645 
 646     } else if (noTransA && !noTransB) {
 647 //
 648 //      Solve    A*X + ISGN*X*B**T = scale*C.
 649 //
 650 //      The (K,L)th block of X is determined starting from
 651 //      bottom-right corner column by column by
 652 //
 653 //          A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
 654 //
 655 //      Where
 656 //                    M                          N
 657 //          R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
 658 //                  I=K+1                      J=L+1
 659 //
 660 //      Start column loop (index = L)
 661 //      L1 (L2): column index of the first (last) row of X(K,L)
 662 //
 663         lNext = n;
 664         for (IndexType l=n; l>=1; --l) {
 665             if (l>lNext) {
 666                 continue;
 667             }
 668             if (l==1) {
 669                 l1 = l2 = l;
 670             } else {
 671                 if (B(l,l-1)!=Zero) {
 672                     l1 = l - 1;
 673                     l2 = l;
 674                     lNext = l - 2;
 675                 } else {
 676                     l1 = l2 = l;
 677                     lNext = l - 1;
 678                 }
 679             }
 680 //
 681 //          Start row loop (index = K)
 682 //          K1 (K2): row index of the first (last) row of X(K,L)
 683 //
 684             kNext = m;
 685             for (IndexType k=m; k>=1; --k) {
 686                 if (k>kNext) {
 687                     continue;
 688                 }
 689                 if (k==1) {
 690                     k1 = k2 = k;
 691                 } else {
 692                     if (A(k,k-1)!=Zero) {
 693                         k1 = k - 1;
 694                         k2 = k;
 695                         kNext = k - 2;
 696                     } else {
 697                         k1 = k2 = k;
 698                         kNext = k - 1;
 699                     }
 700                 }
 701 
 702                 if (l1==l2 && k1==k2) {
 703                     sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
 704                     sumR = C(k1,_(l1+1,n)) * B(l1,_(l1+1,n));
 705                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 706                     scaleC = One;
 707 
 708                     A11 = A(k1,k1) + sign*B(l1,l1);
 709                     DA11 = abs(A11);
 710                     if (DA11<=sMin) {
 711                         A11 = sMin;
 712                         DA11 = sMin;
 713                         info = 1;
 714                     }
 715                     DB = abs(VEC(1,1));
 716                     if (DA11<One && DB>One) {
 717                         if (DB>bigNum*DA11) {
 718                             scaleC = One / DB;
 719                         }
 720                     }
 721                     X(1,1) = (VEC(1,1)*scaleC) / A11;
 722 
 723                     if (scaleC!=One) {
 724                         C     *= scaleC;
 725                         scale *= scaleC;
 726                     }
 727                     C(k1,l1) = X(1,1);
 728 
 729                 } else if (l1==l2 && k1!=k2) {
 730 
 731                     sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
 732                     sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
 733                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 734 
 735                     sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
 736                     sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
 737                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 738 
 739                     IndexType iErr = laln2(false1, sMin, One,
 740                                            A(_(k1,k1+1),_(k1,k1+1)),
 741                                            One, One, VEC(_,_(1,1)),
 742                                            -sign*B(l1,l1), Zero,
 743                                            X(_,_(1,1)), scaleC, xNorm);
 744 
 745                     if (iErr!=0) {
 746                         info = 1;
 747                     }
 748 
 749                     if (scaleC!=One) {
 750                         C     *= scaleC;
 751                         scale *= scaleC;
 752                     }
 753                     C(k1,l1) = X(1,1);
 754                     C(k2,l1) = X(2,1);
 755 
 756                 } else if (l1!=l2 && k1==k2) {
 757 
 758                     sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
 759                     sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
 760                     VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
 761 
 762                     sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l2);
 763                     sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
 764                     VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
 765 
 766                     IndexType iErr = laln2(false1, sMin, One,
 767                                            B(_(l1,l1+1),_(l1,l1+1)),
 768                                            One, One, VEC(_,_(1,1)),
 769                                            -sign*A(k1,k1), Zero,
 770                                            X(_,_(1,1)), scaleC, xNorm);
 771 
 772                     if (iErr!=0) {
 773                         info = 1;
 774                     }
 775 
 776                     if (scaleC!=One) {
 777                         C     *= scaleC;
 778                         scale *= scaleC;
 779                     }
 780                     C(k1,l1) = X(1,1);
 781                     C(k1,l2) = X(2,1);
 782 
 783                 } else if (l1!=l2 && k1!=k2) {
 784 
 785                     sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
 786                     sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
 787                     VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
 788 
 789                     sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l2);
 790                     sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
 791                     VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
 792 
 793                     sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
 794                     sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
 795                     VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
 796 
 797                     sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l2);
 798                     sumR = C(k2,_(l2+1,n)) * B(l2,_(l2+1,n));
 799                     VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
 800 
 801                     IndexType iErr = lasy2(falsetrue, iSign,
 802                                            A(_(k1,k1+1),_(k1,k1+1)),
 803                                            B(_(l1,l1+1),_(l1,l1+1)),
 804                                            VEC, scaleC, X, xNorm);
 805 
 806                     if (iErr!=0) {
 807                         info = 1;
 808                     }
 809 
 810                     if (scaleC!=One) {
 811                         C     *= scaleC;
 812                         scale *= scaleC;
 813                     }
 814                     C(k1,l1) = X(1,1);
 815                     C(k1,l2) = X(1,2);
 816                     C(k2,l1) = X(2,1);
 817                     C(k2,l2) = X(2,2);
 818                 }
 819 
 820             }
 821         }
 822 
 823     }
 824     return info;
 825 }
 826 
 827 //== interface for native lapack ===============================================
 828 
 829 #ifdef CHECK_CXXLAPACK
 830 
 831 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
 832 typename GeMatrix<MC>::IndexType
 833 trsyl_native(Transpose             transA,
 834              Transpose             transB,
 835              ISGN                  iSign,
 836              const GeMatrix<MA>    &A,
 837              const GeMatrix<MB>    &B,
 838              GeMatrix<MC>          &C,
 839              SCALE                 &scale)
 840 {
 841     typedef typename GeMatrix<MC>::ElementType   ElementType;
 842 
 843     const char       TRANA  = getF77LapackChar(transA);
 844     const char       TRANB  = getF77LapackChar(transB);
 845     const INTEGER    _ISGN  = iSign;
 846     const INTEGER    M      = C.numRows();
 847     const INTEGER    N      = C.numCols();
 848     const INTEGER    LDA    = A.leadingDimension();
 849     const INTEGER    LDB    = B.leadingDimension();
 850     const INTEGER    LDC    = C.leadingDimension();
 851     ElementType      _SCALE = scale;
 852     INTEGER          INFO;
 853 
 854     if (IsSame<ElementType,DOUBLE>::value) {
 855         LAPACK_IMPL(dtrsyl)(&TRANA,
 856                             &TRANB,
 857                             &_ISGN,
 858                             &M,
 859                             &N,
 860                             A.data(),
 861                             &LDA,
 862                             B.data(),
 863                             &LDB,
 864                             C.data(),
 865                             &LDC,
 866                             &_SCALE,
 867                             &INFO);
 868     } else {
 869         ASSERT(0);
 870     }
 871     scale = _SCALE;
 872     return INFO;
 873 }
 874 
 875 #endif // CHECK_CXXLAPACK
 876 
 877 //== public interface ==========================================================
 878 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
 879 typename GeMatrix<MC>::IndexType
 880 trsyl(Transpose             transA,
 881       Transpose             transB,
 882       ISGN                  iSign,
 883       const GeMatrix<MA>    &A,
 884       const GeMatrix<MB>    &B,
 885       GeMatrix<MC>          &C,
 886       SCALE                 &scale)
 887 {
 888     LAPACK_DEBUG_OUT("trsyl");
 889 
 890     typedef typename GeMatrix<MC>::IndexType    IndexType;
 891 
 892 //
 893 //  Test the input parameters
 894 //
 895 #   ifndef NDEBUG
 896     ASSERT(iSign==1 || iSign==-1);
 897     ASSERT(A.firstRow()==1);
 898     ASSERT(A.firstCol()==1);
 899     ASSERT(A.numRows()==A.numCols());
 900 
 901     ASSERT(B.firstRow()==1);
 902     ASSERT(B.firstCol()==1);
 903     ASSERT(B.numRows()==B.numCols());
 904 
 905     const IndexType m = A.numRows();
 906     const IndexType n = B.numRows();
 907 
 908     ASSERT(C.firstRow()==1);
 909     ASSERT(C.firstCol()==1);
 910     ASSERT(C.numRows()==m);
 911     ASSERT(C.numCols()==n);
 912 #   endif
 913 
 914 #   ifdef CHECK_CXXLAPACK
 915 //
 916 //  Make copies of output arguments
 917 //
 918     const typename GeMatrix<MC>::NoView   C_org       = C;
 919     SCALE                                 scale_org   = scale;
 920 #   endif
 921 
 922 //
 923 //  Call implementation
 924 //
 925     IndexType info = trsyl_generic(transA, transB, iSign, A, B, C, scale);
 926 
 927 #   ifdef CHECK_CXXLAPACK
 928 //
 929 //  Make copies of results computed by generic implementation
 930 //
 931     const typename GeMatrix<MC>::NoView   C_generic       = C;
 932     SCALE                                 scale_generic   = scale;
 933 //
 934 //  restore output arguments
 935 //
 936     C       = C_org;
 937     scale   = scale_org;
 938 //
 939 //  Compare generic results with results from the native implementation
 940 //
 941     IndexType _info = trsyl_native(transA, transB, iSign, A, B, C, scale);
 942 
 943     bool failed = false;
 944     if (! isIdentical(C_generic, C, "C_generic""C")) {
 945         std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
 946         std::cerr << "F77LAPACK: C = " << C << std::endl;
 947         failed = true;
 948     }
 949     if (! isIdentical(scale_generic, scale, "scale_generic""scale")) {
 950         std::cerr << "CXXLAPACK: scale_generic = "
 951                   << scale_generic << std::endl;
 952         std::cerr << "F77LAPACK: scale = "
 953                   << scale << std::endl;
 954         failed = true;
 955     }
 956     if (! isIdentical(info, _info, "info""_info")) {
 957         std::cerr << "CXXLAPACK: info = " << info << std::endl;
 958         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
 959         failed = true;
 960     }
 961     if (failed) {
 962         std::cerr << "transA = " << transA << std::endl;
 963         std::cerr << "transB = " << transB << std::endl;
 964         std::cerr << "iSign =  " << iSign << std::endl;
 965         std::cerr << "A =  " << A << std::endl;
 966         std::cerr << "B =  " << B << std::endl;
 967 
 968         std::cerr << "error in: trsyl.tcc" << std::endl;
 969         ASSERT(0);
 970     } else {
 971 //      std::cerr << "passed: trsyl.tcc" << std::endl;
 972     }
 973 #   endif
 974 
 975     return info;
 976 }
 977 
 978 //-- forwarding ----------------------------------------------------------------
 979 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
 980 typename MC::IndexType
 981 trsyl(Transpose             transA,
 982       Transpose             transB,
 983       ISGN                  iSign,
 984       const GeMatrix<MA>    &A,
 985       const GeMatrix<MB>    &B,
 986       MC                    &&C,
 987       SCALE                 &&scale)
 988 {
 989     typedef  typename GeMatrix<MC>::IndexType   IndexType;
 990 
 991     CHECKPOINT_ENTER;
 992     IndexType info = trsyl(transA, transB, iSign, A, B, C, scale);
 993     CHECKPOINT_LEAVE;
 994 
 995     return info;
 996 }
 997 
 998 } } // namespace lapack, flens
 999 
1000 #endif // FLENS_LAPACK_EIG_TRSYL_TCC