1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
36 $ LDC, SCALE, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 *
43 */
44
45 #ifndef FLENS_LAPACK_EIG_TRSYL_TCC
46 #define FLENS_LAPACK_EIG_TRSYL_TCC 1
47
48 #include <cmath>
49
50 namespace flens { namespace lapack {
51
52
53 //== generic lapack implementation =============================================
54 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
55 typename GeMatrix<MC>::IndexType
56 trsyl_generic(Transpose transA,
57 Transpose transB,
58 ISGN iSign,
59 const GeMatrix<MA> &A,
60 const GeMatrix<MB> &B,
61 GeMatrix<MC> &C,
62 SCALE &scale)
63 {
64 using std::abs;
65
66 typedef typename GeMatrix<MC>::ElementType T;
67 typedef typename GeMatrix<MC>::IndexType IndexType;
68
69 const Underscore<IndexType> _;
70 const IndexType m = C.numRows();
71 const IndexType n = C.numCols();
72
73 const T Zero(0), One(1);
74 //
75 // .. Local Arrays ..
76 //
77 T _vecData[4], _xData[4];
78 GeMatrixView<T>
79 VEC = typename GeMatrixView<T>::Engine(2, 2, _vecData, 2),
80 X = typename GeMatrixView<T>::Engine(2, 2, _xData, 2);
81
82 //
83 // Decode and Test input parameters
84 //
85 const bool noTransA = (transA==NoTrans);
86 const bool noTransB = (transB==NoTrans);
87
88 IndexType info = 0;
89 //
90 // Quick return if possible
91 //
92 scale = One;
93 if (m==0 || n==0) {
94 return info;
95 }
96 //
97 // Set constants to control overflow
98 //
99 const T eps = lamch<T>(Precision);
100 T smallNum = lamch<T>(SafeMin);
101 T bigNum = One / smallNum;
102 labad(smallNum, bigNum);
103 smallNum = smallNum*T(m*n) / eps;
104 bigNum = One / smallNum;
105
106 const T sMin = max(smallNum,
107 eps*lan(MaximumNorm, A),
108 eps*lan(MaximumNorm, B));
109
110 const T sign = iSign;
111
112 IndexType lNext, l1, l2, kNext, k1, k2;
113
114 T sumL, sumR, scaleC, A11, DA11, DB, xNorm;
115
116 if (noTransA && noTransB) {
117 //
118 // Solve A*X + ISGN*X*B = scale*C.
119 //
120 // The (K,L)th block of X is determined starting from
121 // bottom-left corner column by column by
122 //
123 // A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
124 //
125 // Where
126 // M L-1
127 // R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
128 // I=K+1 J=1
129 //
130 // Start column loop (index = L)
131 // L1 (L2) : column index of the first (first) row of X(K,L).
132 //
133 lNext = 1;
134 for (IndexType l=1; l<=n; ++l) {
135 if (l<lNext) {
136 continue;
137 }
138 if (l==n) {
139 l1 = l2 = l;
140 } else {
141 if (B(l+1,l)!=Zero) {
142 l1 = l;
143 l2 = l + 1;
144 lNext = l + 2;
145 } else {
146 l1 = l2 = l;
147 lNext = l + 1;
148 }
149 }
150 //
151 // Start row loop (index = K)
152 // K1 (K2): row index of the first (last) row of X(K,L).
153 //
154 kNext = m;
155 for (IndexType k=m; k>=1; --k) {
156 if (k>kNext) {
157 continue;
158 }
159 if (k==1) {
160 k1 = k2 = k;
161 } else {
162 if (A(k,k-1)!=Zero) {
163 k1 = k - 1;
164 k2 = k;
165 kNext = k - 2;
166 } else {
167 k1 = k2 = k;
168 kNext = k - 1;
169 }
170 }
171
172 if (l1==l2 && k1==k2) {
173
174 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
175 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
176 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
177 scaleC = One;
178
179 A11 = A(k1,k1) + sign*B(l1,l1);
180 DA11 = abs(A11);
181 if (DA11<=sMin) {
182 A11 = sMin;
183 DA11 = sMin;
184 info = 1;
185 }
186 DB = abs(VEC(1,1));
187 if (DA11<One && DB>One) {
188 if (DB>bigNum*DA11 ) {
189 scaleC = One / DB;
190 }
191 }
192 X(1,1) = (VEC(1,1)*scaleC) / A11;
193
194 if (scaleC!=One) {
195 C *= scaleC;
196 scale *= scaleC;
197 }
198 C(k1,l1) = X(1,1);
199
200 } else if (l1==l2 && k1!=k2) {
201
202 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
203 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
204 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
205
206 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
207 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
208 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
209
210 IndexType iErr = laln2(false, 1, sMin, One,
211 A(_(k1,k1+1),_(k1,k1+1)),
212 One, One, VEC(_,_(1,1)),
213 -sign*B(l1,l1), Zero,
214 X(_,_(1,1)), scaleC, xNorm);
215 if (iErr!=0) {
216 info = 1;
217 }
218
219 if (scaleC!=One) {
220 C *= scaleC;
221 scale *= scaleC;
222 }
223 C(k1,l1) = X(1,1);
224 C(k2,l1) = X(2,1);
225
226 } else if (l1!=l2 && k1==k2) {
227
228 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
229 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
230 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
231
232 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l2);
233 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
234 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
235
236 IndexType iErr = laln2(true, 1, sMin, One,
237 B(_(l1,l1+1),_(l1,l1+1)),
238 One, One, VEC(_,_(1,1)),
239 -sign*A(k1,k1), Zero,
240 X(_,_(1,1)), scaleC, xNorm);
241 if (iErr!=0) {
242 info = 1;
243 }
244
245 if (scaleC!=One) {
246 C *= scaleC;
247 scale *= scaleC;
248 }
249
250 C(k1,l1) = X(1,1);
251 C(k1,l2) = X(2,1);
252
253 } else if (l1!=l2 && k1!=k2) {
254
255 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
256 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
257 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
258
259 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l2);
260 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
261 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
262
263 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
264 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
265 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
266
267 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l2);
268 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l2);
269 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
270
271 IndexType iErr = lasy2(false, false, iSign,
272 A(_(k1,k1+1),_(k1,k1+1)),
273 B(_(l1,l1+1),_(l1,l1+1)),
274 VEC, scaleC, X, xNorm);
275
276 if (iErr!=0) {
277 info = 1;
278 }
279
280 if (scaleC!=One) {
281 C *= scaleC;
282 scale *= scaleC;
283 }
284 C(k1,l1) = X(1,1);
285 C(k1,l2) = X(1,2);
286 C(k2,l1) = X(2,1);
287 C(k2,l2) = X(2,2);
288 }
289
290 }
291
292 }
293
294 } else if(!noTransA && noTransB) {
295 //
296 // Solve A**T *X + ISGN*X*B = scale*C.
297 //
298 // The (K,L)th block of X is determined starting from
299 // upper-left corner column by column by
300 //
301 // A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
302 //
303 // Where
304 // K-1 L-1
305 // R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
306 // I=1 J=1
307 //
308 // Start column loop (index = L)
309 // L1 (L2): column index of the first (last) row of X(K,L)
310 //
311 lNext = 1;
312 for (IndexType l=1; l<=n; ++l) {
313 if (l<lNext) {
314 continue;
315 }
316 if (l==n){
317 l1 = l2 = l;
318 } else {
319 if (B(l+1,l)!=Zero) {
320 l1 = l;
321 l2 = l + 1;
322 lNext = l + 2;
323 } else {
324 l1 = l2 = l;
325 lNext = l + 1;
326 }
327 }
328 //
329 // Start row loop (index = K)
330 // K1 (K2): row index of the first (last) row of X(K,L)
331 //
332 kNext = 1;
333 for (IndexType k=1; k<=m; ++k) {
334 if (k<kNext) {
335 continue;
336 }
337 if (k==m) {
338 k1 = k2 = k;
339 } else {
340 if (A(k+1,k)!=Zero) {
341 k1 = k;
342 k2 = k + 1;
343 kNext = k + 2;
344 } else {
345 k1 = k2 = k;
346 kNext = k + 1;
347 }
348 }
349
350 if (l1==l2 && k1==k2) {
351 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
352 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
353 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
354 scaleC = One;
355
356 A11 = A(k1,k1) + sign*B(l1,l1);
357 DA11 = abs(A11);
358 if (DA11<=sMin) {
359 A11 = sMin;
360 DA11 = sMin;
361 info = 1;
362 }
363 DB = abs(VEC(1,1));
364 if (DA11<One && DB>One) {
365 if (DB>bigNum*DA11) {
366 scaleC = One/DB;
367 }
368 }
369 X(1,1) = (VEC(1,1)*scaleC) / A11;
370
371 if (scaleC!=One) {
372 C *= scaleC;
373 scale *= scaleC;
374 }
375 C(k1,l1) = X(1,1);
376
377 } else if (l1==l2 && k1!=k2) {
378
379 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
380 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
381 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
382
383 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
384 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
385 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
386
387 IndexType iErr = laln2(true, 1, sMin, One,
388 A(_(k1,k1+1),_(k1,k1+1)),
389 One, One, VEC(_,_(1,1)),
390 -sign*B(l1,l1), Zero,
391 X(_,_(1,1)), scaleC, xNorm);
392 if (iErr!=0) {
393 info = 1;
394 }
395
396 if (scaleC!=One) {
397 C *= scaleC;
398 scale *= scaleC;
399 }
400 C(k1,l1) = X(1,1);
401 C(k2,l1) = X(2,1);
402
403 } else if (l1!=l2 && k1==k2) {
404
405 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
406 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
407 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
408
409 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
410 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
411 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
412
413 IndexType iErr = laln2(true, 1, sMin, One,
414 B(_(l1,l1+1),_(l1,l1+1)),
415 One, One, VEC(_,_(1,1)),
416 -sign*A(k1,k1), Zero,
417 X(_,_(1,1)), scaleC, xNorm);
418 if (iErr!=0) {
419 info = 1;
420 }
421
422 if (scaleC!=One) {
423 C *= scaleC;
424 scale *= scaleC;
425 }
426 C(k1,l1) = X(1,1);
427 C(k1,l2) = X(2,1);
428
429 } else if (l1!=l2 && k1!=k2) {
430
431 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
432 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
433 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
434
435 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
436 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
437 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
438
439 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
440 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
441 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
442
443 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l2);
444 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l2);
445 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
446
447 IndexType iErr = lasy2(true, false, iSign,
448 A(_(k1,k1+1),_(k1,k1+1)),
449 B(_(l1,l1+1),_(l1,l1+1)),
450 VEC, scaleC, X, xNorm);
451
452 if (iErr!=0) {
453 info = 1;
454 }
455
456 if (scaleC!=One) {
457 C *= scaleC;
458 scale *= scaleC;
459 }
460 C(k1,l1) = X(1,1);
461 C(k1,l2) = X(1,2);
462 C(k2,l1) = X(2,1);
463 C(k2,l2) = X(2,2);
464 }
465
466 }
467 }
468
469 } else if (!noTransA && !noTransB) {
470 //
471 // Solve A**T*X + ISGN*X*B**T = scale*C.
472 //
473 // The (K,L)th block of X is determined starting from
474 // top-right corner column by column by
475 //
476 // A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
477 //
478 // Where
479 // K-1 N
480 // R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
481 // I=1 J=L+1
482 //
483 // Start column loop (index = L)
484 // L1 (L2): column index of the first (last) row of X(K,L)
485 //
486 lNext = n;
487 for (IndexType l=n; l>=1; --l) {
488 if (l>lNext) {
489 continue;
490 }
491 if (l==1) {
492 l1 = l2 = l;
493 } else {
494 if (B(l,l-1)!=Zero) {
495 l1 = l-1;
496 l2 = l;
497 lNext = l - 2;
498 } else {
499 l1 = l2 = l;
500 lNext = l - 1;
501 }
502 }
503 //
504 // Start row loop (index = K)
505 // K1 (K2): row index of the first (last) row of X(K,L)
506 //
507 kNext = 1;
508 for (IndexType k=1; k<=m; ++k) {
509 if (k<kNext) {
510 continue;
511 }
512 if (k==m) {
513 k1 = k2 = k;
514 } else {
515 if (A(k+1,k)!=Zero) {
516 k1 = k;
517 k2 = k + 1;
518 kNext = k + 2;
519 } else {
520 k1 = k2 = k;
521 kNext = k + 1;
522 }
523 }
524
525 if (l1==l2 && k1==k2) {
526 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
527 sumR = C(k1,_(l1+1,n)) * B(l1,_(l1+1,n));
528 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
529 scaleC = One;
530
531 A11 = A(k1,k1) + sign*B(l1,l1);
532 DA11 = abs(A11);
533 if (DA11<=sMin) {
534 A11 = sMin;
535 DA11 = sMin;
536 info = 1;
537 }
538 DB = abs(VEC(1,1));
539 if (DA11<One && DB>One) {
540 if (DB>bigNum*DA11) {
541 scaleC = One / DB;
542 }
543 }
544 X(1,1) = (VEC(1,1)*scaleC) / A11;
545
546 if (scaleC!=One) {
547 C *= scaleC;
548 scale *= scaleC;
549 }
550 C(k1,l1) = X(1,1);
551
552 } else if (l1==l2 && k1!=k2) {
553
554 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
555 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
556 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
557
558 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
559 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
560 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
561
562 IndexType iErr = laln2(true, 1, sMin, One,
563 A(_(k1,k1+1),_(k1,k1+1)),
564 One, One, VEC(_,_(1,1)),
565 -sign*B(l1,l1), Zero,
566 X(_,_(1,1)), scaleC, xNorm);
567
568 if (iErr!=0) {
569 info = 1;
570 }
571
572 if (scaleC!=One) {
573 C *= scaleC;
574 scale *= scaleC;
575 }
576 C(k1,l1) = X(1,1);
577 C(k2,l1) = X(2,1);
578
579 } else if (l1!=l2 && k1==k2) {
580
581 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
582 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
583 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
584
585 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
586 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
587 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
588
589 IndexType iErr = laln2(false, 1, sMin, One,
590 B(_(l1,l1+1),_(l1,l1+1)),
591 One, One, VEC(_,_(1,1)),
592 -sign*A(k1,k1), Zero,
593 X(_,_(1,1)), scaleC, xNorm);
594
595 if (iErr!=0) {
596 info = 1;
597 }
598
599 if (scaleC!=One) {
600 C *= scaleC;
601 scale *= scaleC;
602 }
603 C(k1,l1) = X(1,1);
604 C(k1,l2) = X(2,1);
605
606 } else if (l1!=l2 && k1!=k2) {
607
608 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
609 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
610 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
611
612 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
613 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
614 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
615
616 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
617 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
618 VEC(2,1) = C(k2,l1) - ( sumL+sign*sumR);
619
620 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l2);
621 sumR = C(k2,_(l2+1,n)) * B(l2,_(l2+1,n));
622 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
623
624 IndexType iErr = lasy2(true, true, iSign,
625 A(_(k1,k1+1),_(k1,k1+1)),
626 B(_(l1,l1+1),_(l1,l1+1)),
627 VEC, scaleC, X, xNorm);
628
629 if (iErr!=0) {
630 info = 1;
631 }
632
633 if (scaleC!=One) {
634 C *= scaleC;
635 scale *= scaleC;
636 }
637 C(k1,l1) = X(1,1);
638 C(k1,l2) = X(1,2);
639 C(k2,l1) = X(2,1);
640 C(k2,l2) = X(2,2);
641 }
642
643 }
644 }
645
646 } else if (noTransA && !noTransB) {
647 //
648 // Solve A*X + ISGN*X*B**T = scale*C.
649 //
650 // The (K,L)th block of X is determined starting from
651 // bottom-right corner column by column by
652 //
653 // A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
654 //
655 // Where
656 // M N
657 // R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
658 // I=K+1 J=L+1
659 //
660 // Start column loop (index = L)
661 // L1 (L2): column index of the first (last) row of X(K,L)
662 //
663 lNext = n;
664 for (IndexType l=n; l>=1; --l) {
665 if (l>lNext) {
666 continue;
667 }
668 if (l==1) {
669 l1 = l2 = l;
670 } else {
671 if (B(l,l-1)!=Zero) {
672 l1 = l - 1;
673 l2 = l;
674 lNext = l - 2;
675 } else {
676 l1 = l2 = l;
677 lNext = l - 1;
678 }
679 }
680 //
681 // Start row loop (index = K)
682 // K1 (K2): row index of the first (last) row of X(K,L)
683 //
684 kNext = m;
685 for (IndexType k=m; k>=1; --k) {
686 if (k>kNext) {
687 continue;
688 }
689 if (k==1) {
690 k1 = k2 = k;
691 } else {
692 if (A(k,k-1)!=Zero) {
693 k1 = k - 1;
694 k2 = k;
695 kNext = k - 2;
696 } else {
697 k1 = k2 = k;
698 kNext = k - 1;
699 }
700 }
701
702 if (l1==l2 && k1==k2) {
703 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
704 sumR = C(k1,_(l1+1,n)) * B(l1,_(l1+1,n));
705 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
706 scaleC = One;
707
708 A11 = A(k1,k1) + sign*B(l1,l1);
709 DA11 = abs(A11);
710 if (DA11<=sMin) {
711 A11 = sMin;
712 DA11 = sMin;
713 info = 1;
714 }
715 DB = abs(VEC(1,1));
716 if (DA11<One && DB>One) {
717 if (DB>bigNum*DA11) {
718 scaleC = One / DB;
719 }
720 }
721 X(1,1) = (VEC(1,1)*scaleC) / A11;
722
723 if (scaleC!=One) {
724 C *= scaleC;
725 scale *= scaleC;
726 }
727 C(k1,l1) = X(1,1);
728
729 } else if (l1==l2 && k1!=k2) {
730
731 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
732 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
733 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
734
735 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
736 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
737 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
738
739 IndexType iErr = laln2(false, 1, sMin, One,
740 A(_(k1,k1+1),_(k1,k1+1)),
741 One, One, VEC(_,_(1,1)),
742 -sign*B(l1,l1), Zero,
743 X(_,_(1,1)), scaleC, xNorm);
744
745 if (iErr!=0) {
746 info = 1;
747 }
748
749 if (scaleC!=One) {
750 C *= scaleC;
751 scale *= scaleC;
752 }
753 C(k1,l1) = X(1,1);
754 C(k2,l1) = X(2,1);
755
756 } else if (l1!=l2 && k1==k2) {
757
758 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
759 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
760 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
761
762 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l2);
763 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
764 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
765
766 IndexType iErr = laln2(false, 1, sMin, One,
767 B(_(l1,l1+1),_(l1,l1+1)),
768 One, One, VEC(_,_(1,1)),
769 -sign*A(k1,k1), Zero,
770 X(_,_(1,1)), scaleC, xNorm);
771
772 if (iErr!=0) {
773 info = 1;
774 }
775
776 if (scaleC!=One) {
777 C *= scaleC;
778 scale *= scaleC;
779 }
780 C(k1,l1) = X(1,1);
781 C(k1,l2) = X(2,1);
782
783 } else if (l1!=l2 && k1!=k2) {
784
785 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
786 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
787 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
788
789 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l2);
790 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
791 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
792
793 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
794 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
795 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
796
797 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l2);
798 sumR = C(k2,_(l2+1,n)) * B(l2,_(l2+1,n));
799 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
800
801 IndexType iErr = lasy2(false, true, iSign,
802 A(_(k1,k1+1),_(k1,k1+1)),
803 B(_(l1,l1+1),_(l1,l1+1)),
804 VEC, scaleC, X, xNorm);
805
806 if (iErr!=0) {
807 info = 1;
808 }
809
810 if (scaleC!=One) {
811 C *= scaleC;
812 scale *= scaleC;
813 }
814 C(k1,l1) = X(1,1);
815 C(k1,l2) = X(1,2);
816 C(k2,l1) = X(2,1);
817 C(k2,l2) = X(2,2);
818 }
819
820 }
821 }
822
823 }
824 return info;
825 }
826
827 //== interface for native lapack ===============================================
828
829 #ifdef CHECK_CXXLAPACK
830
831 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
832 typename GeMatrix<MC>::IndexType
833 trsyl_native(Transpose transA,
834 Transpose transB,
835 ISGN iSign,
836 const GeMatrix<MA> &A,
837 const GeMatrix<MB> &B,
838 GeMatrix<MC> &C,
839 SCALE &scale)
840 {
841 typedef typename GeMatrix<MC>::ElementType ElementType;
842
843 const char TRANA = getF77LapackChar(transA);
844 const char TRANB = getF77LapackChar(transB);
845 const INTEGER _ISGN = iSign;
846 const INTEGER M = C.numRows();
847 const INTEGER N = C.numCols();
848 const INTEGER LDA = A.leadingDimension();
849 const INTEGER LDB = B.leadingDimension();
850 const INTEGER LDC = C.leadingDimension();
851 ElementType _SCALE = scale;
852 INTEGER INFO;
853
854 if (IsSame<ElementType,DOUBLE>::value) {
855 LAPACK_IMPL(dtrsyl)(&TRANA,
856 &TRANB,
857 &_ISGN,
858 &M,
859 &N,
860 A.data(),
861 &LDA,
862 B.data(),
863 &LDB,
864 C.data(),
865 &LDC,
866 &_SCALE,
867 &INFO);
868 } else {
869 ASSERT(0);
870 }
871 scale = _SCALE;
872 return INFO;
873 }
874
875 #endif // CHECK_CXXLAPACK
876
877 //== public interface ==========================================================
878 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
879 typename GeMatrix<MC>::IndexType
880 trsyl(Transpose transA,
881 Transpose transB,
882 ISGN iSign,
883 const GeMatrix<MA> &A,
884 const GeMatrix<MB> &B,
885 GeMatrix<MC> &C,
886 SCALE &scale)
887 {
888 LAPACK_DEBUG_OUT("trsyl");
889
890 typedef typename GeMatrix<MC>::IndexType IndexType;
891
892 //
893 // Test the input parameters
894 //
895 # ifndef NDEBUG
896 ASSERT(iSign==1 || iSign==-1);
897 ASSERT(A.firstRow()==1);
898 ASSERT(A.firstCol()==1);
899 ASSERT(A.numRows()==A.numCols());
900
901 ASSERT(B.firstRow()==1);
902 ASSERT(B.firstCol()==1);
903 ASSERT(B.numRows()==B.numCols());
904
905 const IndexType m = A.numRows();
906 const IndexType n = B.numRows();
907
908 ASSERT(C.firstRow()==1);
909 ASSERT(C.firstCol()==1);
910 ASSERT(C.numRows()==m);
911 ASSERT(C.numCols()==n);
912 # endif
913
914 # ifdef CHECK_CXXLAPACK
915 //
916 // Make copies of output arguments
917 //
918 const typename GeMatrix<MC>::NoView C_org = C;
919 SCALE scale_org = scale;
920 # endif
921
922 //
923 // Call implementation
924 //
925 IndexType info = trsyl_generic(transA, transB, iSign, A, B, C, scale);
926
927 # ifdef CHECK_CXXLAPACK
928 //
929 // Make copies of results computed by generic implementation
930 //
931 const typename GeMatrix<MC>::NoView C_generic = C;
932 SCALE scale_generic = scale;
933 //
934 // restore output arguments
935 //
936 C = C_org;
937 scale = scale_org;
938 //
939 // Compare generic results with results from the native implementation
940 //
941 IndexType _info = trsyl_native(transA, transB, iSign, A, B, C, scale);
942
943 bool failed = false;
944 if (! isIdentical(C_generic, C, "C_generic", "C")) {
945 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
946 std::cerr << "F77LAPACK: C = " << C << std::endl;
947 failed = true;
948 }
949 if (! isIdentical(scale_generic, scale, "scale_generic", "scale")) {
950 std::cerr << "CXXLAPACK: scale_generic = "
951 << scale_generic << std::endl;
952 std::cerr << "F77LAPACK: scale = "
953 << scale << std::endl;
954 failed = true;
955 }
956 if (! isIdentical(info, _info, "info", "_info")) {
957 std::cerr << "CXXLAPACK: info = " << info << std::endl;
958 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
959 failed = true;
960 }
961 if (failed) {
962 std::cerr << "transA = " << transA << std::endl;
963 std::cerr << "transB = " << transB << std::endl;
964 std::cerr << "iSign = " << iSign << std::endl;
965 std::cerr << "A = " << A << std::endl;
966 std::cerr << "B = " << B << std::endl;
967
968 std::cerr << "error in: trsyl.tcc" << std::endl;
969 ASSERT(0);
970 } else {
971 // std::cerr << "passed: trsyl.tcc" << std::endl;
972 }
973 # endif
974
975 return info;
976 }
977
978 //-- forwarding ----------------------------------------------------------------
979 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
980 typename MC::IndexType
981 trsyl(Transpose transA,
982 Transpose transB,
983 ISGN iSign,
984 const GeMatrix<MA> &A,
985 const GeMatrix<MB> &B,
986 MC &&C,
987 SCALE &&scale)
988 {
989 typedef typename GeMatrix<MC>::IndexType IndexType;
990
991 CHECKPOINT_ENTER;
992 IndexType info = trsyl(transA, transB, iSign, A, B, C, scale);
993 CHECKPOINT_LEAVE;
994
995 return info;
996 }
997
998 } } // namespace lapack, flens
999
1000 #endif // FLENS_LAPACK_EIG_TRSYL_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
36 $ LDC, SCALE, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 *
43 */
44
45 #ifndef FLENS_LAPACK_EIG_TRSYL_TCC
46 #define FLENS_LAPACK_EIG_TRSYL_TCC 1
47
48 #include <cmath>
49
50 namespace flens { namespace lapack {
51
52
53 //== generic lapack implementation =============================================
54 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
55 typename GeMatrix<MC>::IndexType
56 trsyl_generic(Transpose transA,
57 Transpose transB,
58 ISGN iSign,
59 const GeMatrix<MA> &A,
60 const GeMatrix<MB> &B,
61 GeMatrix<MC> &C,
62 SCALE &scale)
63 {
64 using std::abs;
65
66 typedef typename GeMatrix<MC>::ElementType T;
67 typedef typename GeMatrix<MC>::IndexType IndexType;
68
69 const Underscore<IndexType> _;
70 const IndexType m = C.numRows();
71 const IndexType n = C.numCols();
72
73 const T Zero(0), One(1);
74 //
75 // .. Local Arrays ..
76 //
77 T _vecData[4], _xData[4];
78 GeMatrixView<T>
79 VEC = typename GeMatrixView<T>::Engine(2, 2, _vecData, 2),
80 X = typename GeMatrixView<T>::Engine(2, 2, _xData, 2);
81
82 //
83 // Decode and Test input parameters
84 //
85 const bool noTransA = (transA==NoTrans);
86 const bool noTransB = (transB==NoTrans);
87
88 IndexType info = 0;
89 //
90 // Quick return if possible
91 //
92 scale = One;
93 if (m==0 || n==0) {
94 return info;
95 }
96 //
97 // Set constants to control overflow
98 //
99 const T eps = lamch<T>(Precision);
100 T smallNum = lamch<T>(SafeMin);
101 T bigNum = One / smallNum;
102 labad(smallNum, bigNum);
103 smallNum = smallNum*T(m*n) / eps;
104 bigNum = One / smallNum;
105
106 const T sMin = max(smallNum,
107 eps*lan(MaximumNorm, A),
108 eps*lan(MaximumNorm, B));
109
110 const T sign = iSign;
111
112 IndexType lNext, l1, l2, kNext, k1, k2;
113
114 T sumL, sumR, scaleC, A11, DA11, DB, xNorm;
115
116 if (noTransA && noTransB) {
117 //
118 // Solve A*X + ISGN*X*B = scale*C.
119 //
120 // The (K,L)th block of X is determined starting from
121 // bottom-left corner column by column by
122 //
123 // A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
124 //
125 // Where
126 // M L-1
127 // R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
128 // I=K+1 J=1
129 //
130 // Start column loop (index = L)
131 // L1 (L2) : column index of the first (first) row of X(K,L).
132 //
133 lNext = 1;
134 for (IndexType l=1; l<=n; ++l) {
135 if (l<lNext) {
136 continue;
137 }
138 if (l==n) {
139 l1 = l2 = l;
140 } else {
141 if (B(l+1,l)!=Zero) {
142 l1 = l;
143 l2 = l + 1;
144 lNext = l + 2;
145 } else {
146 l1 = l2 = l;
147 lNext = l + 1;
148 }
149 }
150 //
151 // Start row loop (index = K)
152 // K1 (K2): row index of the first (last) row of X(K,L).
153 //
154 kNext = m;
155 for (IndexType k=m; k>=1; --k) {
156 if (k>kNext) {
157 continue;
158 }
159 if (k==1) {
160 k1 = k2 = k;
161 } else {
162 if (A(k,k-1)!=Zero) {
163 k1 = k - 1;
164 k2 = k;
165 kNext = k - 2;
166 } else {
167 k1 = k2 = k;
168 kNext = k - 1;
169 }
170 }
171
172 if (l1==l2 && k1==k2) {
173
174 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
175 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
176 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
177 scaleC = One;
178
179 A11 = A(k1,k1) + sign*B(l1,l1);
180 DA11 = abs(A11);
181 if (DA11<=sMin) {
182 A11 = sMin;
183 DA11 = sMin;
184 info = 1;
185 }
186 DB = abs(VEC(1,1));
187 if (DA11<One && DB>One) {
188 if (DB>bigNum*DA11 ) {
189 scaleC = One / DB;
190 }
191 }
192 X(1,1) = (VEC(1,1)*scaleC) / A11;
193
194 if (scaleC!=One) {
195 C *= scaleC;
196 scale *= scaleC;
197 }
198 C(k1,l1) = X(1,1);
199
200 } else if (l1==l2 && k1!=k2) {
201
202 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
203 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
204 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
205
206 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
207 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
208 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
209
210 IndexType iErr = laln2(false, 1, sMin, One,
211 A(_(k1,k1+1),_(k1,k1+1)),
212 One, One, VEC(_,_(1,1)),
213 -sign*B(l1,l1), Zero,
214 X(_,_(1,1)), scaleC, xNorm);
215 if (iErr!=0) {
216 info = 1;
217 }
218
219 if (scaleC!=One) {
220 C *= scaleC;
221 scale *= scaleC;
222 }
223 C(k1,l1) = X(1,1);
224 C(k2,l1) = X(2,1);
225
226 } else if (l1!=l2 && k1==k2) {
227
228 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
229 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
230 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
231
232 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l2);
233 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
234 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
235
236 IndexType iErr = laln2(true, 1, sMin, One,
237 B(_(l1,l1+1),_(l1,l1+1)),
238 One, One, VEC(_,_(1,1)),
239 -sign*A(k1,k1), Zero,
240 X(_,_(1,1)), scaleC, xNorm);
241 if (iErr!=0) {
242 info = 1;
243 }
244
245 if (scaleC!=One) {
246 C *= scaleC;
247 scale *= scaleC;
248 }
249
250 C(k1,l1) = X(1,1);
251 C(k1,l2) = X(2,1);
252
253 } else if (l1!=l2 && k1!=k2) {
254
255 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
256 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
257 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
258
259 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l2);
260 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
261 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
262
263 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
264 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
265 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
266
267 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l2);
268 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l2);
269 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
270
271 IndexType iErr = lasy2(false, false, iSign,
272 A(_(k1,k1+1),_(k1,k1+1)),
273 B(_(l1,l1+1),_(l1,l1+1)),
274 VEC, scaleC, X, xNorm);
275
276 if (iErr!=0) {
277 info = 1;
278 }
279
280 if (scaleC!=One) {
281 C *= scaleC;
282 scale *= scaleC;
283 }
284 C(k1,l1) = X(1,1);
285 C(k1,l2) = X(1,2);
286 C(k2,l1) = X(2,1);
287 C(k2,l2) = X(2,2);
288 }
289
290 }
291
292 }
293
294 } else if(!noTransA && noTransB) {
295 //
296 // Solve A**T *X + ISGN*X*B = scale*C.
297 //
298 // The (K,L)th block of X is determined starting from
299 // upper-left corner column by column by
300 //
301 // A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
302 //
303 // Where
304 // K-1 L-1
305 // R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
306 // I=1 J=1
307 //
308 // Start column loop (index = L)
309 // L1 (L2): column index of the first (last) row of X(K,L)
310 //
311 lNext = 1;
312 for (IndexType l=1; l<=n; ++l) {
313 if (l<lNext) {
314 continue;
315 }
316 if (l==n){
317 l1 = l2 = l;
318 } else {
319 if (B(l+1,l)!=Zero) {
320 l1 = l;
321 l2 = l + 1;
322 lNext = l + 2;
323 } else {
324 l1 = l2 = l;
325 lNext = l + 1;
326 }
327 }
328 //
329 // Start row loop (index = K)
330 // K1 (K2): row index of the first (last) row of X(K,L)
331 //
332 kNext = 1;
333 for (IndexType k=1; k<=m; ++k) {
334 if (k<kNext) {
335 continue;
336 }
337 if (k==m) {
338 k1 = k2 = k;
339 } else {
340 if (A(k+1,k)!=Zero) {
341 k1 = k;
342 k2 = k + 1;
343 kNext = k + 2;
344 } else {
345 k1 = k2 = k;
346 kNext = k + 1;
347 }
348 }
349
350 if (l1==l2 && k1==k2) {
351 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
352 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
353 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
354 scaleC = One;
355
356 A11 = A(k1,k1) + sign*B(l1,l1);
357 DA11 = abs(A11);
358 if (DA11<=sMin) {
359 A11 = sMin;
360 DA11 = sMin;
361 info = 1;
362 }
363 DB = abs(VEC(1,1));
364 if (DA11<One && DB>One) {
365 if (DB>bigNum*DA11) {
366 scaleC = One/DB;
367 }
368 }
369 X(1,1) = (VEC(1,1)*scaleC) / A11;
370
371 if (scaleC!=One) {
372 C *= scaleC;
373 scale *= scaleC;
374 }
375 C(k1,l1) = X(1,1);
376
377 } else if (l1==l2 && k1!=k2) {
378
379 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
380 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
381 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
382
383 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
384 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
385 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
386
387 IndexType iErr = laln2(true, 1, sMin, One,
388 A(_(k1,k1+1),_(k1,k1+1)),
389 One, One, VEC(_,_(1,1)),
390 -sign*B(l1,l1), Zero,
391 X(_,_(1,1)), scaleC, xNorm);
392 if (iErr!=0) {
393 info = 1;
394 }
395
396 if (scaleC!=One) {
397 C *= scaleC;
398 scale *= scaleC;
399 }
400 C(k1,l1) = X(1,1);
401 C(k2,l1) = X(2,1);
402
403 } else if (l1!=l2 && k1==k2) {
404
405 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
406 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
407 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
408
409 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
410 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
411 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
412
413 IndexType iErr = laln2(true, 1, sMin, One,
414 B(_(l1,l1+1),_(l1,l1+1)),
415 One, One, VEC(_,_(1,1)),
416 -sign*A(k1,k1), Zero,
417 X(_,_(1,1)), scaleC, xNorm);
418 if (iErr!=0) {
419 info = 1;
420 }
421
422 if (scaleC!=One) {
423 C *= scaleC;
424 scale *= scaleC;
425 }
426 C(k1,l1) = X(1,1);
427 C(k1,l2) = X(2,1);
428
429 } else if (l1!=l2 && k1!=k2) {
430
431 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
432 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l1);
433 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
434
435 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
436 sumR = C(k1,_(1,l1-1)) * B(_(1,l1-1),l2);
437 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
438
439 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
440 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l1);
441 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
442
443 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l2);
444 sumR = C(k2,_(1,l1-1)) * B(_(1,l1-1),l2);
445 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
446
447 IndexType iErr = lasy2(true, false, iSign,
448 A(_(k1,k1+1),_(k1,k1+1)),
449 B(_(l1,l1+1),_(l1,l1+1)),
450 VEC, scaleC, X, xNorm);
451
452 if (iErr!=0) {
453 info = 1;
454 }
455
456 if (scaleC!=One) {
457 C *= scaleC;
458 scale *= scaleC;
459 }
460 C(k1,l1) = X(1,1);
461 C(k1,l2) = X(1,2);
462 C(k2,l1) = X(2,1);
463 C(k2,l2) = X(2,2);
464 }
465
466 }
467 }
468
469 } else if (!noTransA && !noTransB) {
470 //
471 // Solve A**T*X + ISGN*X*B**T = scale*C.
472 //
473 // The (K,L)th block of X is determined starting from
474 // top-right corner column by column by
475 //
476 // A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
477 //
478 // Where
479 // K-1 N
480 // R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
481 // I=1 J=L+1
482 //
483 // Start column loop (index = L)
484 // L1 (L2): column index of the first (last) row of X(K,L)
485 //
486 lNext = n;
487 for (IndexType l=n; l>=1; --l) {
488 if (l>lNext) {
489 continue;
490 }
491 if (l==1) {
492 l1 = l2 = l;
493 } else {
494 if (B(l,l-1)!=Zero) {
495 l1 = l-1;
496 l2 = l;
497 lNext = l - 2;
498 } else {
499 l1 = l2 = l;
500 lNext = l - 1;
501 }
502 }
503 //
504 // Start row loop (index = K)
505 // K1 (K2): row index of the first (last) row of X(K,L)
506 //
507 kNext = 1;
508 for (IndexType k=1; k<=m; ++k) {
509 if (k<kNext) {
510 continue;
511 }
512 if (k==m) {
513 k1 = k2 = k;
514 } else {
515 if (A(k+1,k)!=Zero) {
516 k1 = k;
517 k2 = k + 1;
518 kNext = k + 2;
519 } else {
520 k1 = k2 = k;
521 kNext = k + 1;
522 }
523 }
524
525 if (l1==l2 && k1==k2) {
526 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
527 sumR = C(k1,_(l1+1,n)) * B(l1,_(l1+1,n));
528 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
529 scaleC = One;
530
531 A11 = A(k1,k1) + sign*B(l1,l1);
532 DA11 = abs(A11);
533 if (DA11<=sMin) {
534 A11 = sMin;
535 DA11 = sMin;
536 info = 1;
537 }
538 DB = abs(VEC(1,1));
539 if (DA11<One && DB>One) {
540 if (DB>bigNum*DA11) {
541 scaleC = One / DB;
542 }
543 }
544 X(1,1) = (VEC(1,1)*scaleC) / A11;
545
546 if (scaleC!=One) {
547 C *= scaleC;
548 scale *= scaleC;
549 }
550 C(k1,l1) = X(1,1);
551
552 } else if (l1==l2 && k1!=k2) {
553
554 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
555 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
556 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
557
558 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
559 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
560 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
561
562 IndexType iErr = laln2(true, 1, sMin, One,
563 A(_(k1,k1+1),_(k1,k1+1)),
564 One, One, VEC(_,_(1,1)),
565 -sign*B(l1,l1), Zero,
566 X(_,_(1,1)), scaleC, xNorm);
567
568 if (iErr!=0) {
569 info = 1;
570 }
571
572 if (scaleC!=One) {
573 C *= scaleC;
574 scale *= scaleC;
575 }
576 C(k1,l1) = X(1,1);
577 C(k2,l1) = X(2,1);
578
579 } else if (l1!=l2 && k1==k2) {
580
581 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
582 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
583 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
584
585 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
586 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
587 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
588
589 IndexType iErr = laln2(false, 1, sMin, One,
590 B(_(l1,l1+1),_(l1,l1+1)),
591 One, One, VEC(_,_(1,1)),
592 -sign*A(k1,k1), Zero,
593 X(_,_(1,1)), scaleC, xNorm);
594
595 if (iErr!=0) {
596 info = 1;
597 }
598
599 if (scaleC!=One) {
600 C *= scaleC;
601 scale *= scaleC;
602 }
603 C(k1,l1) = X(1,1);
604 C(k1,l2) = X(2,1);
605
606 } else if (l1!=l2 && k1!=k2) {
607
608 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l1);
609 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
610 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
611
612 sumL = A(_(1,k1-1),k1) * C(_(1,k1-1),l2);
613 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
614 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
615
616 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l1);
617 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
618 VEC(2,1) = C(k2,l1) - ( sumL+sign*sumR);
619
620 sumL = A(_(1,k1-1),k2) * C(_(1,k1-1),l2);
621 sumR = C(k2,_(l2+1,n)) * B(l2,_(l2+1,n));
622 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
623
624 IndexType iErr = lasy2(true, true, iSign,
625 A(_(k1,k1+1),_(k1,k1+1)),
626 B(_(l1,l1+1),_(l1,l1+1)),
627 VEC, scaleC, X, xNorm);
628
629 if (iErr!=0) {
630 info = 1;
631 }
632
633 if (scaleC!=One) {
634 C *= scaleC;
635 scale *= scaleC;
636 }
637 C(k1,l1) = X(1,1);
638 C(k1,l2) = X(1,2);
639 C(k2,l1) = X(2,1);
640 C(k2,l2) = X(2,2);
641 }
642
643 }
644 }
645
646 } else if (noTransA && !noTransB) {
647 //
648 // Solve A*X + ISGN*X*B**T = scale*C.
649 //
650 // The (K,L)th block of X is determined starting from
651 // bottom-right corner column by column by
652 //
653 // A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
654 //
655 // Where
656 // M N
657 // R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
658 // I=K+1 J=L+1
659 //
660 // Start column loop (index = L)
661 // L1 (L2): column index of the first (last) row of X(K,L)
662 //
663 lNext = n;
664 for (IndexType l=n; l>=1; --l) {
665 if (l>lNext) {
666 continue;
667 }
668 if (l==1) {
669 l1 = l2 = l;
670 } else {
671 if (B(l,l-1)!=Zero) {
672 l1 = l - 1;
673 l2 = l;
674 lNext = l - 2;
675 } else {
676 l1 = l2 = l;
677 lNext = l - 1;
678 }
679 }
680 //
681 // Start row loop (index = K)
682 // K1 (K2): row index of the first (last) row of X(K,L)
683 //
684 kNext = m;
685 for (IndexType k=m; k>=1; --k) {
686 if (k>kNext) {
687 continue;
688 }
689 if (k==1) {
690 k1 = k2 = k;
691 } else {
692 if (A(k,k-1)!=Zero) {
693 k1 = k - 1;
694 k2 = k;
695 kNext = k - 2;
696 } else {
697 k1 = k2 = k;
698 kNext = k - 1;
699 }
700 }
701
702 if (l1==l2 && k1==k2) {
703 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
704 sumR = C(k1,_(l1+1,n)) * B(l1,_(l1+1,n));
705 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
706 scaleC = One;
707
708 A11 = A(k1,k1) + sign*B(l1,l1);
709 DA11 = abs(A11);
710 if (DA11<=sMin) {
711 A11 = sMin;
712 DA11 = sMin;
713 info = 1;
714 }
715 DB = abs(VEC(1,1));
716 if (DA11<One && DB>One) {
717 if (DB>bigNum*DA11) {
718 scaleC = One / DB;
719 }
720 }
721 X(1,1) = (VEC(1,1)*scaleC) / A11;
722
723 if (scaleC!=One) {
724 C *= scaleC;
725 scale *= scaleC;
726 }
727 C(k1,l1) = X(1,1);
728
729 } else if (l1==l2 && k1!=k2) {
730
731 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
732 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
733 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
734
735 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
736 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
737 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
738
739 IndexType iErr = laln2(false, 1, sMin, One,
740 A(_(k1,k1+1),_(k1,k1+1)),
741 One, One, VEC(_,_(1,1)),
742 -sign*B(l1,l1), Zero,
743 X(_,_(1,1)), scaleC, xNorm);
744
745 if (iErr!=0) {
746 info = 1;
747 }
748
749 if (scaleC!=One) {
750 C *= scaleC;
751 scale *= scaleC;
752 }
753 C(k1,l1) = X(1,1);
754 C(k2,l1) = X(2,1);
755
756 } else if (l1!=l2 && k1==k2) {
757
758 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l1);
759 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
760 VEC(1,1) = sign*(C(k1,l1)-(sumL+sign*sumR));
761
762 sumL = A(k1,_(k1+1,m)) * C(_(k1+1,m),l2);
763 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
764 VEC(2,1) = sign*(C(k1,l2)-(sumL+sign*sumR));
765
766 IndexType iErr = laln2(false, 1, sMin, One,
767 B(_(l1,l1+1),_(l1,l1+1)),
768 One, One, VEC(_,_(1,1)),
769 -sign*A(k1,k1), Zero,
770 X(_,_(1,1)), scaleC, xNorm);
771
772 if (iErr!=0) {
773 info = 1;
774 }
775
776 if (scaleC!=One) {
777 C *= scaleC;
778 scale *= scaleC;
779 }
780 C(k1,l1) = X(1,1);
781 C(k1,l2) = X(2,1);
782
783 } else if (l1!=l2 && k1!=k2) {
784
785 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l1);
786 sumR = C(k1,_(l2+1,n)) * B(l1,_(l2+1,n));
787 VEC(1,1) = C(k1,l1) - (sumL+sign*sumR);
788
789 sumL = A(k1,_(k2+1,m)) * C(_(k2+1,m),l2);
790 sumR = C(k1,_(l2+1,n)) * B(l2,_(l2+1,n));
791 VEC(1,2) = C(k1,l2) - (sumL+sign*sumR);
792
793 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l1);
794 sumR = C(k2,_(l2+1,n)) * B(l1,_(l2+1,n));
795 VEC(2,1) = C(k2,l1) - (sumL+sign*sumR);
796
797 sumL = A(k2,_(k2+1,m)) * C(_(k2+1,m),l2);
798 sumR = C(k2,_(l2+1,n)) * B(l2,_(l2+1,n));
799 VEC(2,2) = C(k2,l2) - (sumL+sign*sumR);
800
801 IndexType iErr = lasy2(false, true, iSign,
802 A(_(k1,k1+1),_(k1,k1+1)),
803 B(_(l1,l1+1),_(l1,l1+1)),
804 VEC, scaleC, X, xNorm);
805
806 if (iErr!=0) {
807 info = 1;
808 }
809
810 if (scaleC!=One) {
811 C *= scaleC;
812 scale *= scaleC;
813 }
814 C(k1,l1) = X(1,1);
815 C(k1,l2) = X(1,2);
816 C(k2,l1) = X(2,1);
817 C(k2,l2) = X(2,2);
818 }
819
820 }
821 }
822
823 }
824 return info;
825 }
826
827 //== interface for native lapack ===============================================
828
829 #ifdef CHECK_CXXLAPACK
830
831 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
832 typename GeMatrix<MC>::IndexType
833 trsyl_native(Transpose transA,
834 Transpose transB,
835 ISGN iSign,
836 const GeMatrix<MA> &A,
837 const GeMatrix<MB> &B,
838 GeMatrix<MC> &C,
839 SCALE &scale)
840 {
841 typedef typename GeMatrix<MC>::ElementType ElementType;
842
843 const char TRANA = getF77LapackChar(transA);
844 const char TRANB = getF77LapackChar(transB);
845 const INTEGER _ISGN = iSign;
846 const INTEGER M = C.numRows();
847 const INTEGER N = C.numCols();
848 const INTEGER LDA = A.leadingDimension();
849 const INTEGER LDB = B.leadingDimension();
850 const INTEGER LDC = C.leadingDimension();
851 ElementType _SCALE = scale;
852 INTEGER INFO;
853
854 if (IsSame<ElementType,DOUBLE>::value) {
855 LAPACK_IMPL(dtrsyl)(&TRANA,
856 &TRANB,
857 &_ISGN,
858 &M,
859 &N,
860 A.data(),
861 &LDA,
862 B.data(),
863 &LDB,
864 C.data(),
865 &LDC,
866 &_SCALE,
867 &INFO);
868 } else {
869 ASSERT(0);
870 }
871 scale = _SCALE;
872 return INFO;
873 }
874
875 #endif // CHECK_CXXLAPACK
876
877 //== public interface ==========================================================
878 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
879 typename GeMatrix<MC>::IndexType
880 trsyl(Transpose transA,
881 Transpose transB,
882 ISGN iSign,
883 const GeMatrix<MA> &A,
884 const GeMatrix<MB> &B,
885 GeMatrix<MC> &C,
886 SCALE &scale)
887 {
888 LAPACK_DEBUG_OUT("trsyl");
889
890 typedef typename GeMatrix<MC>::IndexType IndexType;
891
892 //
893 // Test the input parameters
894 //
895 # ifndef NDEBUG
896 ASSERT(iSign==1 || iSign==-1);
897 ASSERT(A.firstRow()==1);
898 ASSERT(A.firstCol()==1);
899 ASSERT(A.numRows()==A.numCols());
900
901 ASSERT(B.firstRow()==1);
902 ASSERT(B.firstCol()==1);
903 ASSERT(B.numRows()==B.numCols());
904
905 const IndexType m = A.numRows();
906 const IndexType n = B.numRows();
907
908 ASSERT(C.firstRow()==1);
909 ASSERT(C.firstCol()==1);
910 ASSERT(C.numRows()==m);
911 ASSERT(C.numCols()==n);
912 # endif
913
914 # ifdef CHECK_CXXLAPACK
915 //
916 // Make copies of output arguments
917 //
918 const typename GeMatrix<MC>::NoView C_org = C;
919 SCALE scale_org = scale;
920 # endif
921
922 //
923 // Call implementation
924 //
925 IndexType info = trsyl_generic(transA, transB, iSign, A, B, C, scale);
926
927 # ifdef CHECK_CXXLAPACK
928 //
929 // Make copies of results computed by generic implementation
930 //
931 const typename GeMatrix<MC>::NoView C_generic = C;
932 SCALE scale_generic = scale;
933 //
934 // restore output arguments
935 //
936 C = C_org;
937 scale = scale_org;
938 //
939 // Compare generic results with results from the native implementation
940 //
941 IndexType _info = trsyl_native(transA, transB, iSign, A, B, C, scale);
942
943 bool failed = false;
944 if (! isIdentical(C_generic, C, "C_generic", "C")) {
945 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
946 std::cerr << "F77LAPACK: C = " << C << std::endl;
947 failed = true;
948 }
949 if (! isIdentical(scale_generic, scale, "scale_generic", "scale")) {
950 std::cerr << "CXXLAPACK: scale_generic = "
951 << scale_generic << std::endl;
952 std::cerr << "F77LAPACK: scale = "
953 << scale << std::endl;
954 failed = true;
955 }
956 if (! isIdentical(info, _info, "info", "_info")) {
957 std::cerr << "CXXLAPACK: info = " << info << std::endl;
958 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
959 failed = true;
960 }
961 if (failed) {
962 std::cerr << "transA = " << transA << std::endl;
963 std::cerr << "transB = " << transB << std::endl;
964 std::cerr << "iSign = " << iSign << std::endl;
965 std::cerr << "A = " << A << std::endl;
966 std::cerr << "B = " << B << std::endl;
967
968 std::cerr << "error in: trsyl.tcc" << std::endl;
969 ASSERT(0);
970 } else {
971 // std::cerr << "passed: trsyl.tcc" << std::endl;
972 }
973 # endif
974
975 return info;
976 }
977
978 //-- forwarding ----------------------------------------------------------------
979 template <typename ISGN, typename MA, typename MB, typename MC, typename SCALE>
980 typename MC::IndexType
981 trsyl(Transpose transA,
982 Transpose transB,
983 ISGN iSign,
984 const GeMatrix<MA> &A,
985 const GeMatrix<MB> &B,
986 MC &&C,
987 SCALE &&scale)
988 {
989 typedef typename GeMatrix<MC>::IndexType IndexType;
990
991 CHECKPOINT_ENTER;
992 IndexType info = trsyl(transA, transB, iSign, A, B, C, scale);
993 CHECKPOINT_LEAVE;
994
995 return info;
996 }
997
998 } } // namespace lapack, flens
999
1000 #endif // FLENS_LAPACK_EIG_TRSYL_TCC