1 /*
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV,
36 $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 *
40 * -- Contributed by Zlatko Drmac of the University of Zagreb and --
41 * -- Kresimir Veselic of the Fernuniversitaet Hagen --
42 * -- April 2011 --
43 *
44 * -- LAPACK is a software package provided by Univ. of Tennessee, --
45 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
46 *
47 * This routine is also part of SIGMA (version 1.23, October 23. 2008.)
48 * SIGMA is a library of algorithms for highly accurate algorithms for
49 * computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the
50 * eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0.
51 *
52 */
53
54 #ifndef FLENS_LAPACK_SVD_SVJ1_TCC
55 #define FLENS_LAPACK_SVD_SVJ1_TCC 1
56
57 #include <flens/blas/blas.h>
58 #include <flens/lapack/lapack.h>
59
60 namespace flens { namespace lapack {
61
62 //== generic lapack implementation =============================================
63 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
64 typename GeMatrix<MA>::IndexType
65 svj1_generic(SVJ::JobV jobV,
66 typename GeMatrix<MA>::IndexType n1,
67 GeMatrix<MA> &A,
68 DenseVector<VD> &d,
69 DenseVector<VSVA> &sva,
70 GeMatrix<MV> &V,
71 const typename GeMatrix<MA>::ElementType &eps,
72 const typename GeMatrix<MA>::ElementType &safeMin,
73 const typename GeMatrix<MA>::ElementType &tol,
74 typename GeMatrix<MA>::IndexType nSweep,
75 DenseVector<VWORK> &work)
76 {
77 using std::abs;
78 using std::max;
79 using std::min;
80 using std::sqrt;
81 using std::swap;
82
83 typedef typename GeMatrix<MA>::ElementType ElementType;
84 typedef typename GeMatrix<MA>::IndexType IndexType;
85
86 const ElementType Zero(0), Half(0.5), One(1);
87
88 const Underscore<IndexType> _;
89
90 ElementType fastr_data[5];
91 DenseVectorView<ElementType>
92 fastr = typename DenseVectorView<ElementType>::Engine(5, fastr_data);
93
94 const IndexType m = A.numRows();
95 const IndexType n = A.numCols();
96
97 auto _work = work(_(1,m));
98
99 const bool applyV = (jobV==SVJ::ApplyV);
100 const bool rhsVec = (jobV==SVJ::ComputeV) || applyV;
101
102 const ElementType rootEps = sqrt(eps);
103 const ElementType rootSafeMin = sqrt(safeMin);
104 const ElementType small = safeMin / eps;
105 const ElementType big = One / safeMin;
106 const ElementType rootBig = One / rootSafeMin;
107 const ElementType bigTheta = One / rootEps;
108 const ElementType rootTol = sqrt(tol);
109
110 IndexType info = 0;
111 //
112 // .. Initialize the right singular vector matrix ..
113 //
114 // RSVEC = LSAME( JOBV, 'Y' )
115 //
116 const IndexType emptsw = n1*(n-n1);
117 fastr(1) = Zero;
118 //
119 // .. Row-cyclic pivot strategy with de Rijk's pivoting ..
120 //
121 const IndexType kbl = min(IndexType(8),n);
122 IndexType nblr = n1 / kbl;
123 if (nblr*kbl!=n1) {
124 ++nblr;
125 }
126 // .. the tiling is nblr-by-nblc [tiles]
127 IndexType nblc = (n-n1) / kbl;
128
129 if (nblc*kbl!=(n-n1)) {
130 ++nblc;
131 }
132 const IndexType blSkip = pow(kbl,2) + 1;
133 //[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
134 const IndexType rowSkip = min(IndexType(5),kbl);
135 //[TP] ROWSKIP is a tuning parameter.
136 IndexType swBand = 0;
137 //[TP] SWBAND is a tuning parameter. It is meaningful and effective
138 // if SGESVJ is used as a computational routine in the preconditioned
139 // Jacobi SVD algorithm SGESVJ.
140 //
141 //
142 // | * * * [x] [x] [x]|
143 // | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks.
144 // | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block.
145 // |[x] [x] [x] * * * |
146 // |[x] [x] [x] * * * |
147 // |[x] [x] [x] * * * |
148 //
149 //
150 ElementType aapp, aapp0, aaqq, aapq, aqoap, apoaq;
151 ElementType tmp, cs, sn, t, theta, thetaSign;
152
153 bool rotOk;
154 bool converged = false;
155
156 for (IndexType i=1; i<=nSweep; ++i) {
157 // .. go go go ...
158 //
159 ElementType max_aapq = Zero;
160 ElementType max_sinj = Zero;
161
162 IndexType iswRot = 0;
163 IndexType notRot = 0;
164 IndexType pSkipped = 0;
165
166 for (IndexType ibr=1; ibr<=nblr; ++ibr) {
167
168 IndexType igl = (ibr-1)*kbl + 1;
169 //
170 //
171 //........................................................
172 //... go to the off diagonal blocks
173
174 for (IndexType jbc=1; jbc<=nblc; ++jbc) {
175
176 IndexType jgl = n1 + (jbc-1)*kbl + 1;
177
178 // doing the block at ( ibr, jbc )
179
180 IndexType ijblsk = 0;
181 for (IndexType p=igl; p<=min(igl+kbl-1,n1); ++p) {
182
183 aapp = sva(p);
184
185 if (aapp>Zero) {
186
187 pSkipped = 0;
188
189 for (IndexType q=jgl; q<=min(jgl+kbl-1,n); ++q) {
190
191 aaqq = sva(q);
192
193 if (aaqq>Zero) {
194 aapp0 = aapp;
195 //
196 // .. M x 2 Jacobi SVD ..
197 //
198 // .. Safe Gram matrix computation ..
199 //
200 if (aaqq>=One) {
201 if (aapp>=aaqq) {
202 rotOk = small*aapp<=aaqq;
203 } else {
204 rotOk = small*aaqq<=aapp;
205 }
206 if (aapp<big/aaqq) {
207 aapq = (A(_,p)*A(_,q)*d(p)*d(q)/aaqq)/aapp;
208 } else {
209 _work = A(_,p);
210 lascl(LASCL::FullMatrix, 0, 0,
211 aapp, d(p), _work);
212 aapq = _work*A(_,q)*d(q)/aaqq;
213 }
214 } else {
215 if (aapp>=aaqq) {
216 rotOk = aapp<=aaqq/small;
217 } else {
218 rotOk = aaqq<=aapp/small;
219 }
220 if (aapp>small/aaqq) {
221 aapq = (A(_,p)*A(_,q)*d(p)*d(q)/aaqq)/aapp;
222 } else {
223 _work = A(_,q);
224 lascl(LASCL::FullMatrix, 0, 0,
225 aaqq, d(q), _work);
226 aapq = _work*A(_,p)*d(p)/aapp;
227 }
228 }
229
230 max_aapq = max(max_aapq, abs(aapq));
231
232 // TO rotate or NOT to rotate, THAT is the question ...
233 //
234 if (abs(aapq)>tol) {
235 notRot = 0;
236 // ROTATED = ROTATED + 1
237 pSkipped = 0;
238 ++iswRot;
239
240 if (rotOk) {
241
242 aqoap = aaqq / aapp;
243 apoaq = aapp / aaqq;
244 theta = -Half*abs(aqoap-apoaq) / aapq;
245 if (aaqq>aapp0) {
246 theta = -theta;
247 }
248
249 if (abs(theta)>bigTheta) {
250 t = Half / theta;
251 fastr(3) = t*d(p) / d(q);
252 fastr(4) = -t*d(q) / d(p);
253 blas::rotm(A(_,p), A(_,q), fastr);
254 if (rhsVec) {
255 blas::rotm(V(_,p), V(_,q), fastr);
256 }
257 sva(q) = aaqq
258 *sqrt(max(Zero, One+t*apoaq*aapq));
259 aapp *= sqrt(max(Zero, One-t*aqoap*aapq));
260 max_sinj = max(max_sinj, abs(t));
261 } else {
262 //
263 // .. choose correct signum for THETA and rotate
264 //
265 thetaSign = -sign(One, aapq);
266 if (aaqq>aapp0) {
267 thetaSign = -thetaSign;
268 }
269 t = One
270 / (theta+thetaSign*sqrt(One+theta*theta));
271 cs = sqrt(One / (One+t*t));
272 sn = t*cs;
273 max_sinj = max(max_sinj, abs(sn));
274 sva(q) = aaqq
275 *sqrt(max(Zero, One+t*apoaq*aapq));
276 aapp *= sqrt(max(Zero, One-t*aqoap*aapq));
277
278 apoaq = d(p) / d(q);
279 aqoap = d(q) / d(p);
280 if (d(p)>=One) {
281
282 if (d(q)>=One) {
283 fastr(3) = t*apoaq;
284 fastr(4) = -t*aqoap;
285 d(p) *= cs;
286 d(q) *= cs;
287 blas::rotm(A(_,p), A(_,q), fastr);
288 if (rhsVec) {
289 blas::rotm(V(_,p), V(_,q), fastr);
290 }
291 } else {
292 A(_,p) -= t*aqoap*A(_,q);
293 A(_,q) += cs*sn*apoaq*A(_,p);
294 if (rhsVec) {
295 V(_,p) -= t*aqoap*V(_,q);
296 V(_,q) += cs*sn*apoaq*V(_,p);
297 }
298 d(p) *= cs;
299 d(q) /= cs;
300 }
301 } else {
302 if (d(q)>=One) {
303 A(_,q) += t*apoaq*A(_,p);
304 A(_,p) -= cs*sn*aqoap*A(_,q);
305 if (rhsVec) {
306 V(_,q) += t*apoaq*V(_,p);
307 V(_,p) -= cs*sn*aqoap*V(_,q);
308 }
309 d(p) /= cs;
310 d(q) *= cs;
311 } else {
312 if (d(p)>=d(q)) {
313 A(_,p) -= t*aqoap*A(_,q);
314 A(_,q) += cs*sn*apoaq*A(_,p);
315 d(p) *= cs;
316 d(q) /= cs;
317 if (rhsVec) {
318 V(_,p) -= t*aqoap*V(_,q);
319 V(_,q) += cs*sn*apoaq*V(_,p);
320 }
321 } else {
322 A(_,q) += t*apoaq*A(_,p);
323 A(_,p) -= cs*sn*aqoap*A(_,q);
324 d(p) /= cs;
325 d(q) *= cs;
326 if (rhsVec) {
327 V(_,q) += t*apoaq*V(_,p);
328 V(_,p) -= cs*sn*aqoap*V(_,q);
329 }
330 }
331 }
332 }
333 }
334
335 } else {
336 if (aapp>aaqq) {
337 _work = A(_,p);
338 lascl(LASCL::FullMatrix, 0, 0,
339 aapp, One, _work);
340 lascl(LASCL::FullMatrix, 0, 0,
341 aaqq, One, A(_,q));
342 tmp = -aapq*d(p)/d(q);
343 A(_,q) += tmp*_work;
344 lascl(LASCL::FullMatrix, 0, 0,
345 One, aaqq, A(_,q));
346 sva(q) = aaqq*sqrt(max(Zero, One-aapq*aapq));
347 max_sinj = max(max_sinj, safeMin);
348 } else {
349 _work = A(_,q);
350 lascl(LASCL::FullMatrix, 0, 0,
351 aaqq, One, _work);
352 lascl(LASCL::FullMatrix, 0, 0,
353 aapp, One, A(_,p));
354 tmp = -aapq*d(q)/d(p);
355 A(_,p) += tmp*_work;
356 lascl(LASCL::FullMatrix, 0, 0,
357 One, aapp, A(_,p));
358 sva(p) = aapp*sqrt(max(Zero, One-aapq*aapq));
359 max_sinj = max(max_sinj, safeMin);
360 }
361 }
362 // END IF ROTOK THEN ... ELSE
363 //
364 // In the case of cancellation in updating SVA(q)
365 // .. recompute SVA(q)
366 if (pow(sva(q)/aaqq,2)<=rootEps) {
367 if (aaqq<rootBig && aaqq>rootSafeMin) {
368 sva(q) = blas::nrm2(A(_,q))*d(q);
369 } else {
370 t = Zero;
371 aaqq = One;
372 lassq(A(_,q), t, aaqq);
373 sva(q) = t*sqrt(aaqq)*d(q);
374 }
375 }
376 if (pow(aapp/aapp0,2)<=rootEps) {
377 if (aapp<rootBig && aapp>rootSafeMin) {
378 aapp = blas::nrm2(A(_,p))*d(p);
379 } else {
380 t = Zero;
381 aapp = One;
382 lassq(A(_,p), t, aapp);
383 aapp = t*sqrt(aapp)*d(p);
384 }
385 sva(p) = aapp;
386 }
387 // end of OK rotation
388 } else {
389 ++notRot;
390 // SKIPPED = SKIPPED + 1
391 ++pSkipped;
392 ++ijblsk;
393 }
394 } else {
395 ++notRot;
396 ++pSkipped;
397 ++ijblsk;
398 }
399
400 // IF ( NOTROT .GE. EMPTSW ) GO TO 2011
401 if (i<=swBand && ijblsk>=blSkip) {
402 sva(p) = aapp;
403 notRot = 0;
404 goto jbcLooExit;
405 }
406 if (i<=swBand && pSkipped>rowSkip) {
407 aapp = -aapp;
408 notRot = 0;
409 break;
410 }
411
412 }
413 // end of the q-loop
414
415 sva(p) = aapp;
416
417 } else {
418 if (aapp==Zero) {
419 notRot += min(jgl+kbl-1,n) - jgl + 1;
420 }
421 if (aapp<Zero) {
422 notRot = 0;
423 }
424 //** IF ( NOTROT .GE. EMPTSW ) GO TO 2011
425 }
426
427 }
428 // end of the p-loop
429 }
430 // end of the jbc-loop
431 jbcLooExit:
432 //2011 bailed out of the jbc-loop
433 for (IndexType p=igl; p<=min(igl+kbl-1,n); ++p) {
434 sva(p) = abs(sva(p));
435 }
436 //** IF ( NOTROT .GE. EMPTSW ) GO TO 1994
437 }
438 //2000 :: end of the ibr-loop
439 //
440 // .. update SVA(N)
441 if (sva(n)<rootBig && sva(n)>rootSafeMin) {
442 sva(n) = blas::nrm2(A(_,n))*d(n);
443 } else {
444 t = Zero;
445 aapp = One;
446 lassq(A(_,n), t, aapp);
447 sva(n) = t*sqrt(aapp)*d(n);
448 }
449 //
450 // Additional steering devices
451 //
452 if (i<swBand && (max_aapq<=rootTol || iswRot<=n)) {
453 swBand = i;
454 }
455
456 if (i>swBand+1 && max_aapq<n*tol && n*max_aapq*max_sinj<tol) {
457 converged = true;
458 break;
459 }
460
461 if (notRot>=emptsw) {
462 converged = true;
463 break;
464 }
465
466 }
467 // end i=1:NSWEEP loop
468 if (converged) {
469 //#:) Reaching this point means that during the i-th sweep all pivots were
470 // below the given threshold, causing early exit.
471 info = 0;
472 } else {
473 //#:) Reaching this point means that the procedure has completed the given
474 // number of sweeps.
475 info = nSweep - 1;
476 }
477 // Sort the vector D
478 //
479 for (IndexType p=1; p<=n-1; ++p) {
480 const IndexType q = blas::iamax(sva(_(p,n))) + p - 1;
481 if (p!=q) {
482 swap(sva(p), sva(q));
483 swap(d(p), d(q));
484 blas::swap(A(_,p), A(_,q));
485 if (rhsVec) {
486 blas::swap(V(_,p), V(_,q));
487 }
488 }
489 }
490 return info;
491 }
492
493
494 //== interface for native lapack ===============================================
495 #ifdef CHECK_CXXLAPACK
496
497 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
498 typename GeMatrix<MA>::IndexType
499 svj1_native(SVJ::JobV jobV,
500 typename GeMatrix<MA>::IndexType n1,
501 GeMatrix<MA> &A,
502 DenseVector<VD> &d,
503 DenseVector<VSVA> &sva,
504 GeMatrix<MV> &V,
505 const typename GeMatrix<MA>::ElementType &eps,
506 const typename GeMatrix<MA>::ElementType &safeMin,
507 const typename GeMatrix<MA>::ElementType &tol,
508 typename GeMatrix<MA>::IndexType nSweep,
509 DenseVector<VWORK> &work)
510 {
511 typedef typename GeMatrix<MA>::ElementType T;
512
513 const char JOBV = char(jobV);
514 const INTEGER M = A.numRows();
515 const INTEGER N = A.numCols();
516 const INTEGER N1 = n1;
517 const INTEGER LDA = A.leadingDimension();
518 const INTEGER _MV = V.numRows();
519 const INTEGER LDV = V.leadingDimension();
520 const DOUBLE EPS = eps;
521 const DOUBLE SFMIN = safeMin;
522 const DOUBLE TOL = tol;
523 const INTEGER NSWEEP = nSweep;
524 const INTEGER LWORK = work.length();
525 INTEGER INFO;
526
527 if (IsSame<T,DOUBLE>::value) {
528 LAPACK_DECL(dgsvj1)(&JOBV,
529 &M,
530 &N,
531 &N1,
532 A.data(),
533 &LDA,
534 d.data(),
535 sva.data(),
536 &_MV,
537 V.data(),
538 &LDV,
539 &EPS,
540 &SFMIN,
541 &TOL,
542 &NSWEEP,
543 work.data(),
544 &LWORK,
545 &INFO);
546 } else {
547 ASSERT(0);
548 }
549 ASSERT(INFO>=0);
550 return INFO;
551 }
552
553 #endif // CHECK_CXXLAPACK
554
555 //== public interface ==========================================================
556 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
557 typename GeMatrix<MA>::IndexType
558 svj1(SVJ::JobV jobV,
559 typename GeMatrix<MA>::IndexType n1,
560 GeMatrix<MA> &A,
561 DenseVector<VD> &d,
562 DenseVector<VSVA> &sva,
563 GeMatrix<MV> &V,
564 const typename GeMatrix<MA>::ElementType &eps,
565 const typename GeMatrix<MA>::ElementType &safeMin,
566 const typename GeMatrix<MA>::ElementType &tol,
567 typename GeMatrix<MA>::IndexType nSweep,
568 DenseVector<VWORK> &work)
569 {
570 using std::max;
571 using std::min;
572 //
573 // Test the input parameters
574 //
575 # ifndef NDEBUG
576 typedef typename GeMatrix<MA>::IndexType IndexType;
577
578 ASSERT(A.firstRow()==1);
579 ASSERT(A.firstCol()==1);
580
581 const IndexType m = A.numRows();
582 const IndexType n = A.numCols();
583
584 ASSERT(m>=n);
585 ASSERT(n>=n1);
586 ASSERT(n1>=0);
587
588
589 ASSERT(d.firstIndex()==1);
590 ASSERT(d.length()==n);
591
592 ASSERT(sva.firstIndex()==1);
593 ASSERT(sva.length()==n);
594
595 ASSERT(V.firstRow()==1);
596 ASSERT(V.firstCol()==1);
597
598 if (jobV==SVJ::ComputeV) {
599 ASSERT(V.numCols()==n);
600 ASSERT(V.numRows()==n);
601 }
602 if (jobV==SVJ::ApplyV) {
603 ASSERT(V.numCols()==n);
604 }
605
606 if (work.length()>0) {
607 ASSERT(work.length()>=m);
608 }
609 # endif
610
611 //
612 // Make copies of output arguments
613 //
614 # ifdef CHECK_CXXLAPACK
615 typename GeMatrix<MA>::NoView A_org = A;
616 typename DenseVector<VD>::NoView d_org = d;
617 typename DenseVector<VSVA>::NoView sva_org = sva;
618 typename GeMatrix<MV>::NoView V_org = V;
619 typename DenseVector<VWORK>::NoView work_org = work;
620 # endif
621
622 //
623 // Call implementation
624 //
625 IndexType info = svj1_generic(jobV, n1, A, d, sva, V, eps, safeMin, tol,
626 nSweep, work);
627
628 # ifdef CHECK_CXXLAPACK
629 //
630 // Make copies of results computed by the generic implementation
631 //
632 typename GeMatrix<MA>::NoView A_generic = A;
633 typename DenseVector<VD>::NoView d_generic = d;
634 typename DenseVector<VSVA>::NoView sva_generic = sva;
635 typename GeMatrix<MV>::NoView V_generic = V;
636 typename DenseVector<VWORK>::NoView work_generic = work;
637 //
638 // restore output arguments
639 //
640 A = A_org;
641 d = d_org;
642 sva = sva_org;
643 V = V_org;
644 work = work_org;
645 //
646 // Compare generic results with results from the native implementation
647 //
648 IndexType _info = svj1_native(jobV, n1, A, d, sva, V, eps, safeMin, tol,
649 nSweep, work);
650
651 bool failed = false;
652 if (! isIdentical(A_generic, A, "A_generic", "A")) {
653 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
654 std::cerr << "F77LAPACK: A = " << A << std::endl;
655 failed = true;
656 }
657 if (! isIdentical(d_generic, d, "d_generic", "d")) {
658 std::cerr << "CXXLAPACK: d_generic = " << d_generic << std::endl;
659 std::cerr << "F77LAPACK: d = " << d << std::endl;
660 failed = true;
661 }
662 if (! isIdentical(sva_generic, sva, "sva_generic", "sva")) {
663 std::cerr << "CXXLAPACK: sva_generic = " << sva_generic << std::endl;
664 std::cerr << "F77LAPACK: sva = " << sva << std::endl;
665 failed = true;
666 }
667 if (! isIdentical(V_generic, V, "V_generic", "V")) {
668 std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
669 std::cerr << "F77LAPACK: V = " << V << std::endl;
670 failed = true;
671 }
672 if (! isIdentical(work_generic, work, "work_generic", "work")) {
673 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
674 std::cerr << "F77LAPACK: work = " << work << std::endl;
675 failed = true;
676 }
677 if (! isIdentical(info, _info, "info", "_info")) {
678 std::cerr << "CXXLAPACK: info = " << info << std::endl;
679 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
680 failed = true;
681 }
682
683 if (failed) {
684 std::cerr << "error in: svj1.tcc" << std::endl;
685 ASSERT(0);
686 } else {
687 std::cerr << "passed: svj1.tcc" << std::endl;
688 }
689 # endif
690
691 return info;
692 }
693
694 //-- forwarding ----------------------------------------------------------------
695 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
696 typename MA::IndexType
697 svj1(SVJ::JobV jobV,
698 typename MA::IndexType n1,
699 MA &&A,
700 VD &&d,
701 VSVA &&sva,
702 MV &&V,
703 const typename MA::ElementType &eps,
704 const typename MA::ElementType &safeMin,
705 const typename MA::ElementType &tol,
706 typename MA::IndexType nSweep,
707 VWORK &&work)
708 {
709 typename MA::IndexType info;
710
711 CHECKPOINT_ENTER;
712 svj1(jobV, n1, A, d, sva, V, eps, safeMin, tol, nSweep, work);
713 CHECKPOINT_LEAVE;
714 }
715
716 } } // namespace lapack, flens
717
718 #endif // FLENS_LAPACK_SVD_SVJ1_TCC
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV,
36 $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 *
40 * -- Contributed by Zlatko Drmac of the University of Zagreb and --
41 * -- Kresimir Veselic of the Fernuniversitaet Hagen --
42 * -- April 2011 --
43 *
44 * -- LAPACK is a software package provided by Univ. of Tennessee, --
45 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
46 *
47 * This routine is also part of SIGMA (version 1.23, October 23. 2008.)
48 * SIGMA is a library of algorithms for highly accurate algorithms for
49 * computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the
50 * eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0.
51 *
52 */
53
54 #ifndef FLENS_LAPACK_SVD_SVJ1_TCC
55 #define FLENS_LAPACK_SVD_SVJ1_TCC 1
56
57 #include <flens/blas/blas.h>
58 #include <flens/lapack/lapack.h>
59
60 namespace flens { namespace lapack {
61
62 //== generic lapack implementation =============================================
63 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
64 typename GeMatrix<MA>::IndexType
65 svj1_generic(SVJ::JobV jobV,
66 typename GeMatrix<MA>::IndexType n1,
67 GeMatrix<MA> &A,
68 DenseVector<VD> &d,
69 DenseVector<VSVA> &sva,
70 GeMatrix<MV> &V,
71 const typename GeMatrix<MA>::ElementType &eps,
72 const typename GeMatrix<MA>::ElementType &safeMin,
73 const typename GeMatrix<MA>::ElementType &tol,
74 typename GeMatrix<MA>::IndexType nSweep,
75 DenseVector<VWORK> &work)
76 {
77 using std::abs;
78 using std::max;
79 using std::min;
80 using std::sqrt;
81 using std::swap;
82
83 typedef typename GeMatrix<MA>::ElementType ElementType;
84 typedef typename GeMatrix<MA>::IndexType IndexType;
85
86 const ElementType Zero(0), Half(0.5), One(1);
87
88 const Underscore<IndexType> _;
89
90 ElementType fastr_data[5];
91 DenseVectorView<ElementType>
92 fastr = typename DenseVectorView<ElementType>::Engine(5, fastr_data);
93
94 const IndexType m = A.numRows();
95 const IndexType n = A.numCols();
96
97 auto _work = work(_(1,m));
98
99 const bool applyV = (jobV==SVJ::ApplyV);
100 const bool rhsVec = (jobV==SVJ::ComputeV) || applyV;
101
102 const ElementType rootEps = sqrt(eps);
103 const ElementType rootSafeMin = sqrt(safeMin);
104 const ElementType small = safeMin / eps;
105 const ElementType big = One / safeMin;
106 const ElementType rootBig = One / rootSafeMin;
107 const ElementType bigTheta = One / rootEps;
108 const ElementType rootTol = sqrt(tol);
109
110 IndexType info = 0;
111 //
112 // .. Initialize the right singular vector matrix ..
113 //
114 // RSVEC = LSAME( JOBV, 'Y' )
115 //
116 const IndexType emptsw = n1*(n-n1);
117 fastr(1) = Zero;
118 //
119 // .. Row-cyclic pivot strategy with de Rijk's pivoting ..
120 //
121 const IndexType kbl = min(IndexType(8),n);
122 IndexType nblr = n1 / kbl;
123 if (nblr*kbl!=n1) {
124 ++nblr;
125 }
126 // .. the tiling is nblr-by-nblc [tiles]
127 IndexType nblc = (n-n1) / kbl;
128
129 if (nblc*kbl!=(n-n1)) {
130 ++nblc;
131 }
132 const IndexType blSkip = pow(kbl,2) + 1;
133 //[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
134 const IndexType rowSkip = min(IndexType(5),kbl);
135 //[TP] ROWSKIP is a tuning parameter.
136 IndexType swBand = 0;
137 //[TP] SWBAND is a tuning parameter. It is meaningful and effective
138 // if SGESVJ is used as a computational routine in the preconditioned
139 // Jacobi SVD algorithm SGESVJ.
140 //
141 //
142 // | * * * [x] [x] [x]|
143 // | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks.
144 // | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block.
145 // |[x] [x] [x] * * * |
146 // |[x] [x] [x] * * * |
147 // |[x] [x] [x] * * * |
148 //
149 //
150 ElementType aapp, aapp0, aaqq, aapq, aqoap, apoaq;
151 ElementType tmp, cs, sn, t, theta, thetaSign;
152
153 bool rotOk;
154 bool converged = false;
155
156 for (IndexType i=1; i<=nSweep; ++i) {
157 // .. go go go ...
158 //
159 ElementType max_aapq = Zero;
160 ElementType max_sinj = Zero;
161
162 IndexType iswRot = 0;
163 IndexType notRot = 0;
164 IndexType pSkipped = 0;
165
166 for (IndexType ibr=1; ibr<=nblr; ++ibr) {
167
168 IndexType igl = (ibr-1)*kbl + 1;
169 //
170 //
171 //........................................................
172 //... go to the off diagonal blocks
173
174 for (IndexType jbc=1; jbc<=nblc; ++jbc) {
175
176 IndexType jgl = n1 + (jbc-1)*kbl + 1;
177
178 // doing the block at ( ibr, jbc )
179
180 IndexType ijblsk = 0;
181 for (IndexType p=igl; p<=min(igl+kbl-1,n1); ++p) {
182
183 aapp = sva(p);
184
185 if (aapp>Zero) {
186
187 pSkipped = 0;
188
189 for (IndexType q=jgl; q<=min(jgl+kbl-1,n); ++q) {
190
191 aaqq = sva(q);
192
193 if (aaqq>Zero) {
194 aapp0 = aapp;
195 //
196 // .. M x 2 Jacobi SVD ..
197 //
198 // .. Safe Gram matrix computation ..
199 //
200 if (aaqq>=One) {
201 if (aapp>=aaqq) {
202 rotOk = small*aapp<=aaqq;
203 } else {
204 rotOk = small*aaqq<=aapp;
205 }
206 if (aapp<big/aaqq) {
207 aapq = (A(_,p)*A(_,q)*d(p)*d(q)/aaqq)/aapp;
208 } else {
209 _work = A(_,p);
210 lascl(LASCL::FullMatrix, 0, 0,
211 aapp, d(p), _work);
212 aapq = _work*A(_,q)*d(q)/aaqq;
213 }
214 } else {
215 if (aapp>=aaqq) {
216 rotOk = aapp<=aaqq/small;
217 } else {
218 rotOk = aaqq<=aapp/small;
219 }
220 if (aapp>small/aaqq) {
221 aapq = (A(_,p)*A(_,q)*d(p)*d(q)/aaqq)/aapp;
222 } else {
223 _work = A(_,q);
224 lascl(LASCL::FullMatrix, 0, 0,
225 aaqq, d(q), _work);
226 aapq = _work*A(_,p)*d(p)/aapp;
227 }
228 }
229
230 max_aapq = max(max_aapq, abs(aapq));
231
232 // TO rotate or NOT to rotate, THAT is the question ...
233 //
234 if (abs(aapq)>tol) {
235 notRot = 0;
236 // ROTATED = ROTATED + 1
237 pSkipped = 0;
238 ++iswRot;
239
240 if (rotOk) {
241
242 aqoap = aaqq / aapp;
243 apoaq = aapp / aaqq;
244 theta = -Half*abs(aqoap-apoaq) / aapq;
245 if (aaqq>aapp0) {
246 theta = -theta;
247 }
248
249 if (abs(theta)>bigTheta) {
250 t = Half / theta;
251 fastr(3) = t*d(p) / d(q);
252 fastr(4) = -t*d(q) / d(p);
253 blas::rotm(A(_,p), A(_,q), fastr);
254 if (rhsVec) {
255 blas::rotm(V(_,p), V(_,q), fastr);
256 }
257 sva(q) = aaqq
258 *sqrt(max(Zero, One+t*apoaq*aapq));
259 aapp *= sqrt(max(Zero, One-t*aqoap*aapq));
260 max_sinj = max(max_sinj, abs(t));
261 } else {
262 //
263 // .. choose correct signum for THETA and rotate
264 //
265 thetaSign = -sign(One, aapq);
266 if (aaqq>aapp0) {
267 thetaSign = -thetaSign;
268 }
269 t = One
270 / (theta+thetaSign*sqrt(One+theta*theta));
271 cs = sqrt(One / (One+t*t));
272 sn = t*cs;
273 max_sinj = max(max_sinj, abs(sn));
274 sva(q) = aaqq
275 *sqrt(max(Zero, One+t*apoaq*aapq));
276 aapp *= sqrt(max(Zero, One-t*aqoap*aapq));
277
278 apoaq = d(p) / d(q);
279 aqoap = d(q) / d(p);
280 if (d(p)>=One) {
281
282 if (d(q)>=One) {
283 fastr(3) = t*apoaq;
284 fastr(4) = -t*aqoap;
285 d(p) *= cs;
286 d(q) *= cs;
287 blas::rotm(A(_,p), A(_,q), fastr);
288 if (rhsVec) {
289 blas::rotm(V(_,p), V(_,q), fastr);
290 }
291 } else {
292 A(_,p) -= t*aqoap*A(_,q);
293 A(_,q) += cs*sn*apoaq*A(_,p);
294 if (rhsVec) {
295 V(_,p) -= t*aqoap*V(_,q);
296 V(_,q) += cs*sn*apoaq*V(_,p);
297 }
298 d(p) *= cs;
299 d(q) /= cs;
300 }
301 } else {
302 if (d(q)>=One) {
303 A(_,q) += t*apoaq*A(_,p);
304 A(_,p) -= cs*sn*aqoap*A(_,q);
305 if (rhsVec) {
306 V(_,q) += t*apoaq*V(_,p);
307 V(_,p) -= cs*sn*aqoap*V(_,q);
308 }
309 d(p) /= cs;
310 d(q) *= cs;
311 } else {
312 if (d(p)>=d(q)) {
313 A(_,p) -= t*aqoap*A(_,q);
314 A(_,q) += cs*sn*apoaq*A(_,p);
315 d(p) *= cs;
316 d(q) /= cs;
317 if (rhsVec) {
318 V(_,p) -= t*aqoap*V(_,q);
319 V(_,q) += cs*sn*apoaq*V(_,p);
320 }
321 } else {
322 A(_,q) += t*apoaq*A(_,p);
323 A(_,p) -= cs*sn*aqoap*A(_,q);
324 d(p) /= cs;
325 d(q) *= cs;
326 if (rhsVec) {
327 V(_,q) += t*apoaq*V(_,p);
328 V(_,p) -= cs*sn*aqoap*V(_,q);
329 }
330 }
331 }
332 }
333 }
334
335 } else {
336 if (aapp>aaqq) {
337 _work = A(_,p);
338 lascl(LASCL::FullMatrix, 0, 0,
339 aapp, One, _work);
340 lascl(LASCL::FullMatrix, 0, 0,
341 aaqq, One, A(_,q));
342 tmp = -aapq*d(p)/d(q);
343 A(_,q) += tmp*_work;
344 lascl(LASCL::FullMatrix, 0, 0,
345 One, aaqq, A(_,q));
346 sva(q) = aaqq*sqrt(max(Zero, One-aapq*aapq));
347 max_sinj = max(max_sinj, safeMin);
348 } else {
349 _work = A(_,q);
350 lascl(LASCL::FullMatrix, 0, 0,
351 aaqq, One, _work);
352 lascl(LASCL::FullMatrix, 0, 0,
353 aapp, One, A(_,p));
354 tmp = -aapq*d(q)/d(p);
355 A(_,p) += tmp*_work;
356 lascl(LASCL::FullMatrix, 0, 0,
357 One, aapp, A(_,p));
358 sva(p) = aapp*sqrt(max(Zero, One-aapq*aapq));
359 max_sinj = max(max_sinj, safeMin);
360 }
361 }
362 // END IF ROTOK THEN ... ELSE
363 //
364 // In the case of cancellation in updating SVA(q)
365 // .. recompute SVA(q)
366 if (pow(sva(q)/aaqq,2)<=rootEps) {
367 if (aaqq<rootBig && aaqq>rootSafeMin) {
368 sva(q) = blas::nrm2(A(_,q))*d(q);
369 } else {
370 t = Zero;
371 aaqq = One;
372 lassq(A(_,q), t, aaqq);
373 sva(q) = t*sqrt(aaqq)*d(q);
374 }
375 }
376 if (pow(aapp/aapp0,2)<=rootEps) {
377 if (aapp<rootBig && aapp>rootSafeMin) {
378 aapp = blas::nrm2(A(_,p))*d(p);
379 } else {
380 t = Zero;
381 aapp = One;
382 lassq(A(_,p), t, aapp);
383 aapp = t*sqrt(aapp)*d(p);
384 }
385 sva(p) = aapp;
386 }
387 // end of OK rotation
388 } else {
389 ++notRot;
390 // SKIPPED = SKIPPED + 1
391 ++pSkipped;
392 ++ijblsk;
393 }
394 } else {
395 ++notRot;
396 ++pSkipped;
397 ++ijblsk;
398 }
399
400 // IF ( NOTROT .GE. EMPTSW ) GO TO 2011
401 if (i<=swBand && ijblsk>=blSkip) {
402 sva(p) = aapp;
403 notRot = 0;
404 goto jbcLooExit;
405 }
406 if (i<=swBand && pSkipped>rowSkip) {
407 aapp = -aapp;
408 notRot = 0;
409 break;
410 }
411
412 }
413 // end of the q-loop
414
415 sva(p) = aapp;
416
417 } else {
418 if (aapp==Zero) {
419 notRot += min(jgl+kbl-1,n) - jgl + 1;
420 }
421 if (aapp<Zero) {
422 notRot = 0;
423 }
424 //** IF ( NOTROT .GE. EMPTSW ) GO TO 2011
425 }
426
427 }
428 // end of the p-loop
429 }
430 // end of the jbc-loop
431 jbcLooExit:
432 //2011 bailed out of the jbc-loop
433 for (IndexType p=igl; p<=min(igl+kbl-1,n); ++p) {
434 sva(p) = abs(sva(p));
435 }
436 //** IF ( NOTROT .GE. EMPTSW ) GO TO 1994
437 }
438 //2000 :: end of the ibr-loop
439 //
440 // .. update SVA(N)
441 if (sva(n)<rootBig && sva(n)>rootSafeMin) {
442 sva(n) = blas::nrm2(A(_,n))*d(n);
443 } else {
444 t = Zero;
445 aapp = One;
446 lassq(A(_,n), t, aapp);
447 sva(n) = t*sqrt(aapp)*d(n);
448 }
449 //
450 // Additional steering devices
451 //
452 if (i<swBand && (max_aapq<=rootTol || iswRot<=n)) {
453 swBand = i;
454 }
455
456 if (i>swBand+1 && max_aapq<n*tol && n*max_aapq*max_sinj<tol) {
457 converged = true;
458 break;
459 }
460
461 if (notRot>=emptsw) {
462 converged = true;
463 break;
464 }
465
466 }
467 // end i=1:NSWEEP loop
468 if (converged) {
469 //#:) Reaching this point means that during the i-th sweep all pivots were
470 // below the given threshold, causing early exit.
471 info = 0;
472 } else {
473 //#:) Reaching this point means that the procedure has completed the given
474 // number of sweeps.
475 info = nSweep - 1;
476 }
477 // Sort the vector D
478 //
479 for (IndexType p=1; p<=n-1; ++p) {
480 const IndexType q = blas::iamax(sva(_(p,n))) + p - 1;
481 if (p!=q) {
482 swap(sva(p), sva(q));
483 swap(d(p), d(q));
484 blas::swap(A(_,p), A(_,q));
485 if (rhsVec) {
486 blas::swap(V(_,p), V(_,q));
487 }
488 }
489 }
490 return info;
491 }
492
493
494 //== interface for native lapack ===============================================
495 #ifdef CHECK_CXXLAPACK
496
497 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
498 typename GeMatrix<MA>::IndexType
499 svj1_native(SVJ::JobV jobV,
500 typename GeMatrix<MA>::IndexType n1,
501 GeMatrix<MA> &A,
502 DenseVector<VD> &d,
503 DenseVector<VSVA> &sva,
504 GeMatrix<MV> &V,
505 const typename GeMatrix<MA>::ElementType &eps,
506 const typename GeMatrix<MA>::ElementType &safeMin,
507 const typename GeMatrix<MA>::ElementType &tol,
508 typename GeMatrix<MA>::IndexType nSweep,
509 DenseVector<VWORK> &work)
510 {
511 typedef typename GeMatrix<MA>::ElementType T;
512
513 const char JOBV = char(jobV);
514 const INTEGER M = A.numRows();
515 const INTEGER N = A.numCols();
516 const INTEGER N1 = n1;
517 const INTEGER LDA = A.leadingDimension();
518 const INTEGER _MV = V.numRows();
519 const INTEGER LDV = V.leadingDimension();
520 const DOUBLE EPS = eps;
521 const DOUBLE SFMIN = safeMin;
522 const DOUBLE TOL = tol;
523 const INTEGER NSWEEP = nSweep;
524 const INTEGER LWORK = work.length();
525 INTEGER INFO;
526
527 if (IsSame<T,DOUBLE>::value) {
528 LAPACK_DECL(dgsvj1)(&JOBV,
529 &M,
530 &N,
531 &N1,
532 A.data(),
533 &LDA,
534 d.data(),
535 sva.data(),
536 &_MV,
537 V.data(),
538 &LDV,
539 &EPS,
540 &SFMIN,
541 &TOL,
542 &NSWEEP,
543 work.data(),
544 &LWORK,
545 &INFO);
546 } else {
547 ASSERT(0);
548 }
549 ASSERT(INFO>=0);
550 return INFO;
551 }
552
553 #endif // CHECK_CXXLAPACK
554
555 //== public interface ==========================================================
556 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
557 typename GeMatrix<MA>::IndexType
558 svj1(SVJ::JobV jobV,
559 typename GeMatrix<MA>::IndexType n1,
560 GeMatrix<MA> &A,
561 DenseVector<VD> &d,
562 DenseVector<VSVA> &sva,
563 GeMatrix<MV> &V,
564 const typename GeMatrix<MA>::ElementType &eps,
565 const typename GeMatrix<MA>::ElementType &safeMin,
566 const typename GeMatrix<MA>::ElementType &tol,
567 typename GeMatrix<MA>::IndexType nSweep,
568 DenseVector<VWORK> &work)
569 {
570 using std::max;
571 using std::min;
572 //
573 // Test the input parameters
574 //
575 # ifndef NDEBUG
576 typedef typename GeMatrix<MA>::IndexType IndexType;
577
578 ASSERT(A.firstRow()==1);
579 ASSERT(A.firstCol()==1);
580
581 const IndexType m = A.numRows();
582 const IndexType n = A.numCols();
583
584 ASSERT(m>=n);
585 ASSERT(n>=n1);
586 ASSERT(n1>=0);
587
588
589 ASSERT(d.firstIndex()==1);
590 ASSERT(d.length()==n);
591
592 ASSERT(sva.firstIndex()==1);
593 ASSERT(sva.length()==n);
594
595 ASSERT(V.firstRow()==1);
596 ASSERT(V.firstCol()==1);
597
598 if (jobV==SVJ::ComputeV) {
599 ASSERT(V.numCols()==n);
600 ASSERT(V.numRows()==n);
601 }
602 if (jobV==SVJ::ApplyV) {
603 ASSERT(V.numCols()==n);
604 }
605
606 if (work.length()>0) {
607 ASSERT(work.length()>=m);
608 }
609 # endif
610
611 //
612 // Make copies of output arguments
613 //
614 # ifdef CHECK_CXXLAPACK
615 typename GeMatrix<MA>::NoView A_org = A;
616 typename DenseVector<VD>::NoView d_org = d;
617 typename DenseVector<VSVA>::NoView sva_org = sva;
618 typename GeMatrix<MV>::NoView V_org = V;
619 typename DenseVector<VWORK>::NoView work_org = work;
620 # endif
621
622 //
623 // Call implementation
624 //
625 IndexType info = svj1_generic(jobV, n1, A, d, sva, V, eps, safeMin, tol,
626 nSweep, work);
627
628 # ifdef CHECK_CXXLAPACK
629 //
630 // Make copies of results computed by the generic implementation
631 //
632 typename GeMatrix<MA>::NoView A_generic = A;
633 typename DenseVector<VD>::NoView d_generic = d;
634 typename DenseVector<VSVA>::NoView sva_generic = sva;
635 typename GeMatrix<MV>::NoView V_generic = V;
636 typename DenseVector<VWORK>::NoView work_generic = work;
637 //
638 // restore output arguments
639 //
640 A = A_org;
641 d = d_org;
642 sva = sva_org;
643 V = V_org;
644 work = work_org;
645 //
646 // Compare generic results with results from the native implementation
647 //
648 IndexType _info = svj1_native(jobV, n1, A, d, sva, V, eps, safeMin, tol,
649 nSweep, work);
650
651 bool failed = false;
652 if (! isIdentical(A_generic, A, "A_generic", "A")) {
653 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
654 std::cerr << "F77LAPACK: A = " << A << std::endl;
655 failed = true;
656 }
657 if (! isIdentical(d_generic, d, "d_generic", "d")) {
658 std::cerr << "CXXLAPACK: d_generic = " << d_generic << std::endl;
659 std::cerr << "F77LAPACK: d = " << d << std::endl;
660 failed = true;
661 }
662 if (! isIdentical(sva_generic, sva, "sva_generic", "sva")) {
663 std::cerr << "CXXLAPACK: sva_generic = " << sva_generic << std::endl;
664 std::cerr << "F77LAPACK: sva = " << sva << std::endl;
665 failed = true;
666 }
667 if (! isIdentical(V_generic, V, "V_generic", "V")) {
668 std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
669 std::cerr << "F77LAPACK: V = " << V << std::endl;
670 failed = true;
671 }
672 if (! isIdentical(work_generic, work, "work_generic", "work")) {
673 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
674 std::cerr << "F77LAPACK: work = " << work << std::endl;
675 failed = true;
676 }
677 if (! isIdentical(info, _info, "info", "_info")) {
678 std::cerr << "CXXLAPACK: info = " << info << std::endl;
679 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
680 failed = true;
681 }
682
683 if (failed) {
684 std::cerr << "error in: svj1.tcc" << std::endl;
685 ASSERT(0);
686 } else {
687 std::cerr << "passed: svj1.tcc" << std::endl;
688 }
689 # endif
690
691 return info;
692 }
693
694 //-- forwarding ----------------------------------------------------------------
695 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
696 typename MA::IndexType
697 svj1(SVJ::JobV jobV,
698 typename MA::IndexType n1,
699 MA &&A,
700 VD &&d,
701 VSVA &&sva,
702 MV &&V,
703 const typename MA::ElementType &eps,
704 const typename MA::ElementType &safeMin,
705 const typename MA::ElementType &tol,
706 typename MA::IndexType nSweep,
707 VWORK &&work)
708 {
709 typename MA::IndexType info;
710
711 CHECKPOINT_ENTER;
712 svj1(jobV, n1, A, d, sva, V, eps, safeMin, tol, nSweep, work);
713 CHECKPOINT_LEAVE;
714 }
715
716 } } // namespace lapack, flens
717
718 #endif // FLENS_LAPACK_SVD_SVJ1_TCC