1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV,
 36       $                   EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
 37  *
 38  *  -- LAPACK routine (version 3.3.1)                                  --
 39  *
 40  *  -- Contributed by Zlatko Drmac of the University of Zagreb and     --
 41  *  -- Kresimir Veselic of the Fernuniversitaet Hagen                  --
 42  *  -- April 2011                                                      --
 43  *
 44  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 45  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 46  *
 47  * This routine is also part of SIGMA (version 1.23, October 23. 2008.)
 48  * SIGMA is a library of algorithms for highly accurate algorithms for
 49  * computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the
 50  * eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0.
 51  *
 52  */
 53 
 54 #ifndef FLENS_LAPACK_SVD_SVJ1_TCC
 55 #define FLENS_LAPACK_SVD_SVJ1_TCC 1
 56 
 57 #include <flens/blas/blas.h>
 58 #include <flens/lapack/lapack.h>
 59 
 60 namespace flens { namespace lapack {
 61 
 62 //== generic lapack implementation =============================================
 63 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
 64 typename GeMatrix<MA>::IndexType
 65 svj1_generic(SVJ::JobV                                  jobV,
 66              typename GeMatrix<MA>::IndexType           n1,
 67              GeMatrix<MA>                               &A,
 68              DenseVector<VD>                            &d,
 69              DenseVector<VSVA>                          &sva,
 70              GeMatrix<MV>                               &V,
 71              const typename GeMatrix<MA>::ElementType   &eps,
 72              const typename GeMatrix<MA>::ElementType   &safeMin,
 73              const typename GeMatrix<MA>::ElementType   &tol,
 74              typename GeMatrix<MA>::IndexType           nSweep,
 75              DenseVector<VWORK>                         &work)
 76 {
 77     using std::abs;
 78     using std::max;
 79     using std::min;
 80     using std::sqrt;
 81     using std::swap;
 82 
 83     typedef typename GeMatrix<MA>::ElementType  ElementType;
 84     typedef typename GeMatrix<MA>::IndexType    IndexType;
 85 
 86     const ElementType  Zero(0), Half(0.5), One(1);
 87 
 88     const Underscore<IndexType>  _;
 89 
 90     ElementType fastr_data[5];
 91     DenseVectorView<ElementType>
 92         fastr  = typename DenseVectorView<ElementType>::Engine(5, fastr_data);
 93 
 94     const IndexType  m     = A.numRows();
 95     const IndexType  n     = A.numCols();
 96 
 97     auto _work = work(_(1,m));
 98 
 99     const bool applyV   = (jobV==SVJ::ApplyV);
100     const bool rhsVec   = (jobV==SVJ::ComputeV) || applyV;
101 
102     const ElementType rootEps = sqrt(eps);
103     const ElementType rootSafeMin = sqrt(safeMin);
104     const ElementType small = safeMin / eps;
105     const ElementType big = One / safeMin;
106     const ElementType rootBig = One / rootSafeMin;
107     const ElementType bigTheta = One / rootEps;
108     const ElementType rootTol = sqrt(tol);
109 
110     IndexType  info = 0;
111 //
112 //  .. Initialize the right singular vector matrix ..
113 //
114 //  RSVEC = LSAME( JOBV, 'Y' )
115 //
116     const IndexType emptsw = n1*(n-n1);
117     fastr(1) = Zero;
118 //
119 //  .. Row-cyclic pivot strategy with de Rijk's pivoting ..
120 //
121     const IndexType kbl = min(IndexType(8),n);
122     IndexType nblr = n1 / kbl;
123     if (nblr*kbl!=n1) {
124         ++nblr;
125     }
126 //  .. the tiling is nblr-by-nblc [tiles]
127     IndexType nblc = (n-n1) / kbl;
128 
129     if (nblc*kbl!=(n-n1)) {
130         ++nblc;
131     }
132     const IndexType blSkip = pow(kbl,2) + 1;
133 //[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
134     const IndexType rowSkip = min(IndexType(5),kbl);
135 //[TP] ROWSKIP is a tuning parameter.
136     IndexType swBand = 0;
137 //[TP] SWBAND is a tuning parameter. It is meaningful and effective
138 //  if SGESVJ is used as a computational routine in the preconditioned
139 //  Jacobi SVD algorithm SGESVJ.
140 //
141 //
142 //  | *   *   * [x] [x] [x]|
143 //  | *   *   * [x] [x] [x]|    Row-cycling in the nblr-by-nblc [x] blocks.
144 //  | *   *   * [x] [x] [x]|    Row-cyclic pivoting inside each [x] block.
145 //  |[x] [x] [x] *   *   * |
146 //  |[x] [x] [x] *   *   * |
147 //  |[x] [x] [x] *   *   * |
148 //
149 //
150     ElementType aapp, aapp0, aaqq, aapq, aqoap, apoaq;
151     ElementType tmp, cs, sn, t, theta, thetaSign;
152 
153     bool rotOk;
154     bool converged = false;
155 
156     for (IndexType i=1; i<=nSweep; ++i) {
157 //  .. go go go ...
158 //
159        ElementType max_aapq = Zero;
160        ElementType max_sinj = Zero;
161 
162        IndexType iswRot = 0;
163        IndexType notRot = 0;
164        IndexType pSkipped = 0;
165 
166        for (IndexType ibr=1; ibr<=nblr; ++ibr) {
167 
168           IndexType igl = (ibr-1)*kbl + 1;
169 //
170 //
171 //........................................................
172 //... go to the off diagonal blocks
173 
174           for (IndexType jbc=1; jbc<=nblc; ++jbc) {
175 
176              IndexType jgl = n1 + (jbc-1)*kbl + 1;
177 
178 //     doing the block at ( ibr, jbc )
179 
180              IndexType ijblsk = 0;
181              for (IndexType p=igl; p<=min(igl+kbl-1,n1); ++p) {
182 
183                 aapp = sva(p);
184 
185                 if (aapp>Zero) {
186 
187                    pSkipped = 0;
188 
189                    for (IndexType q=jgl; q<=min(jgl+kbl-1,n); ++q) {
190 
191                       aaqq = sva(q);
192 
193                       if (aaqq>Zero) {
194                          aapp0 = aapp;
195 //
196 //  .. M x 2 Jacobi SVD ..
197 //
198 //     .. Safe Gram matrix computation ..
199 //
200                          if (aaqq>=One) {
201                             if (aapp>=aaqq) {
202                                rotOk = small*aapp<=aaqq;
203                             } else {
204                                rotOk = small*aaqq<=aapp;
205                             }
206                             if (aapp<big/aaqq) {
207                                aapq = (A(_,p)*A(_,q)*d(p)*d(q)/aaqq)/aapp;
208                             } else {
209                                _work = A(_,p);
210                                lascl(LASCL::FullMatrix, 00,
211                                      aapp, d(p), _work);
212                                aapq = _work*A(_,q)*d(q)/aaqq;
213                             }
214                          } else {
215                             if (aapp>=aaqq) {
216                                rotOk = aapp<=aaqq/small;
217                             } else {
218                                rotOk = aaqq<=aapp/small;
219                             }
220                             if (aapp>small/aaqq) {
221                                aapq = (A(_,p)*A(_,q)*d(p)*d(q)/aaqq)/aapp;
222                             } else {
223                                _work = A(_,q);
224                                lascl(LASCL::FullMatrix, 00,
225                                      aaqq, d(q), _work);
226                                aapq = _work*A(_,p)*d(p)/aapp;
227                             }
228                          }
229 
230                          max_aapq = max(max_aapq, abs(aapq));
231 
232 //     TO rotate or NOT to rotate, THAT is the question ...
233 //
234                          if (abs(aapq)>tol) {
235                             notRot = 0;
236 //        ROTATED  = ROTATED + 1
237                             pSkipped = 0;
238                             ++iswRot;
239 
240                             if (rotOk) {
241 
242                                aqoap = aaqq / aapp;
243                                apoaq = aapp / aaqq;
244                                theta = -Half*abs(aqoap-apoaq) / aapq;
245                                if (aaqq>aapp0) {
246                                   theta = -theta;
247                                }
248 
249                                if (abs(theta)>bigTheta) {
250                                   t = Half / theta;
251                                   fastr(3) =  t*d(p) / d(q);
252                                   fastr(4) = -t*d(q) / d(p);
253                                   blas::rotm(A(_,p), A(_,q), fastr);
254                                   if (rhsVec) {
255                                      blas::rotm(V(_,p), V(_,q), fastr);
256                                   }
257                                   sva(q) = aaqq
258                                           *sqrt(max(Zero, One+t*apoaq*aapq));
259                                   aapp *= sqrt(max(Zero, One-t*aqoap*aapq));
260                                   max_sinj = max(max_sinj, abs(t));
261                                } else {
262 //
263 //              .. choose correct signum for THETA and rotate
264 //
265                                   thetaSign = -sign(One, aapq);
266                                   if (aaqq>aapp0) {
267                                      thetaSign = -thetaSign;
268                                   }
269                                   t = One
270                                      / (theta+thetaSign*sqrt(One+theta*theta));
271                                   cs = sqrt(One / (One+t*t));
272                                   sn = t*cs;
273                                   max_sinj = max(max_sinj, abs(sn));
274                                   sva(q) = aaqq
275                                           *sqrt(max(Zero, One+t*apoaq*aapq));
276                                   aapp *= sqrt(max(Zero, One-t*aqoap*aapq));
277 
278                                   apoaq = d(p) / d(q);
279                                   aqoap = d(q) / d(p);
280                                   if (d(p)>=One) {
281 
282                                      if (d(q)>=One) {
283                                         fastr(3) =  t*apoaq;
284                                         fastr(4) = -t*aqoap;
285                                         d(p) *= cs;
286                                         d(q) *= cs;
287                                         blas::rotm(A(_,p), A(_,q), fastr);
288                                         if (rhsVec) {
289                                            blas::rotm(V(_,p), V(_,q), fastr);
290                                         }
291                                      } else {
292                                         A(_,p) -= t*aqoap*A(_,q);
293                                         A(_,q) += cs*sn*apoaq*A(_,p);
294                                         if (rhsVec) {
295                                            V(_,p) -= t*aqoap*V(_,q);
296                                            V(_,q) += cs*sn*apoaq*V(_,p);
297                                         }
298                                         d(p) *= cs;
299                                         d(q) /= cs;
300                                      }
301                                   } else {
302                                      if (d(q)>=One) {
303                                         A(_,q) += t*apoaq*A(_,p);
304                                         A(_,p) -= cs*sn*aqoap*A(_,q);
305                                         if (rhsVec) {
306                                            V(_,q) += t*apoaq*V(_,p);
307                                            V(_,p) -= cs*sn*aqoap*V(_,q);
308                                         }
309                                         d(p) /= cs;
310                                         d(q) *= cs;
311                                      } else {
312                                         if (d(p)>=d(q)) {
313                                            A(_,p) -= t*aqoap*A(_,q);
314                                            A(_,q) += cs*sn*apoaq*A(_,p);
315                                            d(p) *= cs;
316                                            d(q) /= cs;
317                                            if (rhsVec) {
318                                               V(_,p) -= t*aqoap*V(_,q);
319                                               V(_,q) += cs*sn*apoaq*V(_,p);
320                                            }
321                                         } else {
322                                            A(_,q) += t*apoaq*A(_,p);
323                                            A(_,p) -= cs*sn*aqoap*A(_,q);
324                                            d(p) /= cs;
325                                            d(q) *= cs;
326                                            if (rhsVec) {
327                                               V(_,q) += t*apoaq*V(_,p);
328                                               V(_,p) -= cs*sn*aqoap*V(_,q);
329                                            }
330                                         }
331                                      }
332                                   }
333                                }
334 
335                             } else {
336                                if (aapp>aaqq) {
337                                   _work = A(_,p);
338                                   lascl(LASCL::FullMatrix, 00,
339                                         aapp, One, _work);
340                                   lascl(LASCL::FullMatrix, 00,
341                                         aaqq, One, A(_,q));
342                                   tmp = -aapq*d(p)/d(q);
343                                   A(_,q) += tmp*_work;
344                                   lascl(LASCL::FullMatrix, 00,
345                                         One, aaqq, A(_,q));
346                                   sva(q) = aaqq*sqrt(max(Zero, One-aapq*aapq));
347                                   max_sinj = max(max_sinj, safeMin);
348                                } else {
349                                   _work = A(_,q);
350                                   lascl(LASCL::FullMatrix, 00,
351                                         aaqq, One, _work);
352                                   lascl(LASCL::FullMatrix, 00,
353                                         aapp, One, A(_,p));
354                                   tmp = -aapq*d(q)/d(p);
355                                   A(_,p) += tmp*_work;
356                                   lascl(LASCL::FullMatrix, 00,
357                                         One, aapp, A(_,p));
358                                   sva(p) = aapp*sqrt(max(Zero, One-aapq*aapq));
359                                   max_sinj = max(max_sinj, safeMin);
360                                }
361                             }
362 //        END IF ROTOK THEN ... ELSE
363 //
364 //        In the case of cancellation in updating SVA(q)
365 //        .. recompute SVA(q)
366                             if (pow(sva(q)/aaqq,2)<=rootEps) {
367                                if (aaqq<rootBig && aaqq>rootSafeMin) {
368                                   sva(q) = blas::nrm2(A(_,q))*d(q);
369                                } else {
370                                   t = Zero;
371                                   aaqq = One;
372                                   lassq(A(_,q), t, aaqq);
373                                   sva(q) = t*sqrt(aaqq)*d(q);
374                                }
375                             }
376                             if (pow(aapp/aapp0,2)<=rootEps) {
377                                if (aapp<rootBig && aapp>rootSafeMin) {
378                                   aapp = blas::nrm2(A(_,p))*d(p);
379                                } else {
380                                   t = Zero;
381                                   aapp = One;
382                                   lassq(A(_,p), t, aapp);
383                                   aapp = t*sqrt(aapp)*d(p);
384                                }
385                                sva(p) = aapp;
386                             }
387 //           end of OK rotation
388                          } else {
389                             ++notRot;
390 //        SKIPPED  = SKIPPED  + 1
391                             ++pSkipped;
392                             ++ijblsk;
393                          }
394                       } else {
395                          ++notRot;
396                          ++pSkipped;
397                          ++ijblsk;
398                       }
399 
400 //   IF ( NOTROT .GE. EMPTSW )  GO TO 2011
401                       if (i<=swBand && ijblsk>=blSkip) {
402                          sva(p) = aapp;
403                          notRot = 0;
404                          goto jbcLooExit;
405                       }
406                       if (i<=swBand && pSkipped>rowSkip) {
407                          aapp = -aapp;
408                          notRot = 0;
409                          break;
410                       }
411 
412                    }
413 //     end of the q-loop
414 
415                    sva(p) = aapp;
416 
417                 } else {
418                    if (aapp==Zero) {
419                       notRot += min(jgl+kbl-1,n) - jgl + 1;
420                    }
421                    if (aapp<Zero) {
422                       notRot = 0;
423                    }
424 //**   IF ( NOTROT .GE. EMPTSW )  GO TO 2011
425                 }
426 
427              }
428 //  end of the p-loop
429           }
430 //  end of the jbc-loop
431        jbcLooExit:
432 //2011 bailed out of the jbc-loop
433           for (IndexType p=igl; p<=min(igl+kbl-1,n); ++p) {
434              sva(p) = abs(sva(p));
435           }
436 //**   IF ( NOTROT .GE. EMPTSW ) GO TO 1994
437        }
438 //2000 :: end of the ibr-loop
439 //
440 //  .. update SVA(N)
441        if (sva(n)<rootBig && sva(n)>rootSafeMin) {
442           sva(n) = blas::nrm2(A(_,n))*d(n);
443        } else {
444           t = Zero;
445           aapp = One;
446           lassq(A(_,n), t, aapp);
447           sva(n) = t*sqrt(aapp)*d(n);
448        }
449 //
450 //  Additional steering devices
451 //
452        if (i<swBand && (max_aapq<=rootTol || iswRot<=n)) {
453           swBand = i;
454        }
455 
456        if (i>swBand+1 && max_aapq<n*tol && n*max_aapq*max_sinj<tol) {
457           converged = true;
458           break;
459        }
460 
461        if (notRot>=emptsw) {
462           converged = true;
463           break;
464        }
465 
466     }
467 //  end i=1:NSWEEP loop
468     if (converged) {
469 //#:) Reaching this point means that during the i-th sweep all pivots were
470 //  below the given threshold, causing early exit.
471        info = 0;
472     } else {
473 //#:) Reaching this point means that the procedure has completed the given
474 //  number of sweeps.
475        info = nSweep - 1;
476     }
477 //  Sort the vector D
478 //
479     for (IndexType p=1; p<=n-1; ++p) {
480        const IndexType q = blas::iamax(sva(_(p,n))) + p - 1;
481        if (p!=q) {
482           swap(sva(p), sva(q));
483           swap(d(p), d(q));
484           blas::swap(A(_,p), A(_,q));
485           if (rhsVec) {
486              blas::swap(V(_,p), V(_,q));
487           }
488        }
489     }
490     return info;
491 }
492 
493 
494 //== interface for native lapack ===============================================
495 #ifdef CHECK_CXXLAPACK
496 
497 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
498 typename GeMatrix<MA>::IndexType
499 svj1_native(SVJ::JobV                                  jobV,
500             typename GeMatrix<MA>::IndexType           n1,
501             GeMatrix<MA>                               &A,
502             DenseVector<VD>                            &d,
503             DenseVector<VSVA>                          &sva,
504             GeMatrix<MV>                               &V,
505             const typename GeMatrix<MA>::ElementType   &eps,
506             const typename GeMatrix<MA>::ElementType   &safeMin,
507             const typename GeMatrix<MA>::ElementType   &tol,
508             typename GeMatrix<MA>::IndexType           nSweep,
509             DenseVector<VWORK>                         &work)
510 {
511     typedef typename GeMatrix<MA>::ElementType  T;
512 
513     const char       JOBV = char(jobV);
514     const INTEGER    M = A.numRows();
515     const INTEGER    N = A.numCols();
516     const INTEGER    N1 = n1;
517     const INTEGER    LDA = A.leadingDimension();
518     const INTEGER    _MV = V.numRows();
519     const INTEGER    LDV = V.leadingDimension();
520     const DOUBLE     EPS = eps;
521     const DOUBLE     SFMIN = safeMin;
522     const DOUBLE     TOL = tol;
523     const INTEGER    NSWEEP = nSweep;
524     const INTEGER    LWORK = work.length();
525     INTEGER          INFO;
526 
527     if (IsSame<T,DOUBLE>::value) {
528         LAPACK_DECL(dgsvj1)(&JOBV,
529                             &M,
530                             &N,
531                             &N1,
532                             A.data(),
533                             &LDA,
534                             d.data(),
535                             sva.data(),
536                             &_MV,
537                             V.data(),
538                             &LDV,
539                             &EPS,
540                             &SFMIN,
541                             &TOL,
542                             &NSWEEP,
543                             work.data(),
544                             &LWORK,
545                             &INFO);
546     } else {
547         ASSERT(0);
548     }
549     ASSERT(INFO>=0);
550     return INFO;
551 }
552 
553 #endif // CHECK_CXXLAPACK
554 
555 //== public interface ==========================================================
556 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
557 typename GeMatrix<MA>::IndexType
558 svj1(SVJ::JobV                                  jobV,
559      typename GeMatrix<MA>::IndexType           n1,
560      GeMatrix<MA>                               &A,
561      DenseVector<VD>                            &d,
562      DenseVector<VSVA>                          &sva,
563      GeMatrix<MV>                               &V,
564      const typename GeMatrix<MA>::ElementType   &eps,
565      const typename GeMatrix<MA>::ElementType   &safeMin,
566      const typename GeMatrix<MA>::ElementType   &tol,
567      typename GeMatrix<MA>::IndexType           nSweep,
568      DenseVector<VWORK>                         &work)
569 {
570     using std::max;
571     using std::min;
572 //
573 //  Test the input parameters
574 //
575 #   ifndef NDEBUG
576     typedef typename GeMatrix<MA>::IndexType    IndexType;
577 
578     ASSERT(A.firstRow()==1);
579     ASSERT(A.firstCol()==1);
580 
581     const IndexType m = A.numRows();
582     const IndexType n = A.numCols();
583 
584     ASSERT(m>=n);
585     ASSERT(n>=n1);
586     ASSERT(n1>=0);
587 
588 
589     ASSERT(d.firstIndex()==1);
590     ASSERT(d.length()==n);
591 
592     ASSERT(sva.firstIndex()==1);
593     ASSERT(sva.length()==n);
594 
595     ASSERT(V.firstRow()==1);
596     ASSERT(V.firstCol()==1);
597 
598     if (jobV==SVJ::ComputeV) {
599         ASSERT(V.numCols()==n);
600         ASSERT(V.numRows()==n);
601     }
602     if (jobV==SVJ::ApplyV) {
603         ASSERT(V.numCols()==n);
604     }
605 
606     if (work.length()>0) {
607         ASSERT(work.length()>=m);
608     }
609 #   endif
610 
611 //
612 //  Make copies of output arguments
613 //
614 #   ifdef CHECK_CXXLAPACK
615     typename GeMatrix<MA>::NoView       A_org     = A;
616     typename DenseVector<VD>::NoView    d_org     = d;
617     typename DenseVector<VSVA>::NoView  sva_org   = sva;
618     typename GeMatrix<MV>::NoView       V_org     = V;
619     typename DenseVector<VWORK>::NoView work_org  = work;
620 #   endif
621 
622 //
623 //  Call implementation
624 //
625     IndexType info = svj1_generic(jobV, n1, A, d, sva, V, eps, safeMin, tol,
626                                   nSweep, work);
627 
628 #   ifdef CHECK_CXXLAPACK
629 //
630 //  Make copies of results computed by the generic implementation
631 //
632     typename GeMatrix<MA>::NoView       A_generic     = A;
633     typename DenseVector<VD>::NoView    d_generic     = d;
634     typename DenseVector<VSVA>::NoView  sva_generic   = sva;
635     typename GeMatrix<MV>::NoView       V_generic     = V;
636     typename DenseVector<VWORK>::NoView work_generic  = work;
637 //
638 //  restore output arguments
639 //
640     A    = A_org;
641     d    = d_org;
642     sva  = sva_org;
643     V    = V_org;
644     work = work_org;
645 //
646 //  Compare generic results with results from the native implementation
647 //
648     IndexType _info = svj1_native(jobV, n1, A, d, sva, V, eps, safeMin, tol,
649                                   nSweep, work);
650 
651     bool failed = false;
652     if (! isIdentical(A_generic, A, "A_generic""A")) {
653         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
654         std::cerr << "F77LAPACK: A = " << A << std::endl;
655         failed = true;
656     }
657     if (! isIdentical(d_generic, d, "d_generic""d")) {
658         std::cerr << "CXXLAPACK: d_generic = " << d_generic << std::endl;
659         std::cerr << "F77LAPACK: d = " << d << std::endl;
660         failed = true;
661     }
662     if (! isIdentical(sva_generic, sva, "sva_generic""sva")) {
663         std::cerr << "CXXLAPACK: sva_generic = " << sva_generic << std::endl;
664         std::cerr << "F77LAPACK: sva = " << sva << std::endl;
665         failed = true;
666     }
667     if (! isIdentical(V_generic, V, "V_generic""V")) {
668         std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
669         std::cerr << "F77LAPACK: V = " << V << std::endl;
670         failed = true;
671     }
672     if (! isIdentical(work_generic, work, "work_generic""work")) {
673         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
674         std::cerr << "F77LAPACK: work = " << work << std::endl;
675         failed = true;
676     }
677     if (! isIdentical(info, _info, "info""_info")) {
678         std::cerr << "CXXLAPACK: info = " << info << std::endl;
679         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
680         failed = true;
681     }
682 
683     if (failed) {
684         std::cerr << "error in: svj1.tcc" << std::endl;
685         ASSERT(0);
686     } else {
687         std::cerr << "passed: svj1.tcc" << std::endl;
688     }
689 #   endif
690 
691     return info;
692 }
693 
694 //-- forwarding ----------------------------------------------------------------
695 template <typename MA, typename VD, typename VSVA, typename MV, typename VWORK>
696 typename MA::IndexType
697 svj1(SVJ::JobV                                  jobV,
698      typename MA::IndexType                     n1,
699      MA                                         &&A,
700      VD                                         &&d,
701      VSVA                                       &&sva,
702      MV                                         &&V,
703      const typename MA::ElementType             &eps,
704      const typename MA::ElementType             &safeMin,
705      const typename MA::ElementType             &tol,
706      typename MA::IndexType                     nSweep,
707      VWORK                                      &&work)
708 {
709     typename MA::IndexType   info;
710 
711     CHECKPOINT_ENTER;
712     svj1(jobV, n1, A, d, sva, V, eps, safeMin, tol, nSweep, work);
713     CHECKPOINT_LEAVE;
714 }
715 
716 } } // namespace lapack, flens
717 
718 #endif // FLENS_LAPACK_SVD_SVJ1_TCC