1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
 36       $                   EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
 37       $                   WORK, IWORK, INFO )
 38  *
 39  *  -- LAPACK driver routine (version 3.2) --
 40  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 41  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 42  *     November 2006
 43  */
 44 
 45 #ifndef FLENS_LAPACK_GESV_SVX_TCC
 46 #define FLENS_LAPACK_GESV_SVX_TCC 1
 47 
 48 #include <flens/blas/blas.h>
 49 #include <flens/lapack/lapack.h>
 50 
 51 namespace flens { namespace lapack {
 52 
 53 //== generic lapack implementation =============================================
 54 template <typename MA, typename MAF, typename VPIV, typename VR, typename VC,
 55           typename MB, typename MX, typename RCOND, typename FERR,
 56           typename BERR, typename VWORK, typename VIWORK>
 57 typename GeMatrix<MA>::IndexType
 58 svx_generic(SVX::Fact           fact,
 59             Transpose           trans,
 60             GeMatrix<MA>        &A,
 61             GeMatrix<MAF>       &AF,
 62             DenseVector<VPIV>   &piv,
 63             SVX::Equilibration  equed,
 64             DenseVector<VR>     &r,
 65             DenseVector<VC>     &c,
 66             GeMatrix<MB>        &B,
 67             GeMatrix<MX>        &X,
 68             RCOND               &rCond,
 69             DenseVector<FERR>   &fErr,
 70             DenseVector<BERR>   &bErr,
 71             DenseVector<VWORK>  &work,
 72             DenseVector<VIWORK> &iwork)
 73 {
 74     using std::max;
 75     using std::min;
 76     using namespace SVX;
 77 
 78     typedef typename GeMatrix<MA>::ElementType  ElementType;
 79     typedef typename GeMatrix<MA>::IndexType    IndexType;
 80 
 81     const ElementType Zero(0), One(1);
 82 
 83     const Underscore<IndexType> _;
 84 
 85     const IndexType n    = A.numRows();
 86     const IndexType nRhs = B.numCols();
 87 
 88     IndexType info = 0;
 89 
 90     const ElementType smallNum = lamch<ElementType>(SafeMin);
 91     const ElementType bigNum = One / smallNum;
 92 
 93     bool rowEqu, colEqu;
 94     ElementType rPivGrowth;
 95 
 96     if (fact==NotFactored || fact==Equilibrate) {
 97         equed = None;
 98         rowEqu = false;
 99         colEqu = false;
100     } else {
101         rowEqu = (equed==Row || equed==Both);
102         colEqu = (equed==Column || equed==Both);
103     }
104 //
105 //  Test the input parameters.
106 //
107     ElementType rowCond, colCond;
108 
109     if (rowEqu) {
110         ElementType  rcMin = bigNum;
111         ElementType  rcMax = Zero;
112         for (IndexType j=1; j<=n; ++j) {
113             rcMin = min(rcMin, r(j));
114             rcMax = max(rcMax, r(j));
115         }
116         ASSERT(rcMin>Zero);
117         if (n>0) {
118             rowCond = max(rcMin,smallNum) / min(rcMax,bigNum);
119         } else {
120             rowCond = One;
121         }
122     }
123     if (colEqu) {
124         ElementType  rcMin = bigNum;
125         ElementType  rcMax = Zero;
126         for (IndexType j=1; j<=n; ++j) {
127             rcMin = min(rcMin, c(j));
128             rcMax = max(rcMax, c(j));
129         }
130         ASSERT(rcMin>Zero);
131         if (n>0) {
132             colCond = max(rcMin,smallNum) / min(rcMax,bigNum);
133         } else {
134             colCond = One;
135         }
136     }
137 
138     if (fact==Equilibrate) {
139         ElementType amax;
140 //
141 //      Compute row and column scalings to equilibrate the matrix A.
142 //
143         if (equ(A, r, c, rowCond, colCond, amax)==0) {
144 //
145 //          Equilibrate the matrix.
146 //
147             equed = Equilibration(laq(A, r, c, rowCond, colCond, amax));
148             rowEqu = (equed==Row || equed==Both);
149             colEqu = (equed==Column || equed==Both);
150         }
151     }
152 //
153 //  Scale the right hand side.
154 //
155     if (trans==NoTrans) {
156         if (rowEqu) {
157             for (IndexType j=1; j<=nRhs; ++j) {
158                 for (IndexType i=1; i<=n; ++i) {
159                     B(i,j) *= r(i);
160                 }
161             }
162         }
163     } else if (colEqu) {
164         for (IndexType j=1; j<=nRhs; ++j) {
165             for (IndexType i=1; i<=n; ++i) {
166                 B(i,j) *= c(i);
167             }
168         }
169     }
170 
171     if ((fact==NotFactored) || (fact==Equilibrate)) {
172 //
173 //      Compute the LU factorization of A.
174 //
175         AF = A;
176         info = trf(AF, piv);
177 //
178 //      Return if INFO is non-zero.
179 //
180         if (info>0) {
181 //
182 //          Compute the reciprocal pivot growth factor of the
183 //          leading rank-deficient INFO columns of A.
184 //
185             const auto range = _(1,info);
186             rPivGrowth = lan(MaximumNorm, AF(range,range).upper());
187             if (rPivGrowth==Zero) {
188                 rPivGrowth = One;
189             } else {
190                 rPivGrowth = lan(MaximumNorm, A) / rPivGrowth;
191             }
192             work(1) = rPivGrowth;
193             rCond = Zero;
194             return info;
195         }
196     }
197 //
198 //  Compute the norm of the matrix A and the
199 //  reciprocal pivot growth factor RPVGRW.
200 //
201     const Norm norm = (trans==NoTrans) ? OneNorm : InfinityNorm;
202     const ElementType normA = lan(norm, A, work);
203     rPivGrowth = lan(MaximumNorm, AF.upper());
204     if (rPivGrowth==Zero) {
205         rPivGrowth = One;
206     } else {
207         rPivGrowth = lan(MaximumNorm, A) / rPivGrowth;
208     }
209 //
210 //  Compute the reciprocal of the condition number of A.
211 //
212     con(norm, AF, normA, rCond, work, iwork);
213 //
214 //  Compute the solution matrix X.
215 //
216     X = B;
217     trs(trans, AF, piv, X);
218 //
219 //  Use iterative refinement to improve the computed solution and
220 //  compute error bounds and backward error estimates for it.
221 //
222     rfs(trans, A, AF, piv, B, X, fErr, bErr, work(_(1,3*n)), iwork);
223 //
224 //  Transform the solution matrix X to a solution of the original
225 //  system.
226 //
227     if (trans==NoTrans) {
228         if (colEqu) {
229             for (IndexType j=1; j<=nRhs; ++j) {
230                 for (IndexType i=1; i<=n; ++i) {
231                     X(i,j) *= c(i);
232                 }
233             }
234             for (IndexType j=1; j<=nRhs; ++j) {
235                 fErr(j) /= colCond;
236             }
237         }
238     } else if (rowEqu) {
239         for (IndexType j=1; j<=nRhs; ++j) {
240             for (IndexType i=1; i<=n; ++i) {
241                 X(i,j) *= r(i);
242             }
243         }
244         for (IndexType j=1; j<=nRhs; ++j) {
245             fErr(j) /= rowCond;
246         }
247     }
248 
249     work(1) = rPivGrowth;
250 //
251 //  Set INFO = N+1 if the matrix is singular to working precision.
252 //
253     const ElementType eps = lamch<ElementType>(Eps);
254     if (rCond<eps) {
255         info = n+1;
256     }
257     return info;
258 }
259 
260 //== interface for native lapack ===============================================
261 
262 #ifdef CHECK_CXXLAPACK
263 
264 template <typename MA, typename MAF, typename VPIV, typename VR, typename VC,
265           typename MB, typename MX, typename RCOND, typename FERR,
266           typename BERR, typename VWORK, typename VIWORK>
267 typename GeMatrix<MA>::IndexType
268 svx_native(SVX::Fact           fact,
269            Transpose           trans,
270            GeMatrix<MA>        &A,
271            GeMatrix<MAF>       &AF,
272            DenseVector<VPIV>   &piv,
273            SVX::Equilibration  equed,
274            DenseVector<VR>     &r,
275            DenseVector<VC>     &c,
276            GeMatrix<MB>        &B,
277            GeMatrix<MX>        &X,
278            RCOND               &rCond,
279            DenseVector<FERR>   &fErr,
280            DenseVector<BERR>   &bErr,
281            DenseVector<VWORK>  &work,
282            DenseVector<VIWORK> &iwork)
283 {
284     typedef typename GeMatrix<MA>::ElementType  T;
285 
286     const char       FACT   = char(fact);
287     const char       TRANS  = getF77LapackChar(trans);
288     const INTEGER    N      = A.numRows();
289     const INTEGER    NRHS   = B.numCols();
290     const INTEGER    LDA    = A.leadingDimension();
291     const INTEGER    LDAF   = AF.leadingDimension();
292     char             EQUED  = char(equed);
293     const INTEGER    LDB    = B.leadingDimension();
294     const INTEGER    LDX    = X.leadingDimension();
295     INTEGER          INFO;
296 
297     if (IsSame<T,double>::value) {
298         LAPACK_IMPL(dgesvx)(&FACT,
299                             &TRANS,
300                             &N,
301                             &NRHS,
302                             A.data(),
303                             &LDA,
304                             AF.data(),
305                             &LDAF,
306                             piv.data(),
307                             &EQUED,
308                             r.data(),
309                             c.data(),
310                             B.data(),
311                             &LDB,
312                             X.data(),
313                             &LDX,
314                             &rCond,
315                             fErr.data(),
316                             bErr.data(),
317                             work.data(),
318                             iwork.data(),
319                             &INFO);
320     } else {
321         ASSERT(0);
322     }
323     ASSERT(INFO>=0);
324 
325     return INFO;
326 }
327 
328 #endif // CHECK_CXXLAPACK
329 
330 //== public interface ==========================================================
331 template <typename MA, typename MAF, typename VPIV, typename VR, typename VC,
332           typename MB, typename MX, typename RCOND, typename FERR,
333           typename BERR, typename VWORK, typename VIWORK>
334 typename GeMatrix<MA>::IndexType
335 svx(SVX::Fact           fact,
336     Transpose           trans,
337     GeMatrix<MA>        &A,
338     GeMatrix<MAF>       &AF,
339     DenseVector<VPIV>   &piv,
340     SVX::Equilibration  equed,
341     DenseVector<VR>     &r,
342     DenseVector<VC>     &c,
343     GeMatrix<MB>        &B,
344     GeMatrix<MX>        &X,
345     RCOND               &rCond,
346     DenseVector<FERR>   &fErr,
347     DenseVector<BERR>   &bErr,
348     DenseVector<VWORK>  &work,
349     DenseVector<VIWORK> &iwork)
350 {
351     typedef typename GeMatrix<MA>::IndexType    IndexType;
352 //
353 //  Test the input parameters
354 //
355 #   ifndef NDEBUG
356 #   endif
357 
358 #   ifdef CHECK_CXXLAPACK
359 //
360 //  Make copies of output arguments
361 //
362     typename GeMatrix<MA>::NoView        A_org     = A;
363     typename GeMatrix<MAF>::NoView       AF_org    = AF;
364     typename DenseVector<VPIV>::NoView   piv_org   = piv;
365     SVX::Equilibration                   equed_org = equed;
366     typename DenseVector<VR>::NoView     r_org     = r;
367     typename DenseVector<VC>::NoView     c_org     = c;
368     typename GeMatrix<MB>::NoView        B_org     = B;
369     typename GeMatrix<MX>::NoView        X_org     = X;
370     RCOND                                rCond_org = rCond;
371     typename DenseVector<FERR>::NoView   fErr_org  = fErr;
372     typename DenseVector<BERR>::NoView   bErr_org  = bErr;
373     typename DenseVector<VWORK>::NoView  work_org  = work;
374     typename DenseVector<VIWORK>::NoView iwork_org = iwork;
375 #   endif
376 
377 //
378 //  Call implementation
379 //
380     IndexType info = svx_generic(fact, trans, A, AF, piv, equed,
381                                  r, c, B, X, rCond, fErr, bErr,
382                                  work, iwork);
383 
384 #   ifdef CHECK_CXXLAPACK
385 //
386 //  Compare results
387 //
388     typename GeMatrix<MA>::NoView        A_generic     = A;
389     typename GeMatrix<MAF>::NoView       AF_generic    = AF;
390     typename DenseVector<VPIV>::NoView   piv_generic   = piv;
391     SVX::Equilibration                   equed_generic = equed;
392     typename DenseVector<VR>::NoView     r_generic     = r;
393     typename DenseVector<VC>::NoView     c_generic     = c;
394     typename GeMatrix<MB>::NoView        B_generic     = B;
395     typename GeMatrix<MX>::NoView        X_generic     = X;
396     RCOND                                rCond_generic = rCond;
397     typename DenseVector<FERR>::NoView   fErr_generic  = fErr;
398     typename DenseVector<BERR>::NoView   bErr_generic  = bErr;
399     typename DenseVector<VWORK>::NoView  work_generic  = work;
400     typename DenseVector<VIWORK>::NoView iwork_generic = iwork;
401 
402     A       = A_org;
403     AF      = AF_org;
404     piv     = piv_org;
405     equed   = equed_org;
406     r       = r_org;
407     c       = c_org;
408     B       = B_org;
409     X       = X_org;
410     rCond   = rCond_org;
411     fErr    = fErr_org;
412     bErr    = bErr_org;
413     work    = work_org;
414     iwork   = iwork_org;
415 
416     IndexType _info = svx_native(fact, trans, A, AF, piv, equed,
417                                  r, c, B, X, rCond, fErr, bErr,
418                                  work, iwork);
419 
420     bool failed = false;
421     if (! isIdentical(A_generic, A, "A_generic""A")) {
422         std::cerr << "A_org = " << A_org << std::endl;
423         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
424         std::cerr << "F77LAPACK: A = " << A << std::endl;
425         failed = true;
426     }
427 
428     if (! isIdentical(AF_generic, AF, "AF_generic""AF")) {
429         std::cerr << "AF_org = " << AF_org << std::endl;
430         std::cerr << "CXXLAPACK: AF_generic = " << AF_generic << std::endl;
431         std::cerr << "F77LAPACK: AF = " << AF << std::endl;
432         failed = true;
433     }
434 
435     if (! isIdentical(piv_generic, piv, "piv_generic""piv")) {
436         std::cerr << "piv_org = " << piv_org << std::endl;
437         std::cerr << "CXXLAPACK: piv_generic = " << piv_generic << std::endl;
438         std::cerr << "F77LAPACK: piv = " << piv << std::endl;
439         failed = true;
440     }
441 
442     if (equed_generic!=equed) {
443         std::cerr << "equed_org = " << equed_org << std::endl;
444         std::cerr << "CXXLAPACK: equed_generic = "
445                   << equed_generic << std::endl;
446         std::cerr << "F77LAPACK: equed = " << equed << std::endl;
447         failed = true;
448     }
449 
450     if (! isIdentical(r_generic, r, "r_generic""r")) {
451         std::cerr << "r_org = " << r_org << std::endl;
452         std::cerr << "CXXLAPACK: r_generic = " << r_generic << std::endl;
453         std::cerr << "F77LAPACK: r = " << r << std::endl;
454         failed = true;
455     }
456 
457     if (! isIdentical(c_generic, c, "c_generic""c")) {
458         std::cerr << "c_org = " << c_org << std::endl;
459         std::cerr << "CXXLAPACK: c_generic = " << c_generic << std::endl;
460         std::cerr << "F77LAPACK: c = " << piv << std::endl;
461         failed = true;
462     }
463 
464     if (! isIdentical(B_generic, B, "B_generic""B")) {
465         std::cerr << "B_org = " << B_org << std::endl;
466         std::cerr << "CXXLAPACK: B_generic = " << B_generic << std::endl;
467         std::cerr << "F77LAPACK: B = " << B << std::endl;
468         failed = true;
469     }
470 
471     if (! isIdentical(X_generic, X, "X_generic""X")) {
472         std::cerr << "X_org = " << X_org << std::endl;
473         std::cerr << "CXXLAPACK: X_generic = " << X_generic << std::endl;
474         std::cerr << "F77LAPACK: X = " << X << std::endl;
475         failed = true;
476     }
477 
478     if (! isIdentical(rCond_generic, rCond, "rCond_generic""rCond")) {
479         std::cerr << "rCond_org = " << rCond_org << std::endl;
480         std::cerr << "CXXLAPACK: rCond_generic = "
481                   << rCond_generic << std::endl;
482         std::cerr << "F77LAPACK: rCond = " << rCond << std::endl;
483         failed = true;
484     }
485 
486     if (! isIdentical(fErr_generic, fErr, "fErr_generic""fErr")) {
487         std::cerr << "fErr_org = " << fErr_org << std::endl;
488         std::cerr << "CXXLAPACK: fErr_generic = " << fErr_generic << std::endl;
489         std::cerr << "F77LAPACK: fErr = " << fErr << std::endl;
490         failed = true;
491     }
492 
493     if (! isIdentical(bErr_generic, bErr, "bErr_generic""bErr")) {
494         std::cerr << "bErr_org = " << bErr_org << std::endl;
495         std::cerr << "CXXLAPACK: bErr_generic = " << bErr_generic << std::endl;
496         std::cerr << "F77LAPACK: bErr = " << bErr << std::endl;
497         failed = true;
498     }
499 
500     if (! isIdentical(work_generic, work, "work_generic""work")) {
501         std::cerr << "work_org = " << work_org << std::endl;
502         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
503         std::cerr << "F77LAPACK: work = " << work << std::endl;
504         failed = true;
505     }
506 
507     if (! isIdentical(iwork_generic, iwork, "iwork_generic""iwork")) {
508         std::cerr << "iwork_org = " << iwork_org << std::endl;
509         std::cerr << "CXXLAPACK: iwork_generic = "
510                   << iwork_generic << std::endl;
511         std::cerr << "F77LAPACK: iwork = " << iwork << std::endl;
512         failed = true;
513     }
514 
515     if (! isIdentical(info, _info, " info""_info")) {
516         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
517         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
518         failed = true;
519     }
520 
521     if (failed) {
522         ASSERT(0);
523     }
524 
525 #   endif
526 
527     return info;
528 }
529 
530 //-- forwarding ----------------------------------------------------------------
531 
532 } } // namespace lapack, flens
533 
534 #endif // FLENS_LAPACK_GESV_SVX_TCC