1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35       SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.2.2) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *     June 2010
 41  */
 42 
 43 #ifndef FLENS_LAPACK_EIG_BAL_TCC
 44 #define FLENS_LAPACK_EIG_BAL_TCC 1
 45 
 46 #include <flens/aux/aux.h>
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename MA, typename IndexType, typename VSCALE>
 55 void
 56 bal_generic(BALANCE::Balance    job,
 57             GeMatrix<MA>        &A,
 58             IndexType           &iLo,
 59             IndexType           &iHi,
 60             DenseVector<VSCALE> &scale)
 61 {
 62     using namespace BALANCE;
 63 
 64     using std::abs;
 65     using std::isnan;
 66     using flens::max;
 67     using flens::min;
 68 
 69     typedef typename GeMatrix<MA>::ElementType  T;
 70 
 71     const T Zero(0), One(1);
 72     const T scaleFactor(2), factor(0.95);
 73 
 74     const T safeMin1 = lamch<T>(SafeMin) / lamch<T>(Precision);
 75     const T safeMax1 = One / safeMin1;
 76     const T safeMin2 = safeMin1*scaleFactor;
 77     const T safeMax2 = One / safeMin2;
 78 
 79 
 80     const Underscore<IndexType> _;
 81     const IndexType n = A.numRows();
 82 
 83     IndexType k = 1;
 84     IndexType l = n;
 85 
 86     IndexType j = l, m;
 87 
 88     if (n==0) {
 89         goto DONE;
 90     }
 91 
 92     if (job==None) {
 93         for (IndexType i=1; i<=n; ++i) {
 94             scale(i) = One;
 95         }
 96         goto DONE;
 97     }
 98 
 99     if (job!=ScaleOnly) {
100 //
101 //      Permutation to isolate eigenvalues if possible
102 //
103         IndexType iExc = 0;
104 //
105 //      Row and column exchange.
106 //
107         EXCHANGE:
108             if (iExc!=0) {
109                 scale(m) = j;
110                 if (j!=m) {
111                     blas::swap(A(_(1,l),j), A(_(1,l),m));
112                     blas::swap(A(j,_(k,n)), A(m,_(k,n)));
113                 }
114             }
115 
116         switch (iExc) {
117 //
118 //      Search for rows isolating an eigenvalue and push them down.
119 //
120         case 1:
121             if (l==1) {
122                 goto DONE;
123             }
124             --l;
125 
126         case 0:
127             for (j=l; j>=1; --j) {
128                 bool foundRow = true;
129 
130                 for (IndexType i=1; i<=l; ++i) {
131                     if (i==j) {
132                         continue;
133                     }
134                     if (A(j,i)!=Zero) {
135                         foundRow = false;
136                         break;
137                     }
138                 }
139 
140                 if (foundRow) {
141                     m = l;
142                     iExc = 1;
143                     goto EXCHANGE;
144                 }
145             }
146 //
147 //      Search for columns isolating an eigenvalue and push them left.
148 //
149         case 2:
150             if ((iExc!=0) && (iExc!=1)) {
151                 ++k;
152             }
153 
154             for (j=k; j<=l; ++j) {
155                 bool foundCol = true;
156 
157                 for (IndexType i=k; i<=l; ++i) {
158                     if (i==j) {
159                         continue;
160                     }
161                     if (A(i,j)!=Zero) {
162                         foundCol = false;
163                         break;
164                     }
165                 }
166 
167                 if (foundCol) {
168                     m = k;
169                     iExc = 2;
170                     goto EXCHANGE;
171                 }
172             }
173         }
174     }
175 
176     for (IndexType i=k; i<=l; ++i) {
177         scale(i) = One;
178     }
179 
180     if (job==PermuteOnly) {
181         goto DONE;
182     }
183 //
184 //  Balance the submatrix in rows K to L.
185 //
186 //  Iterative loop for norm reduction
187 //
188     bool noConv;
189     do {
190         noConv = false;
191 
192         for (IndexType i=k; i<=l; ++i) {
193             T c = Zero;
194             T r = Zero;
195 
196             for (IndexType j=k; j<=l; ++j) {
197                 if (j==i) {
198                     continue;
199                 }
200                 c += abs(A(j,i));
201                 r += abs(A(i,j));
202             }
203             const IndexType ica = blas::iamax(A(_(1,l),i));
204             T ca = abs(A(ica,i));
205             const IndexType ira = blas::iamax(A(i,_(k,n)))+k-1;
206             T ra = abs(A(i,ira));
207 //
208 //          Guard against zero C or R due to underflow.
209 //
210             if (c==Zero || r==Zero) {
211                 continue;
212             }
213             T g = r / scaleFactor;
214             T f = One;
215             T s = c + r;
216 
217             while (c<g && max(f,c,ca)<safeMax2 && min(r,g,ra)>safeMin2) {
218                 if (isnan(c+f+ca+r+g+ra)) {
219 //
220 //                  Exit if NaN to avoid infinite loop
221 //
222                     ASSERT(0);
223                     return;
224                 }
225                 f *= scaleFactor;
226                 c *= scaleFactor;
227                 ca *= scaleFactor;
228                 r /= scaleFactor;
229                 g /= scaleFactor;
230                 ra /= scaleFactor;
231             }
232 
233             g = c / scaleFactor;
234             while (g>=r && max(r,ra)<safeMax2 && min(f,c,g,ca)>safeMin2) {
235                 f /= scaleFactor;
236                 c /= scaleFactor;
237                 g /= scaleFactor;
238                 ca /= scaleFactor;
239                 r *= scaleFactor;
240                 ra *= scaleFactor;
241             }
242 //
243 //          Now balance.
244 //
245             if ((c+r)>=factor*s) {
246                 continue;
247             }
248             if (f<One && scale(i)<One) {
249                 if (f*scale(i)<=safeMin1) {
250                     continue;
251                 }
252             }
253             if (f>One && scale(i)>One) {
254                 if (scale(i)>=safeMax1/f) {
255                     continue;
256                 }
257             }
258             g = One / f;
259             scale(i) *= f;
260             noConv = true;
261 
262             A(i,_(k,n)) *= g;
263             A(_(1,l),i) *= f;
264 
265         }
266 
267     } while (noConv);
268 
269     DONE:
270         iLo = k;
271         iHi = l;
272 }
273 
274 //== interface for native lapack ===============================================
275 
276 #ifdef CHECK_CXXLAPACK
277 
278 template <typename MA, typename IndexType, typename VSCALE>
279 void
280 bal_native(BALANCE::Balance    job,
281            GeMatrix<MA>        &A,
282            IndexType           &iLo,
283            IndexType           &iHi,
284            DenseVector<VSCALE> &scale)
285 {
286     typedef typename GeMatrix<MA>::ElementType  T;
287 
288     const char       JOB    = getF77LapackChar<BALANCE::Balance>(job);
289     const INTEGER    N      = A.numRows();
290     const INTEGER    LDA    = A.leadingDimension();
291     INTEGER          _ILO   = iLo;
292     INTEGER          _IHI   = iHi;
293     INTEGER          INFO;
294 
295     if (IsSame<T, DOUBLE>::value) {
296         LAPACK_IMPL(dgebal)(&JOB,
297                             &N,
298                             A.data(),
299                             &LDA,
300                             &_ILO,
301                             &_IHI,
302                             scale.data(),
303                             &INFO);
304     } else {
305         ASSERT(0);
306     }
307 
308     iLo     = _ILO;
309     iHi     = _IHI;
310 
311     ASSERT(INFO==0);
312 }
313 
314 #endif // CHECK_CXXLAPACK
315 
316 //== public interface ==========================================================
317 
318 template <typename MA, typename IndexType, typename VSCALE>
319 void
320 bal(BALANCE::Balance    job,
321     GeMatrix<MA>        &A,
322     IndexType           &iLo,
323     IndexType           &iHi,
324     DenseVector<VSCALE> &scale)
325 {
326     LAPACK_DEBUG_OUT("bal");
327 
328 #   ifndef NDEBUG
329 //
330 //  Test the input parameters
331 //
332     ASSERT(A.firstRow()==1);
333     ASSERT(A.firstCol()==1);
334     ASSERT(A.numRows()==A.numCols());
335 #   endif
336 
337 #   ifdef CHECK_CXXLAPACK
338 //
339 //  Make copies of output arguments
340 //
341     typename GeMatrix<MA>::NoView           _A      = A;
342     IndexType                               _iLo    = iLo;
343     IndexType                               _iHi    = iHi;
344     typename DenseVector<VSCALE>::NoView    _scale  = scale;
345 #   endif
346 
347 //
348 //  Call implementation
349 //
350     bal_generic(job, A, iLo, iHi, scale);
351 
352 #   ifdef CHECK_CXXLAPACK
353 //
354 //  Compare results
355 //
356     bal_native(job, _A, _iLo, _iHi, _scale);
357 
358     bool failed = false;
359     if (! isIdentical(A, _A, " A""_A")) {
360         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
361         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
362         failed = true;
363     }
364 
365     if (! isIdentical(iLo, _iLo, " iLo""_iLo")) {
366         failed = true;
367     }
368 
369     if (! isIdentical(iHi, _iHi, " iHi""_iHi")) {
370         failed = true;
371     }
372 
373     if (! isIdentical(scale, _scale, " scale""_scale")) {
374         std::cerr << "CXXLAPACK:  scale = " << scale << std::endl;
375         std::cerr << "F77LAPACK: _scale = " << _scale << std::endl;
376         failed = true;
377     }
378 
379     if (failed) {
380         ASSERT(0);
381     }
382 #   endif
383 }
384 
385 //-- forwarding ----------------------------------------------------------------
386 template <typename MA, typename IndexType, typename VSCALE>
387 void
388 bal(BALANCE::Balance    job,
389     MA                  &&A,
390     IndexType           &&iLo,
391     IndexType           &&iHi,
392     VSCALE              &&scale)
393 {
394     CHECKPOINT_ENTER;
395     bal(job, A, iLo, iHi, scale);
396     CHECKPOINT_LEAVE;
397 }
398 
399 
400 } } // namespace lapack, flens
401 
402 #endif // FLENS_LAPACK_EIG_BAL_TCC