1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK )
 36  *
 37  *  -- LAPACK auxiliary routine (version 3.2) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *     November 2006
 41  */
 42 
 43 #ifndef FLENS_LAPACK_AUX_LAN_TCC
 44 #define FLENS_LAPACK_AUX_LAN_TCC 1
 45 
 46 #include <flens/blas/blas.h>
 47 #include <flens/lapack/lapack.h>
 48 
 49 namespace flens { namespace lapack {
 50 
 51 //== generic lapack implementation =============================================
 52 
 53 template <typename MA, typename VWORK>
 54 typename GeMatrix<MA>::ElementType
 55 lan_generic(Norm norm, const GeMatrix<MA> &A, DenseVector<VWORK> &work)
 56 {
 57     using std::abs;
 58     using std::max;
 59     using std::min;
 60     using std::sqrt;
 61 
 62     typedef typename GeMatrix<MA>::ElementType  T;
 63     typedef typename GeMatrix<MA>::IndexType    IndexType;
 64 
 65     const Underscore<IndexType> _;
 66     const IndexType m = A.numRows();
 67     const IndexType n = A.numCols();
 68 
 69     const T  Zero(0), One(1);
 70 
 71     if (min(m,n)==T(0)) {
 72         return Zero;
 73     } else if (norm==MaximumNorm) {
 74 //
 75 //      Find max(abs(A(i,j))).
 76 //
 77         T value = Zero;
 78 
 79         for (IndexType j=1; j<=n; ++j) {
 80             for (IndexType i=1; i<=m; ++i) {
 81                 value = max(value, abs(A(i,j)));
 82             }
 83         }
 84         return value;
 85     } else if (norm==OneNorm) {
 86 //
 87 //      Find norm1(A).
 88 //
 89         T value = Zero;
 90 
 91         for (IndexType j=1; j<=n; ++j) {
 92             T sum = Zero;
 93             for (IndexType i=1; i<=m; ++i) {
 94                 sum += abs(A(i,j));
 95             }
 96             value = max(value, sum);
 97         }
 98         return value;
 99     } else if (norm==InfinityNorm) {
100 //
101 //      Find normI(A).
102 //
103         for (IndexType i=1; i<=m; ++i) {
104             work(i) = Zero;
105         }
106         for (IndexType j=1; j<=n; ++j) {
107             for (IndexType i=1; i<=m; ++i) {
108                 work(i) += abs(A(i,j));
109             }
110         }
111         T value = Zero;
112         for (IndexType i=1; i<=m; ++i) {
113             value = max(value, work(i));
114         }
115         return value;
116     } else if (norm==FrobeniusNorm) {
117 //
118 //      Find normF(A).
119 //
120         T scale = Zero;
121         T sum = One;
122         for (IndexType j=1; j<=n; ++j) {
123             lassq(A(_,j), scale, sum);
124         }
125         return scale*sqrt(sum);
126     }
127     ASSERT(0);
128     return Zero;
129 }
130 
131 template <typename MA, typename VWORK>
132 typename TrMatrix<MA>::ElementType
133 lan_generic(Norm norm, const TrMatrix<MA> &A, DenseVector<VWORK> &work)
134 {
135     using std::abs;
136     using std::max;
137     using std::min;
138     using std::sqrt;
139 
140     typedef typename TrMatrix<MA>::ElementType  T;
141     typedef typename TrMatrix<MA>::IndexType    IndexType;
142 
143     const Underscore<IndexType> _;
144     const IndexType m = A.numRows();
145     const IndexType n = A.numCols();
146 
147     const T  Zero(0), One(1);
148 
149     T value = Zero;
150 
151     if (min(m,n)==0) {
152         return value;
153     } else if (norm==MaximumNorm) {
154 //
155 //      Find max(abs(A(i,j))).
156 //
157         if (A.diag()==Unit) {
158             value = One;
159             if (A.upLo()==Upper) {
160                 for (IndexType j=1; j<=n; ++j) {
161                     for (IndexType i=1; i<=min(m,j-1); ++i) {
162                         value = max(value, abs(A(i,j)));
163                     }
164                 }
165             } else {
166                 for (IndexType j=1; j<=n; ++j) {
167                     for (IndexType i=j+1; i<=m; ++i) {
168                         value = max(value, abs(A(i,j)));
169                     }
170                 }
171             }
172         } else {
173             value = Zero;
174             if (A.upLo()==Upper) {
175                 for (IndexType j=1; j<=n; ++j) {
176                     for (IndexType i=1; i<=min(m,j); ++i) {
177                         value = max(value, abs(A(i,j)));
178                     }
179                 }
180             } else {
181                 for (IndexType j=1; j<=n; ++j) {
182                     for (IndexType i=j; i<=m; ++i) {
183                         value = max(value, abs(A(i,j)));
184                     }
185                 }
186             }
187         }
188     } else if (norm==OneNorm) {
189 //
190 //      Find norm1(A).
191 //
192         value = Zero;
193         const bool unitDiag = (A.diag()==Unit);
194         if (A.upLo()==Upper) {
195             for (IndexType j=1; j<=n; ++j) {
196                 T sum;
197                 if (unitDiag && (j<=m)) {
198                     sum = One;
199                     for (IndexType i=1; i<=j-1; ++i) {
200                         sum += abs(A(i,j));
201                     }
202                 } else {
203                     sum = Zero;
204                     for (IndexType i=1; i<=min(m,j); ++i) {
205                         sum += abs(A(i,j));
206                     }
207                 }
208                 value = max(value, sum);
209             }
210         } else {
211             for (IndexType j=1; j<=n; ++j) {
212                 T sum;
213                 if (unitDiag) {
214                     sum = One;
215                     for (IndexType i=j+1; i<=m; ++i) {
216                         sum += abs(A(i,j));
217                     }
218                 } else {
219                     sum = Zero;
220                     for (IndexType i=j; i<=m; ++i) {
221                         sum += abs(A(i,j));
222                     }
223                 }
224                 value = max(value, sum);
225             }
226         }
227     } else if (norm==InfinityNorm) {
228 //
229 //      Find normI(A).
230 //
231         if (A.upLo()==Upper) {
232             if (A.diag()==Unit) {
233                 for (IndexType i=1; i<=m; ++i) {
234                     work(i) = One;
235                 }
236                 for (IndexType j=1; j<=n; ++j) {
237                     for (IndexType i=1; i<=min(m,j-1); ++i) {
238                         work(i) += abs(A(i,j));
239                     }
240                 }
241             } else {
242                 for (IndexType i=1; i<=m; ++i) {
243                     work(i) = Zero;
244                 }
245                 for (IndexType j=1; j<=n; ++j) {
246                     for (IndexType i=1; i<=min(m,j); ++j) {
247                         work(i) += abs(A(i,j));
248                     }
249                 }
250             }
251         } else {
252             if (A.diag()==Unit) {
253                 for (IndexType i=1; i<=n; ++i) {
254                     work(i) = One;
255                 }
256                 for (IndexType i=n+1; i<=m; ++i) {
257                     work(i) = Zero;
258                 }
259                 for (IndexType j=1; j<=n; ++j) {
260                     for (IndexType i=j+1; i<=m; ++i) {
261                         work(i) += abs(A(i,j));
262                     }
263                 }
264             } else {
265                 for (IndexType i=1; i<=m; ++i) {
266                     work(i) = Zero;
267                 }
268                 for (IndexType j=1; j<=n; ++j) {
269                     for (IndexType i=j; i<=m; ++i) {
270                         work(i) += abs(A(i,j));
271                     }
272                 }
273             }
274         }
275         value = Zero;
276         for (IndexType i=1; i<=m; ++i) {
277             value = max(value, work(i));
278         }
279     } else if (norm==FrobeniusNorm) {
280 //
281 //      Find normF(A).
282 //
283         T  scale, sum;
284         if (A.upLo()==Upper) {
285             if (A.diag()==Unit) {
286                 scale = One;
287                 sum = min(m, n);
288                 for (IndexType j=2; j<=n; ++j) {
289                     const auto rows = _(1,min(m,j-1));
290                     const auto Aj = A(rows,j);
291                     lassq(Aj, scale, sum);
292                 }
293             } else {
294                 scale = Zero;
295                 sum = One;
296                 for (IndexType j=1; j<=n; ++j) {
297                     const auto rows = _(1,min(m,j));
298                     const auto Aj = A(rows,j);
299                     lassq(Aj, scale, sum);
300                 }
301             }
302         } else {
303             if (A.diag()==Unit) {
304                 scale = One,
305                 sum = min(m,n);
306                 for (IndexType j=1; j<=n; ++j) {
307                     const auto rows = _(min(m,j)+1,m);
308                     const auto Aj = A(rows,j);
309                     lassq(Aj, scale, sum);
310                 }
311             } else {
312                 scale = Zero;
313                 sum = One;
314                 for (IndexType j=1; j<=n; ++j) {
315                     const auto rows = _(min(m+1,j),m);
316                     const auto Aj = A(rows,j);
317                     lassq(Aj, scale, sum);
318                 }
319             }
320         }
321         value = scale*sqrt(sum);
322     }
323     return value;
324 }
325 
326 //== interface for native lapack ===============================================
327 
328 #ifdef CHECK_CXXLAPACK
329 
330 template <typename MA, typename VWORK>
331 typename GeMatrix<MA>::ElementType
332 lan_native(Norm norm, const GeMatrix<MA> &A, DenseVector<VWORK> &work)
333 {
334     typedef typename GeMatrix<MA>::ElementType  T;
335 
336     const char      NORM    = getF77LapackChar<Norm>(norm);
337     const INTEGER   M       = A.numRows();
338     const INTEGER   N       = A.numCols();
339     const INTEGER   LDA     = A.leadingDimension();
340 
341     if (IsSame<T, DOUBLE>::value) {
342         return LAPACK_IMPL(dlange)(&NORM,
343                                    &M,
344                                    &N,
345                                    A.data(),
346                                    &LDA,
347                                    work.data());
348     } else {
349         ASSERT(0);
350     }
351 }
352 
353 template <typename MA, typename VWORK>
354 typename TrMatrix<MA>::ElementType
355 lan_native(Norm norm, const TrMatrix<MA> &A, DenseVector<VWORK> &work)
356 {
357     typedef typename TrMatrix<MA>::ElementType  T;
358 
359     const char      NORM    = getF77LapackChar<Norm>(norm);
360     const char      UPLO    = getF77BlasChar(A.upLo());
361     const char      DIAG    = getF77BlasChar(A.diag());
362     const INTEGER   M       = A.numRows();
363     const INTEGER   N       = A.numCols();
364     const INTEGER   LDA     = A.leadingDimension();
365 
366     if (IsSame<T, DOUBLE>::value) {
367         return LAPACK_IMPL(dlantr)(&NORM,
368                                    &UPLO,
369                                    &DIAG,
370                                    &M,
371                                    &N,
372                                    A.data(),
373                                    &LDA,
374                                    work.data());
375     } else {
376         ASSERT(0);
377     }
378 }
379 
380 #endif // CHECK_CXXLAPACK
381 
382 //== public interface ==========================================================
383 
384 //-- lan(ge)
385 template <typename MA>
386 typename GeMatrix<MA>::ElementType
387 lan(Norm norm, const GeMatrix<MA> &A)
388 {
389     ASSERT(norm!=InfinityNorm);
390 
391     typedef typename GeMatrix<MA>::ElementType  T;
392     DenseVector<Array<T> >  dummy;
393     return lan(norm, A, dummy);
394 }
395 
396 template <typename MA, typename VWORK>
397 typename GeMatrix<MA>::ElementType
398 lan(Norm norm, const GeMatrix<MA> &A, DenseVector<VWORK> &work)
399 {
400     typedef typename GeMatrix<MA>::ElementType T;
401 //
402 //  Test the input parameters
403 //
404     ASSERT(A.firstRow()==1);
405     ASSERT(A.firstCol()==1);
406     ASSERT(norm!=InfinityNorm || work.length()>=A.numRows());
407 #   ifdef CHECK_CXXLAPACK
408 //
409 //  Make copies of output arguments
410 //
411     typename DenseVector<VWORK>::NoView _work = work;
412 #   endif
413 
414 //
415 //  Call implementation
416 //
417     T result = lan_generic(norm, A, work);
418 
419 #   ifdef CHECK_CXXLAPACK
420 //
421 //  Compare results
422 //
423     if (_work.length()==0) {
424         _work.resize(work.length());
425     }
426 
427     T _result = lan_native(norm, A, _work);
428 
429     bool failed = false;
430     if (! isIdentical(work, _work, " work""_work")) {
431         std::cerr << "CXXLAPACK:  work = " << work << std::endl;
432         std::cerr << "F77LAPACK: _work = " << _work << std::endl;
433         failed = true;
434     }
435 
436     if (! isIdentical(result, _result, " result""_result")) {
437         failed = true;
438     }
439 
440     if (failed) {
441         ASSERT(0);
442     }
443 #   endif
444 
445     return result;
446 }
447 
448 //-- lan(tr)
449 template <typename MA>
450 typename TrMatrix<MA>::ElementType
451 lan(Norm norm, const TrMatrix<MA> &A)
452 {
453     ASSERT(norm!=InfinityNorm);
454 
455     typedef typename TrMatrix<MA>::ElementType  T;
456     DenseVector<Array<T> >  dummy;
457     return lan(norm, A, dummy);
458 }
459 
460 template <typename MA, typename VWORK>
461 typename TrMatrix<MA>::ElementType
462 lan(Norm norm, const TrMatrix<MA> &A, DenseVector<VWORK> &work)
463 {
464     typedef typename TrMatrix<MA>::ElementType T;
465 //
466 //  Test the input parameters
467 //
468     ASSERT(A.firstRow()==1);
469     ASSERT(A.firstCol()==1);
470     ASSERT(norm!=InfinityNorm || work.length()>=A.numRows());
471 #   ifdef CHECK_CXXLAPACK
472 //
473 //  Make copies of output arguments
474 //
475     typename DenseVector<VWORK>::NoView _work = work;
476 #   endif
477 
478 //
479 //  Call implementation
480 //
481     T result = lan_generic(norm, A, work);
482 
483 #   ifdef CHECK_CXXLAPACK
484 //
485 //  Compare results
486 //
487     if (_work.length()==0) {
488         _work.resize(work.length());
489     }
490 
491     T _result = lan_native(norm, A, _work);
492 
493     bool failed = false;
494     if (! isIdentical(work, _work, " work""_work")) {
495         std::cerr << "CXXLAPACK:  work = " << work << std::endl;
496         std::cerr << "F77LAPACK: _work = " << _work << std::endl;
497         failed = true;
498     }
499 
500     if (! isIdentical(result, _result, " result""_result")) {
501         failed = true;
502     }
503 
504     if (failed) {
505         ASSERT(0);
506     }
507 #   endif
508 
509     return result;
510 }
511 
512 //-- forwarding ----------------------------------------------------------------
513 template <typename MA>
514 typename MA::ElementType
515 lan(Norm norm, const MA &A)
516 {
517     typedef typename MA::ElementType  T;
518 
519     CHECKPOINT_ENTER;
520     const T result = lan(norm, A);
521     CHECKPOINT_LEAVE;
522 
523     return result;
524 }
525 
526 template <typename MA, typename VWORK>
527 typename MA::ElementType
528 lan(Norm norm, const MA &A, VWORK &&work)
529 {
530     typedef typename MA::ElementType  T;
531 
532     CHECKPOINT_ENTER;
533     const T result = lan(norm, A, work);
534     CHECKPOINT_LEAVE;
535 
536     return result;
537 }
538 
539 } } // namespace lapack, flens
540 
541 #endif // FLENS_LAPACK_AUX_LAN_TCC