1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO )
 36  *
 37  *  -- LAPACK auxiliary routine (version 3.3.1) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *  -- April 2011 
 41  */
 42 
 43 #ifndef FLENS_LAPACK_GESV_LAUUM_TCC
 44 #define FLENS_LAPACK_GESV_LAUUM_TCC 1
 45 
 46 #include <algorithm>
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 #include <flens/lapack/interface/include/f77lapack.h>
 51 
 52 namespace flens { namespace lapack {
 53 
 54 //== generic lapack implementation =============================================
 55 
 56 template <typename MA>
 57 void
 58 lauum_generic(TrMatrix<MA> &A)
 59 {
 60     using std::min;
 61 
 62     typedef typename TrMatrix<MA>::ElementType   ElementType;
 63     typedef typename TrMatrix<MA>::IndexType     IndexType;
 64 
 65     const ElementType            One(1);
 66     const IndexType              n = A.dim();
 67     const Underscore<IndexType>  _;
 68 
 69     const bool upper = (A.upLo()==Upper);
 70 //
 71 //  Quick return if possible
 72 //
 73     if (n==0) {
 74         return;
 75     }
 76 //
 77 //  Determine the block size for this environment.
 78 //
 79     const char *upLo = (upper) ? "U" : "L";
 80     const IndexType nb = ilaenv<ElementType>(1"LAUUM", upLo, n);
 81 
 82     if (nb<=1 || nb>=n) {
 83 //
 84 //      Use unblocked code
 85 //
 86         lauu2(A);
 87     } else {
 88 //
 89 //      Use blocked code
 90 //
 91         if (upper) {
 92 //
 93 //          Compute the product U * U**T.
 94 //
 95             for (IndexType i=1; i<=n; i+=nb) {
 96                 const IndexType ib = min(nb, n-i+1);
 97 
 98                 const auto range1 = _(1,i-1);
 99                 const auto range2 = _(i,i+ib-1);
100                 const auto range3 = _(i+ib,n);
101 
102                 auto A12       = A(range1,range2);
103                 const auto A13 = A(range1,range3);
104                 auto U22       = A(range2,range2).upper();
105                 auto A22       = U22.symmetric();
106                 const auto A23 = A(range2,range3);
107 
108                 blas::mm(Right, Trans, One, U22, A12);
109                 lauu2(U22);
110                 if (i+ib<=n) {
111                     blas::mm(NoTrans, Trans, One, A13, A23, One, A12);
112                     blas::rk(NoTrans, One, A23, One, A22);
113                 }
114             }
115         } else {
116 //
117 //          Compute the product L**T * L.
118 //
119             for (IndexType i=1; i<=n; i+=nb) {
120                 const IndexType ib = min(nb, n-i+1);
121 
122                 const auto range1 = _(1,i-1);
123                 const auto range2 = _(i,i+ib-1);
124                 const auto range3 = _(i+ib,n);
125 
126                 auto A21       = A(range2,range1);
127                 const auto A31 = A(range3,range1);
128                 auto L22       = A(range2,range2).lower();
129                 auto A22       = L22.symmetric();
130                 const auto A32 = A(range3,range2);
131 
132                 blas::mm(Left, Trans, One, L22, A21);
133                 lauu2(L22);
134                 if (i+ib<=n) {
135                     blas::mm(Trans, NoTrans, One, A32, A31, One, A21);
136                     blas::rk(Trans, One, A32, One, A22);
137                 }
138             }
139         }
140     }
141 }
142 
143 //== interface for native lapack ===============================================
144 
145 #ifdef CHECK_CXXLAPACK
146 
147 template <typename MA>
148 void
149 lauum_native(TrMatrix<MA> &A)
150 {
151     typedef typename TrMatrix<MA>::ElementType  ElementType;
152 
153     const char     UPLO(A.upLo());
154     const INTEGER  N     = A.dim();
155     const INTEGER  LDA   = A.leadingDimension();
156     INTEGER        INFO;
157 
158     if (IsSame<ElementType,double>::value) {
159         LAPACK_DECL(dlauum)(&UPLO,
160                             &N,
161                             A.data(),
162                             &LDA,
163                             &INFO);
164     } else {
165         ASSERT(0);
166     }
167     ASSERT(INFO==0);
168 }
169 
170 #endif // CHECK_CXXLAPACK
171 
172 //== public interface ==========================================================
173 
174 template <typename MA>
175 void
176 lauum(TrMatrix<MA> &A)
177 {
178     typedef typename TrMatrix<MA>::IndexType    IndexType;
179 
180 //
181 //  Test the input parameters
182 //
183     ASSERT(A.firstRow()==1);
184     ASSERT(A.firstCol()==1);
185     ASSERT(A.diag()==NonUnit);
186 
187 #   ifdef CHECK_CXXLAPACK
188 //
189 //  Make copies of output arguments
190 //
191     typename TrMatrix<MA>::NoView       A_org      = A;
192 #   endif
193 
194 //
195 //  Call implementation
196 //
197     lauum_generic(A);
198 
199 #   ifdef CHECK_CXXLAPACK
200 //
201 //  Make copies of generic results
202 //
203     typename TrMatrix<MA>::NoView       A_generic      = A;
204 //
205 //  Restore output arguments
206 //
207     A = A_org;
208 
209 //
210 //  Compare results
211 //
212     lauum_native(A);
213 
214     bool failed = false;
215     if (! isIdentical(A_generic, A, "A_generic""_A")) {
216         std::cerr << "A_org = " << A_org << std::endl;
217         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
218         std::cerr << "F77LAPACK: A = " << A << std::endl;
219         failed = true;
220     }
221 
222     if (failed) {
223         ASSERT(0);
224     }
225 
226 #   endif
227 }
228 
229 //-- forwarding ----------------------------------------------------------------
230 template <typename MA>
231 void
232 lauum(MA &&A)
233 {
234     lauum(A);
235 }
236 
237 } } // namespace lapack, flens
238 
239 #endif // FLENS_LAPACK_GESV_LAUUM_TCC