1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO )
36 *
37 * -- LAPACK auxiliary routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011
41 */
42
43 #ifndef FLENS_LAPACK_GESV_LAUUM_TCC
44 #define FLENS_LAPACK_GESV_LAUUM_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA>
57 void
58 lauum_generic(TrMatrix<MA> &A)
59 {
60 using std::min;
61
62 typedef typename TrMatrix<MA>::ElementType ElementType;
63 typedef typename TrMatrix<MA>::IndexType IndexType;
64
65 const ElementType One(1);
66 const IndexType n = A.dim();
67 const Underscore<IndexType> _;
68
69 const bool upper = (A.upLo()==Upper);
70 //
71 // Quick return if possible
72 //
73 if (n==0) {
74 return;
75 }
76 //
77 // Determine the block size for this environment.
78 //
79 const char *upLo = (upper) ? "U" : "L";
80 const IndexType nb = ilaenv<ElementType>(1, "LAUUM", upLo, n);
81
82 if (nb<=1 || nb>=n) {
83 //
84 // Use unblocked code
85 //
86 lauu2(A);
87 } else {
88 //
89 // Use blocked code
90 //
91 if (upper) {
92 //
93 // Compute the product U * U**T.
94 //
95 for (IndexType i=1; i<=n; i+=nb) {
96 const IndexType ib = min(nb, n-i+1);
97
98 const auto range1 = _(1,i-1);
99 const auto range2 = _(i,i+ib-1);
100 const auto range3 = _(i+ib,n);
101
102 auto A12 = A(range1,range2);
103 const auto A13 = A(range1,range3);
104 auto U22 = A(range2,range2).upper();
105 auto A22 = U22.symmetric();
106 const auto A23 = A(range2,range3);
107
108 blas::mm(Right, Trans, One, U22, A12);
109 lauu2(U22);
110 if (i+ib<=n) {
111 blas::mm(NoTrans, Trans, One, A13, A23, One, A12);
112 blas::rk(NoTrans, One, A23, One, A22);
113 }
114 }
115 } else {
116 //
117 // Compute the product L**T * L.
118 //
119 for (IndexType i=1; i<=n; i+=nb) {
120 const IndexType ib = min(nb, n-i+1);
121
122 const auto range1 = _(1,i-1);
123 const auto range2 = _(i,i+ib-1);
124 const auto range3 = _(i+ib,n);
125
126 auto A21 = A(range2,range1);
127 const auto A31 = A(range3,range1);
128 auto L22 = A(range2,range2).lower();
129 auto A22 = L22.symmetric();
130 const auto A32 = A(range3,range2);
131
132 blas::mm(Left, Trans, One, L22, A21);
133 lauu2(L22);
134 if (i+ib<=n) {
135 blas::mm(Trans, NoTrans, One, A32, A31, One, A21);
136 blas::rk(Trans, One, A32, One, A22);
137 }
138 }
139 }
140 }
141 }
142
143 //== interface for native lapack ===============================================
144
145 #ifdef CHECK_CXXLAPACK
146
147 template <typename MA>
148 void
149 lauum_native(TrMatrix<MA> &A)
150 {
151 typedef typename TrMatrix<MA>::ElementType ElementType;
152
153 const char UPLO(A.upLo());
154 const INTEGER N = A.dim();
155 const INTEGER LDA = A.leadingDimension();
156 INTEGER INFO;
157
158 if (IsSame<ElementType,double>::value) {
159 LAPACK_DECL(dlauum)(&UPLO,
160 &N,
161 A.data(),
162 &LDA,
163 &INFO);
164 } else {
165 ASSERT(0);
166 }
167 ASSERT(INFO==0);
168 }
169
170 #endif // CHECK_CXXLAPACK
171
172 //== public interface ==========================================================
173
174 template <typename MA>
175 void
176 lauum(TrMatrix<MA> &A)
177 {
178 typedef typename TrMatrix<MA>::IndexType IndexType;
179
180 //
181 // Test the input parameters
182 //
183 ASSERT(A.firstRow()==1);
184 ASSERT(A.firstCol()==1);
185 ASSERT(A.diag()==NonUnit);
186
187 # ifdef CHECK_CXXLAPACK
188 //
189 // Make copies of output arguments
190 //
191 typename TrMatrix<MA>::NoView A_org = A;
192 # endif
193
194 //
195 // Call implementation
196 //
197 lauum_generic(A);
198
199 # ifdef CHECK_CXXLAPACK
200 //
201 // Make copies of generic results
202 //
203 typename TrMatrix<MA>::NoView A_generic = A;
204 //
205 // Restore output arguments
206 //
207 A = A_org;
208
209 //
210 // Compare results
211 //
212 lauum_native(A);
213
214 bool failed = false;
215 if (! isIdentical(A_generic, A, "A_generic", "_A")) {
216 std::cerr << "A_org = " << A_org << std::endl;
217 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
218 std::cerr << "F77LAPACK: A = " << A << std::endl;
219 failed = true;
220 }
221
222 if (failed) {
223 ASSERT(0);
224 }
225
226 # endif
227 }
228
229 //-- forwarding ----------------------------------------------------------------
230 template <typename MA>
231 void
232 lauum(MA &&A)
233 {
234 lauum(A);
235 }
236
237 } } // namespace lapack, flens
238
239 #endif // FLENS_LAPACK_GESV_LAUUM_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO )
36 *
37 * -- LAPACK auxiliary routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011
41 */
42
43 #ifndef FLENS_LAPACK_GESV_LAUUM_TCC
44 #define FLENS_LAPACK_GESV_LAUUM_TCC 1
45
46 #include <algorithm>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 #include <flens/lapack/interface/include/f77lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA>
57 void
58 lauum_generic(TrMatrix<MA> &A)
59 {
60 using std::min;
61
62 typedef typename TrMatrix<MA>::ElementType ElementType;
63 typedef typename TrMatrix<MA>::IndexType IndexType;
64
65 const ElementType One(1);
66 const IndexType n = A.dim();
67 const Underscore<IndexType> _;
68
69 const bool upper = (A.upLo()==Upper);
70 //
71 // Quick return if possible
72 //
73 if (n==0) {
74 return;
75 }
76 //
77 // Determine the block size for this environment.
78 //
79 const char *upLo = (upper) ? "U" : "L";
80 const IndexType nb = ilaenv<ElementType>(1, "LAUUM", upLo, n);
81
82 if (nb<=1 || nb>=n) {
83 //
84 // Use unblocked code
85 //
86 lauu2(A);
87 } else {
88 //
89 // Use blocked code
90 //
91 if (upper) {
92 //
93 // Compute the product U * U**T.
94 //
95 for (IndexType i=1; i<=n; i+=nb) {
96 const IndexType ib = min(nb, n-i+1);
97
98 const auto range1 = _(1,i-1);
99 const auto range2 = _(i,i+ib-1);
100 const auto range3 = _(i+ib,n);
101
102 auto A12 = A(range1,range2);
103 const auto A13 = A(range1,range3);
104 auto U22 = A(range2,range2).upper();
105 auto A22 = U22.symmetric();
106 const auto A23 = A(range2,range3);
107
108 blas::mm(Right, Trans, One, U22, A12);
109 lauu2(U22);
110 if (i+ib<=n) {
111 blas::mm(NoTrans, Trans, One, A13, A23, One, A12);
112 blas::rk(NoTrans, One, A23, One, A22);
113 }
114 }
115 } else {
116 //
117 // Compute the product L**T * L.
118 //
119 for (IndexType i=1; i<=n; i+=nb) {
120 const IndexType ib = min(nb, n-i+1);
121
122 const auto range1 = _(1,i-1);
123 const auto range2 = _(i,i+ib-1);
124 const auto range3 = _(i+ib,n);
125
126 auto A21 = A(range2,range1);
127 const auto A31 = A(range3,range1);
128 auto L22 = A(range2,range2).lower();
129 auto A22 = L22.symmetric();
130 const auto A32 = A(range3,range2);
131
132 blas::mm(Left, Trans, One, L22, A21);
133 lauu2(L22);
134 if (i+ib<=n) {
135 blas::mm(Trans, NoTrans, One, A32, A31, One, A21);
136 blas::rk(Trans, One, A32, One, A22);
137 }
138 }
139 }
140 }
141 }
142
143 //== interface for native lapack ===============================================
144
145 #ifdef CHECK_CXXLAPACK
146
147 template <typename MA>
148 void
149 lauum_native(TrMatrix<MA> &A)
150 {
151 typedef typename TrMatrix<MA>::ElementType ElementType;
152
153 const char UPLO(A.upLo());
154 const INTEGER N = A.dim();
155 const INTEGER LDA = A.leadingDimension();
156 INTEGER INFO;
157
158 if (IsSame<ElementType,double>::value) {
159 LAPACK_DECL(dlauum)(&UPLO,
160 &N,
161 A.data(),
162 &LDA,
163 &INFO);
164 } else {
165 ASSERT(0);
166 }
167 ASSERT(INFO==0);
168 }
169
170 #endif // CHECK_CXXLAPACK
171
172 //== public interface ==========================================================
173
174 template <typename MA>
175 void
176 lauum(TrMatrix<MA> &A)
177 {
178 typedef typename TrMatrix<MA>::IndexType IndexType;
179
180 //
181 // Test the input parameters
182 //
183 ASSERT(A.firstRow()==1);
184 ASSERT(A.firstCol()==1);
185 ASSERT(A.diag()==NonUnit);
186
187 # ifdef CHECK_CXXLAPACK
188 //
189 // Make copies of output arguments
190 //
191 typename TrMatrix<MA>::NoView A_org = A;
192 # endif
193
194 //
195 // Call implementation
196 //
197 lauum_generic(A);
198
199 # ifdef CHECK_CXXLAPACK
200 //
201 // Make copies of generic results
202 //
203 typename TrMatrix<MA>::NoView A_generic = A;
204 //
205 // Restore output arguments
206 //
207 A = A_org;
208
209 //
210 // Compare results
211 //
212 lauum_native(A);
213
214 bool failed = false;
215 if (! isIdentical(A_generic, A, "A_generic", "_A")) {
216 std::cerr << "A_org = " << A_org << std::endl;
217 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
218 std::cerr << "F77LAPACK: A = " << A << std::endl;
219 failed = true;
220 }
221
222 if (failed) {
223 ASSERT(0);
224 }
225
226 # endif
227 }
228
229 //-- forwarding ----------------------------------------------------------------
230 template <typename MA>
231 void
232 lauum(MA &&A)
233 {
234 lauum(A);
235 }
236
237 } } // namespace lapack, flens
238
239 #endif // FLENS_LAPACK_GESV_LAUUM_TCC