1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
36 $ WORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_LQ_ORML2_TCC
45 #define FLENS_LAPACK_LQ_ORML2_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53 template <typename MA, typename VTAU, typename MC, typename VWORK>
54 void
55 orml2_generic(Side side, Transpose trans, GeMatrix<MA> &A,
56 const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
57 DenseVector<VWORK> &work)
58 {
59 typedef typename GeMatrix<MC>::IndexType IndexType;
60 typedef typename GeMatrix<MC>::ElementType T;
61
62 typedef Range<IndexType> Range;
63 const Underscore<IndexType> _;
64
65 const IndexType m = C.numRows();
66 const IndexType n = C.numCols();
67 const IndexType k = A.numRows();
68
69 const T One(1);
70
71 const bool noTrans = ((trans==Trans) || (trans==ConjTrans)) ? false
72 : true;
73 //
74 // nq is the order of Q
75 //
76 const IndexType nq = (side==Left) ? m : n;
77
78 //
79 // Quick return if possible
80 //
81 if ((m==0) || (n==0) || (k==0)) {
82 return;
83 }
84
85 IndexType iBeg, iEnd, iInc;
86 if ((side==Left && noTrans) || (side==Right && !noTrans)) {
87 iBeg = 1;
88 iEnd = k;
89 iInc = 1;
90 } else {
91 iBeg = k;
92 iEnd = 1;
93 iInc = -1;
94 }
95 iEnd += iInc;
96
97 Range rows = _(1,m);
98 Range cols = _(1,n);
99
100 for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
101 if (side==Left) {
102 //
103 // H(i) is applied to C(i:m,1:n)
104 //
105 rows = _(i,m);
106 } else {
107 //
108 // H(i) is applied to C(1:m,i:n)
109 //
110 cols = _(i,n);
111 }
112 //
113 // Apply H(i)
114 //
115 const T Aii = A(i,i);
116 A(i,i) = One;
117 larf(side, A(i,_(i,nq)), tau(i), C(rows, cols), work);
118 A(i,i) = Aii;
119 }
120 }
121
122 //== interface for native lapack ===============================================
123
124 #ifdef CHECK_CXXLAPACK
125
126 template <typename MA, typename VTAU, typename MC, typename VWORK>
127 void
128 orml2_native(Side side, Transpose trans, GeMatrix<MA> &A,
129 const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
130 DenseVector<VWORK> &work)
131 {
132 typedef typename GeMatrix<MC>::ElementType T;
133
134 const char SIDE = char(side);
135 const char TRANS = getF77LapackChar(trans);
136 const INTEGER M = C.numRows();
137 const INTEGER N = C.numCols();
138 const INTEGER K = A.numRows();
139 const INTEGER LDA = A.leadingDimension();
140 const INTEGER LDC = C.leadingDimension();
141 INTEGER INFO;
142
143 if (IsSame<T,DOUBLE>::value) {
144 LAPACK_DECL(dorml2)(&SIDE,
145 &TRANS,
146 &M,
147 &N,
148 &K,
149 A.data(),
150 &LDA,
151 tau.data(),
152 C.data(),
153 &LDC,
154 work.data(),
155 &INFO);
156 } else {
157 ASSERT(0);
158 }
159 ASSERT(INFO==0);
160 }
161
162 #endif // CHECK_CXXLAPACK
163
164 //== public interface ==========================================================
165
166 template <typename MA, typename VTAU, typename MC, typename VWORK>
167 void
168 orml2(Side side, Transpose trans, GeMatrix<MA> &A,
169 const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
170 DenseVector<VWORK> &work)
171 {
172 typedef typename GeMatrix<MC>::IndexType IndexType;
173
174 //
175 // Test the input parameters
176 //
177 # ifndef NDEBUG
178 const IndexType m = C.numRows();
179 const IndexType n = C.numCols();
180 const IndexType k = A.numRows();
181
182 const IndexType nq = (side==Left) ? m : n;
183
184 ASSERT(tau.length()==k);
185
186 ASSERT(k<=nq);
187 ASSERT(A.numRows()==k);
188 ASSERT(A.numCols()==nq);
189
190 if (work.length()>0) {
191 if (side==Left) {
192 ASSERT(work.length()==n);
193 } else {
194 ASSERT(work.length()==m);
195 }
196 }
197 # endif
198 //
199 // Make copies of output arguments
200 //
201 # ifdef CHECK_CXXLAPACK
202 typename GeMatrix<MA>::NoView A_org = A;
203 typename GeMatrix<MC>::NoView C_org = C;
204 typename DenseVector<VWORK>::NoView work_org = work;
205 # endif
206
207 //
208 // Call implementation
209 //
210 orml2_generic(side, trans, A, tau, C, work);
211
212 # ifdef CHECK_CXXLAPACK
213 //
214 // Make copies of results computed by the generic implementation
215 //
216 typename GeMatrix<MA>::NoView A_generic = A;
217 typename GeMatrix<MC>::NoView C_generic = C;
218 typename DenseVector<VWORK>::NoView work_generic = work;
219
220 //
221 // restore output arguments
222 //
223 A = A_org;
224 C = C_org;
225 work = work_org;
226
227 //
228 // Compare generic results with results from the native implementation
229 //
230 orml2_native(side, trans, A, tau, C, work);
231
232 bool failed = false;
233 if (! isIdentical(A_generic, A, "A_generic", "A")) {
234 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
235 std::cerr << "F77LAPACK: A = " << A << std::endl;
236 failed = true;
237 }
238 if (! isIdentical(C_generic, C, "C_generic", "C")) {
239 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
240 std::cerr << "F77LAPACK: C = " << C << std::endl;
241 failed = true;
242 }
243 if (! isIdentical(work_generic, work, "work_generic", "work")) {
244 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
245 std::cerr << "F77LAPACK: work = " << work << std::endl;
246 failed = true;
247 }
248
249 if (failed) {
250 std::cerr << "error in: orml2.tcc" << std::endl;
251 ASSERT(0);
252 } else {
253 // std::cerr << "passed: orml2.tcc" << std::endl;
254 }
255 # endif
256
257 }
258
259 //-- forwarding ----------------------------------------------------------------
260 template <typename MA, typename VTAU, typename MC, typename VWORK>
261 void
262 orml2(Side side, Transpose trans, MA &&A, const VTAU &tau, MC &&C,
263 VWORK &&work)
264 {
265 CHECKPOINT_ENTER;
266 orml2(side, trans, A, tau, C, work);
267 CHECKPOINT_LEAVE;
268 }
269
270
271 } } // namespace lapack, flens
272
273 #endif // FLENS_LAPACK_LQ_ORML2_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
36 $ WORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_LQ_ORML2_TCC
45 #define FLENS_LAPACK_LQ_ORML2_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53 template <typename MA, typename VTAU, typename MC, typename VWORK>
54 void
55 orml2_generic(Side side, Transpose trans, GeMatrix<MA> &A,
56 const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
57 DenseVector<VWORK> &work)
58 {
59 typedef typename GeMatrix<MC>::IndexType IndexType;
60 typedef typename GeMatrix<MC>::ElementType T;
61
62 typedef Range<IndexType> Range;
63 const Underscore<IndexType> _;
64
65 const IndexType m = C.numRows();
66 const IndexType n = C.numCols();
67 const IndexType k = A.numRows();
68
69 const T One(1);
70
71 const bool noTrans = ((trans==Trans) || (trans==ConjTrans)) ? false
72 : true;
73 //
74 // nq is the order of Q
75 //
76 const IndexType nq = (side==Left) ? m : n;
77
78 //
79 // Quick return if possible
80 //
81 if ((m==0) || (n==0) || (k==0)) {
82 return;
83 }
84
85 IndexType iBeg, iEnd, iInc;
86 if ((side==Left && noTrans) || (side==Right && !noTrans)) {
87 iBeg = 1;
88 iEnd = k;
89 iInc = 1;
90 } else {
91 iBeg = k;
92 iEnd = 1;
93 iInc = -1;
94 }
95 iEnd += iInc;
96
97 Range rows = _(1,m);
98 Range cols = _(1,n);
99
100 for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
101 if (side==Left) {
102 //
103 // H(i) is applied to C(i:m,1:n)
104 //
105 rows = _(i,m);
106 } else {
107 //
108 // H(i) is applied to C(1:m,i:n)
109 //
110 cols = _(i,n);
111 }
112 //
113 // Apply H(i)
114 //
115 const T Aii = A(i,i);
116 A(i,i) = One;
117 larf(side, A(i,_(i,nq)), tau(i), C(rows, cols), work);
118 A(i,i) = Aii;
119 }
120 }
121
122 //== interface for native lapack ===============================================
123
124 #ifdef CHECK_CXXLAPACK
125
126 template <typename MA, typename VTAU, typename MC, typename VWORK>
127 void
128 orml2_native(Side side, Transpose trans, GeMatrix<MA> &A,
129 const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
130 DenseVector<VWORK> &work)
131 {
132 typedef typename GeMatrix<MC>::ElementType T;
133
134 const char SIDE = char(side);
135 const char TRANS = getF77LapackChar(trans);
136 const INTEGER M = C.numRows();
137 const INTEGER N = C.numCols();
138 const INTEGER K = A.numRows();
139 const INTEGER LDA = A.leadingDimension();
140 const INTEGER LDC = C.leadingDimension();
141 INTEGER INFO;
142
143 if (IsSame<T,DOUBLE>::value) {
144 LAPACK_DECL(dorml2)(&SIDE,
145 &TRANS,
146 &M,
147 &N,
148 &K,
149 A.data(),
150 &LDA,
151 tau.data(),
152 C.data(),
153 &LDC,
154 work.data(),
155 &INFO);
156 } else {
157 ASSERT(0);
158 }
159 ASSERT(INFO==0);
160 }
161
162 #endif // CHECK_CXXLAPACK
163
164 //== public interface ==========================================================
165
166 template <typename MA, typename VTAU, typename MC, typename VWORK>
167 void
168 orml2(Side side, Transpose trans, GeMatrix<MA> &A,
169 const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
170 DenseVector<VWORK> &work)
171 {
172 typedef typename GeMatrix<MC>::IndexType IndexType;
173
174 //
175 // Test the input parameters
176 //
177 # ifndef NDEBUG
178 const IndexType m = C.numRows();
179 const IndexType n = C.numCols();
180 const IndexType k = A.numRows();
181
182 const IndexType nq = (side==Left) ? m : n;
183
184 ASSERT(tau.length()==k);
185
186 ASSERT(k<=nq);
187 ASSERT(A.numRows()==k);
188 ASSERT(A.numCols()==nq);
189
190 if (work.length()>0) {
191 if (side==Left) {
192 ASSERT(work.length()==n);
193 } else {
194 ASSERT(work.length()==m);
195 }
196 }
197 # endif
198 //
199 // Make copies of output arguments
200 //
201 # ifdef CHECK_CXXLAPACK
202 typename GeMatrix<MA>::NoView A_org = A;
203 typename GeMatrix<MC>::NoView C_org = C;
204 typename DenseVector<VWORK>::NoView work_org = work;
205 # endif
206
207 //
208 // Call implementation
209 //
210 orml2_generic(side, trans, A, tau, C, work);
211
212 # ifdef CHECK_CXXLAPACK
213 //
214 // Make copies of results computed by the generic implementation
215 //
216 typename GeMatrix<MA>::NoView A_generic = A;
217 typename GeMatrix<MC>::NoView C_generic = C;
218 typename DenseVector<VWORK>::NoView work_generic = work;
219
220 //
221 // restore output arguments
222 //
223 A = A_org;
224 C = C_org;
225 work = work_org;
226
227 //
228 // Compare generic results with results from the native implementation
229 //
230 orml2_native(side, trans, A, tau, C, work);
231
232 bool failed = false;
233 if (! isIdentical(A_generic, A, "A_generic", "A")) {
234 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
235 std::cerr << "F77LAPACK: A = " << A << std::endl;
236 failed = true;
237 }
238 if (! isIdentical(C_generic, C, "C_generic", "C")) {
239 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
240 std::cerr << "F77LAPACK: C = " << C << std::endl;
241 failed = true;
242 }
243 if (! isIdentical(work_generic, work, "work_generic", "work")) {
244 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
245 std::cerr << "F77LAPACK: work = " << work << std::endl;
246 failed = true;
247 }
248
249 if (failed) {
250 std::cerr << "error in: orml2.tcc" << std::endl;
251 ASSERT(0);
252 } else {
253 // std::cerr << "passed: orml2.tcc" << std::endl;
254 }
255 # endif
256
257 }
258
259 //-- forwarding ----------------------------------------------------------------
260 template <typename MA, typename VTAU, typename MC, typename VWORK>
261 void
262 orml2(Side side, Transpose trans, MA &&A, const VTAU &tau, MC &&C,
263 VWORK &&work)
264 {
265 CHECKPOINT_ENTER;
266 orml2(side, trans, A, tau, C, work);
267 CHECKPOINT_LEAVE;
268 }
269
270
271 } } // namespace lapack, flens
272
273 #endif // FLENS_LAPACK_LQ_ORML2_TCC