1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 * SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
35 *
36 * -- LAPACK auxiliary routine (version 3.2) --
37 * -- LAPACK is a software package provided by Univ. of Tennessee, --
38 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
39 * November 2006
40 */
41
42 #ifndef FLENS_LAPACK_AUX_LARF_TCC
43 #define FLENS_LAPACK_AUX_LARF_TCC 1
44
45 #include <flens/blas/blas.h>
46 #include <flens/lapack/lapack.h>
47
48 namespace flens { namespace lapack {
49
50 //== generic lapack implementation =============================================
51
52 template <typename VV, typename TAU, typename MC, typename VWORK>
53 void
54 larf_generic(Side side, const DenseVector<VV> &v, const TAU &tau,
55 GeMatrix<MC> &C, DenseVector<VWORK> &work)
56 {
57 using lapack::ilalc;
58 using lapack::ilalr;
59
60 typedef typename GeMatrix<MC>::ElementType ElementType;
61 typedef typename GeMatrix<MC>::IndexType IndexType;
62
63 const Underscore<IndexType> _;
64
65 const ElementType Zero(0), One(1);
66
67 const IndexType m = C.numRows();
68 const IndexType n = C.numCols();
69
70 IndexType lastV = 0;
71 IndexType lastC = 0;
72
73 if (tau!=Zero) {
74 //
75 // Set up variables for scanning V. LASTV begins pointing to the end of V.
76 // Look for the last non-zero row in V.
77 //
78 if (side==Left) {
79 lastV = m;
80 } else {
81 lastV = n;
82 }
83 IndexType i = (v.inc()>0) ? 1 + (lastV-1)*v.inc() : 1;
84 //
85 // Look for the last non-zero row in V.
86 //
87 while (lastV>0 && v(i)==Zero) {
88 --lastV;
89 i -= v.inc();
90 }
91 if (side==Left) {
92 //
93 // Scan for the last non-zero column in C(1:lastv,:).
94 //
95 lastC = ilalc(C(_(1,lastV),_));
96 } else {
97 //
98 // Scan for the last non-zero row in C(:,1:lastv).
99 //
100 lastC = ilalr(C(_,_(1,lastV)));
101 }
102 }
103 //
104 // Note that lastc.eq.0 renders the BLAS operations null; no special
105 // case is needed at this level.
106 //
107 const auto _v = v(_(1,lastV));
108
109 if (side==Left) {
110 //
111 // Form H * C
112 //
113 if (lastV>0) {
114 auto _work = work(_(1,lastC));
115 auto _C = C(_(1,lastV),_(1,lastC));
116 //
117 // work(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1)
118 //
119 blas::mv(Trans, One, _C, _v, Zero, _work);
120 //
121 // C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * work(1:lastc,1)'
122 //
123 blas::r(-tau, _v, _work, _C);
124 }
125 } else {
126 //
127 // Form C * H
128 //
129 if (lastV>0) {
130 auto _work = work(_(1,lastC));
131 auto _C = C(_(1,lastC),_(1,lastV));
132 //
133 // work(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1)
134 //
135 blas::mv(NoTrans, One, _C, _v, Zero, _work);
136 //
137 // C(1:lastc,1:lastv) := C(...) - work(1:lastc,1) * v(1:lastv,1)'
138 //
139 blas::r(-tau, _work, _v, _C);
140 }
141 }
142 }
143
144 //== interface for native lapack ===============================================
145
146 #ifdef CHECK_CXXLAPACK
147
148 template <typename VV, typename TAU, typename MC, typename VWORK>
149 void
150 larf_native(Side side, const DenseVector<VV> &v, const TAU &tau,
151 GeMatrix<MC> &C, DenseVector<VWORK> &work)
152 {
153 typedef typename GeMatrix<MC>::ElementType T;
154
155 const char SIDE = getF77LapackChar<Side>(side);
156 const INTEGER M = C.numRows();
157 const INTEGER N = C.numCols();
158 const INTEGER INCV = v.inc()*v.stride();
159 const INTEGER LDC = C.leadingDimension();
160
161 LAPACK_IMPL(dlarf)(&SIDE,
162 &M,
163 &N,
164 v.data(),
165 &INCV,
166 &tau,
167 C.data(),
168 &LDC,
169 work.data());
170 }
171
172 #endif // CHECK_CXXLAPACK
173
174 //== public interface ==========================================================
175
176 template <typename VV, typename TAU, typename MC, typename VWORK>
177 void
178 larf(Side side, const DenseVector<VV> &v, const TAU &tau,
179 GeMatrix<MC> &C, DenseVector<VWORK> &work)
180 {
181 LAPACK_DEBUG_OUT("larf");
182
183 //
184 // Test the input parameters
185 //
186 ASSERT((v.inc()>0 && v.firstIndex()==1)
187 || (v.inc()<0 && v.lastIndex()==1));
188 ASSERT((side==Left && v.length()==C.numRows())
189 || (side==Right && v.length()==C.numCols()));
190 ASSERT(C.firstRow()==1);
191 ASSERT(C.firstCol()==1);
192 ASSERT((side==Left && work.length()>=C.numCols())
193 || (side==Right && work.length()>=C.numRows()));
194
195 # ifdef CHECK_CXXLAPACK
196 //
197 // Make copies of output arguments
198 //
199 typename GeMatrix<MC>::NoView C_org = C;
200 typename DenseVector<VV>::NoView work_org = work;
201 # endif
202
203 //
204 // Call implementation
205 //
206 larf_generic(side, v, tau, C, work);
207
208 # ifdef CHECK_CXXLAPACK
209 //
210 // Make copies of results computed by the generic implementation
211 //
212 typename GeMatrix<MC>::NoView C_generic = C;
213 typename DenseVector<VV>::NoView work_generic = work;
214
215 //
216 // restore output arguments
217 //
218 C = C_org;
219 work = work_org;
220
221 //
222 // Compare generic results with results from the native implementation
223 //
224 larf_native(side, v, tau, C, work);
225
226 bool failed = false;
227
228 if (! isIdentical(C_generic, C, "C_generic", "C")) {
229 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
230 std::cerr << "F77LAPACK: C = " << C << std::endl;
231 failed = true;
232 }
233
234 if (! isIdentical(work_generic, work, "work_generic", "work")) {
235 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
236 std::cerr << "F77LAPACK: work = " << work << std::endl;
237 failed = true;
238 }
239
240 if (failed) {
241 ASSERT(0);
242 }
243 # endif
244 }
245
246 //-- forwarding ----------------------------------------------------------------
247 template <typename VV, typename TAU, typename MC, typename VWORK>
248 void
249 larf(Side side, const VV &v, const TAU &tau, MC &&C, VWORK &&work)
250 {
251 CHECKPOINT_ENTER;
252 larf(side, v, tau, C, work);
253 CHECKPOINT_LEAVE;
254 }
255
256 } } // namespace lapack, flens
257
258 #endif // FLENS_LAPACK_AUX_LARF_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 * SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
35 *
36 * -- LAPACK auxiliary routine (version 3.2) --
37 * -- LAPACK is a software package provided by Univ. of Tennessee, --
38 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
39 * November 2006
40 */
41
42 #ifndef FLENS_LAPACK_AUX_LARF_TCC
43 #define FLENS_LAPACK_AUX_LARF_TCC 1
44
45 #include <flens/blas/blas.h>
46 #include <flens/lapack/lapack.h>
47
48 namespace flens { namespace lapack {
49
50 //== generic lapack implementation =============================================
51
52 template <typename VV, typename TAU, typename MC, typename VWORK>
53 void
54 larf_generic(Side side, const DenseVector<VV> &v, const TAU &tau,
55 GeMatrix<MC> &C, DenseVector<VWORK> &work)
56 {
57 using lapack::ilalc;
58 using lapack::ilalr;
59
60 typedef typename GeMatrix<MC>::ElementType ElementType;
61 typedef typename GeMatrix<MC>::IndexType IndexType;
62
63 const Underscore<IndexType> _;
64
65 const ElementType Zero(0), One(1);
66
67 const IndexType m = C.numRows();
68 const IndexType n = C.numCols();
69
70 IndexType lastV = 0;
71 IndexType lastC = 0;
72
73 if (tau!=Zero) {
74 //
75 // Set up variables for scanning V. LASTV begins pointing to the end of V.
76 // Look for the last non-zero row in V.
77 //
78 if (side==Left) {
79 lastV = m;
80 } else {
81 lastV = n;
82 }
83 IndexType i = (v.inc()>0) ? 1 + (lastV-1)*v.inc() : 1;
84 //
85 // Look for the last non-zero row in V.
86 //
87 while (lastV>0 && v(i)==Zero) {
88 --lastV;
89 i -= v.inc();
90 }
91 if (side==Left) {
92 //
93 // Scan for the last non-zero column in C(1:lastv,:).
94 //
95 lastC = ilalc(C(_(1,lastV),_));
96 } else {
97 //
98 // Scan for the last non-zero row in C(:,1:lastv).
99 //
100 lastC = ilalr(C(_,_(1,lastV)));
101 }
102 }
103 //
104 // Note that lastc.eq.0 renders the BLAS operations null; no special
105 // case is needed at this level.
106 //
107 const auto _v = v(_(1,lastV));
108
109 if (side==Left) {
110 //
111 // Form H * C
112 //
113 if (lastV>0) {
114 auto _work = work(_(1,lastC));
115 auto _C = C(_(1,lastV),_(1,lastC));
116 //
117 // work(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1)
118 //
119 blas::mv(Trans, One, _C, _v, Zero, _work);
120 //
121 // C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * work(1:lastc,1)'
122 //
123 blas::r(-tau, _v, _work, _C);
124 }
125 } else {
126 //
127 // Form C * H
128 //
129 if (lastV>0) {
130 auto _work = work(_(1,lastC));
131 auto _C = C(_(1,lastC),_(1,lastV));
132 //
133 // work(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1)
134 //
135 blas::mv(NoTrans, One, _C, _v, Zero, _work);
136 //
137 // C(1:lastc,1:lastv) := C(...) - work(1:lastc,1) * v(1:lastv,1)'
138 //
139 blas::r(-tau, _work, _v, _C);
140 }
141 }
142 }
143
144 //== interface for native lapack ===============================================
145
146 #ifdef CHECK_CXXLAPACK
147
148 template <typename VV, typename TAU, typename MC, typename VWORK>
149 void
150 larf_native(Side side, const DenseVector<VV> &v, const TAU &tau,
151 GeMatrix<MC> &C, DenseVector<VWORK> &work)
152 {
153 typedef typename GeMatrix<MC>::ElementType T;
154
155 const char SIDE = getF77LapackChar<Side>(side);
156 const INTEGER M = C.numRows();
157 const INTEGER N = C.numCols();
158 const INTEGER INCV = v.inc()*v.stride();
159 const INTEGER LDC = C.leadingDimension();
160
161 LAPACK_IMPL(dlarf)(&SIDE,
162 &M,
163 &N,
164 v.data(),
165 &INCV,
166 &tau,
167 C.data(),
168 &LDC,
169 work.data());
170 }
171
172 #endif // CHECK_CXXLAPACK
173
174 //== public interface ==========================================================
175
176 template <typename VV, typename TAU, typename MC, typename VWORK>
177 void
178 larf(Side side, const DenseVector<VV> &v, const TAU &tau,
179 GeMatrix<MC> &C, DenseVector<VWORK> &work)
180 {
181 LAPACK_DEBUG_OUT("larf");
182
183 //
184 // Test the input parameters
185 //
186 ASSERT((v.inc()>0 && v.firstIndex()==1)
187 || (v.inc()<0 && v.lastIndex()==1));
188 ASSERT((side==Left && v.length()==C.numRows())
189 || (side==Right && v.length()==C.numCols()));
190 ASSERT(C.firstRow()==1);
191 ASSERT(C.firstCol()==1);
192 ASSERT((side==Left && work.length()>=C.numCols())
193 || (side==Right && work.length()>=C.numRows()));
194
195 # ifdef CHECK_CXXLAPACK
196 //
197 // Make copies of output arguments
198 //
199 typename GeMatrix<MC>::NoView C_org = C;
200 typename DenseVector<VV>::NoView work_org = work;
201 # endif
202
203 //
204 // Call implementation
205 //
206 larf_generic(side, v, tau, C, work);
207
208 # ifdef CHECK_CXXLAPACK
209 //
210 // Make copies of results computed by the generic implementation
211 //
212 typename GeMatrix<MC>::NoView C_generic = C;
213 typename DenseVector<VV>::NoView work_generic = work;
214
215 //
216 // restore output arguments
217 //
218 C = C_org;
219 work = work_org;
220
221 //
222 // Compare generic results with results from the native implementation
223 //
224 larf_native(side, v, tau, C, work);
225
226 bool failed = false;
227
228 if (! isIdentical(C_generic, C, "C_generic", "C")) {
229 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
230 std::cerr << "F77LAPACK: C = " << C << std::endl;
231 failed = true;
232 }
233
234 if (! isIdentical(work_generic, work, "work_generic", "work")) {
235 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
236 std::cerr << "F77LAPACK: work = " << work << std::endl;
237 failed = true;
238 }
239
240 if (failed) {
241 ASSERT(0);
242 }
243 # endif
244 }
245
246 //-- forwarding ----------------------------------------------------------------
247 template <typename VV, typename TAU, typename MC, typename VWORK>
248 void
249 larf(Side side, const VV &v, const TAU &tau, MC &&C, VWORK &&work)
250 {
251 CHECKPOINT_ENTER;
252 larf(side, v, tau, C, work);
253 CHECKPOINT_LEAVE;
254 }
255
256 } } // namespace lapack, flens
257
258 #endif // FLENS_LAPACK_AUX_LARF_TCC