1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
35 *
36 * -- LAPACK auxiliary routine (version 3.2) --
37 * -- LAPACK is a software package provided by Univ. of Tennessee, --
38 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
39 * November 2006
40 */
41
42 #ifndef FLENS_LAPACK_AUX_LASWP_TCC
43 #define FLENS_LAPACK_AUX_LASWP_TCC 1
44
45 #include <flens/blas/blas.h>
46 #include <flens/lapack/lapack.h>
47
48 namespace flens { namespace lapack {
49
50 //== generic lapack implementation =============================================
51
52 template <typename MA, typename VP>
53 void
54 laswp_generic(GeMatrix<MA> &A, const DenseVector<VP> &piv)
55 {
56 typedef typename GeMatrix<MA>::IndexType IndexType;
57 typedef Range<IndexType> Range;
58
59 const IndexType n = A.numCols();
60
61 //
62 // Interchange row i with row piv(i) for each of rows K1 through K2.
63 //
64 const IndexType pBeg = piv.firstIndex();
65 const IndexType pEnd = piv.endIndex();
66 const IndexType pInc = piv.inc();
67
68 const IndexType bs = 32;
69 const IndexType nbs = (n/bs)*bs;
70
71 if (nbs!=0) {
72 for (IndexType j=1; j<=nbs; j+=bs) {
73 const Range cols(j,j+bs-1);
74 for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
75 const IndexType iP = piv(i);
76 if (iP!=i) {
77 blas::swap(A(i, cols), A(iP, cols));
78 }
79 }
80 }
81 }
82 if (nbs!=n) {
83 const Range cols(nbs+1,n);
84 for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
85 const IndexType iP = piv(i);
86 if (iP!=i) {
87 blas::swap(A(i, cols), A(iP, cols));
88 }
89 }
90 }
91 }
92
93 //== interface for native lapack ===============================================
94
95 #ifdef CHECK_CXXLAPACK
96
97 template <typename MA, typename VP>
98 void
99 laswp_native(GeMatrix<MA> &A, const DenseVector<VP> &piv)
100 {
101 using std::max;
102 using std::min;
103
104 const INTEGER k1 = min(piv.firstIndex(), piv.lastIndex());
105 const INTEGER k2 = max(piv.firstIndex(), piv.lastIndex());
106
107 // set pointer IPIV such that IPIV[K1] points to piv.data()
108 cxxlapack::laswp<INTEGER>(A.numCols(), A.data(), A.leadingDimension(),
109 k1, k2,
110 piv.data()-k1+1, piv.inc());
111 }
112
113 #endif // CHECK_CXXLAPACK
114
115 //== public interface ==========================================================
116
117 template <typename MA, typename VP>
118 void
119 laswp(GeMatrix<MA> &A, const DenseVector<VP> &piv)
120 {
121 //
122 // Test the input parameters
123 //
124 ASSERT(A.firstRow()==1);
125 ASSERT(A.firstCol()==1);
126 ASSERT((piv.inc()>0 && piv.firstIndex()>=A.firstRow())
127 || (piv.inc()<0 && piv.firstIndex()<=A.numRows()));
128 ASSERT((piv.inc()>0 && piv.lastIndex()<=A.numRows())
129 || (piv.inc()<0 && piv.lastIndex()>=A.firstRow()));
130
131 # ifdef CHECK_CXXLAPACK
132 //
133 // Make copies of output arguments
134 //
135 typename GeMatrix<MA>::NoView org_A = A;
136 typename GeMatrix<MA>::NoView _A = A;
137 # endif
138
139 //
140 // Call implementation
141 //
142 laswp_generic(A, piv);
143
144 # ifdef CHECK_CXXLAPACK
145 //
146 // Compare results
147 //
148 laswp_native(_A, piv);
149
150 bool failed = false;
151 if (! isIdentical(A, _A, " A", "_A")) {
152 std::cerr << "CXXLAPACK: A = " << A << std::endl;
153 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
154 failed = true;
155 }
156
157 if (failed) {
158 std::cerr << "piv.firstIndex() = " << piv.firstIndex() << std::endl;
159 std::cerr << "piv.lastIndex() = " << piv.lastIndex() << std::endl;
160 std::cerr << "piv.inc() = " << piv.inc() << std::endl;
161 std::cerr << "piv = " << piv << std::endl;
162
163 std::cerr << "org_A = " << org_A << std::endl;
164 ASSERT(0);
165 }
166 # endif
167 }
168
169 //-- forwarding ----------------------------------------------------------------
170 template <typename MA, typename VP>
171 void
172 laswp(MA &&A, const VP &&piv)
173 {
174 CHECKPOINT_ENTER;
175 laswp(A, piv);
176 CHECKPOINT_LEAVE;
177 }
178
179 } } // namespace lapack, flens
180
181 #endif // FLENS_LAPACK_AUX_LASWP_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
35 *
36 * -- LAPACK auxiliary routine (version 3.2) --
37 * -- LAPACK is a software package provided by Univ. of Tennessee, --
38 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
39 * November 2006
40 */
41
42 #ifndef FLENS_LAPACK_AUX_LASWP_TCC
43 #define FLENS_LAPACK_AUX_LASWP_TCC 1
44
45 #include <flens/blas/blas.h>
46 #include <flens/lapack/lapack.h>
47
48 namespace flens { namespace lapack {
49
50 //== generic lapack implementation =============================================
51
52 template <typename MA, typename VP>
53 void
54 laswp_generic(GeMatrix<MA> &A, const DenseVector<VP> &piv)
55 {
56 typedef typename GeMatrix<MA>::IndexType IndexType;
57 typedef Range<IndexType> Range;
58
59 const IndexType n = A.numCols();
60
61 //
62 // Interchange row i with row piv(i) for each of rows K1 through K2.
63 //
64 const IndexType pBeg = piv.firstIndex();
65 const IndexType pEnd = piv.endIndex();
66 const IndexType pInc = piv.inc();
67
68 const IndexType bs = 32;
69 const IndexType nbs = (n/bs)*bs;
70
71 if (nbs!=0) {
72 for (IndexType j=1; j<=nbs; j+=bs) {
73 const Range cols(j,j+bs-1);
74 for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
75 const IndexType iP = piv(i);
76 if (iP!=i) {
77 blas::swap(A(i, cols), A(iP, cols));
78 }
79 }
80 }
81 }
82 if (nbs!=n) {
83 const Range cols(nbs+1,n);
84 for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
85 const IndexType iP = piv(i);
86 if (iP!=i) {
87 blas::swap(A(i, cols), A(iP, cols));
88 }
89 }
90 }
91 }
92
93 //== interface for native lapack ===============================================
94
95 #ifdef CHECK_CXXLAPACK
96
97 template <typename MA, typename VP>
98 void
99 laswp_native(GeMatrix<MA> &A, const DenseVector<VP> &piv)
100 {
101 using std::max;
102 using std::min;
103
104 const INTEGER k1 = min(piv.firstIndex(), piv.lastIndex());
105 const INTEGER k2 = max(piv.firstIndex(), piv.lastIndex());
106
107 // set pointer IPIV such that IPIV[K1] points to piv.data()
108 cxxlapack::laswp<INTEGER>(A.numCols(), A.data(), A.leadingDimension(),
109 k1, k2,
110 piv.data()-k1+1, piv.inc());
111 }
112
113 #endif // CHECK_CXXLAPACK
114
115 //== public interface ==========================================================
116
117 template <typename MA, typename VP>
118 void
119 laswp(GeMatrix<MA> &A, const DenseVector<VP> &piv)
120 {
121 //
122 // Test the input parameters
123 //
124 ASSERT(A.firstRow()==1);
125 ASSERT(A.firstCol()==1);
126 ASSERT((piv.inc()>0 && piv.firstIndex()>=A.firstRow())
127 || (piv.inc()<0 && piv.firstIndex()<=A.numRows()));
128 ASSERT((piv.inc()>0 && piv.lastIndex()<=A.numRows())
129 || (piv.inc()<0 && piv.lastIndex()>=A.firstRow()));
130
131 # ifdef CHECK_CXXLAPACK
132 //
133 // Make copies of output arguments
134 //
135 typename GeMatrix<MA>::NoView org_A = A;
136 typename GeMatrix<MA>::NoView _A = A;
137 # endif
138
139 //
140 // Call implementation
141 //
142 laswp_generic(A, piv);
143
144 # ifdef CHECK_CXXLAPACK
145 //
146 // Compare results
147 //
148 laswp_native(_A, piv);
149
150 bool failed = false;
151 if (! isIdentical(A, _A, " A", "_A")) {
152 std::cerr << "CXXLAPACK: A = " << A << std::endl;
153 std::cerr << "F77LAPACK: _A = " << _A << std::endl;
154 failed = true;
155 }
156
157 if (failed) {
158 std::cerr << "piv.firstIndex() = " << piv.firstIndex() << std::endl;
159 std::cerr << "piv.lastIndex() = " << piv.lastIndex() << std::endl;
160 std::cerr << "piv.inc() = " << piv.inc() << std::endl;
161 std::cerr << "piv = " << piv << std::endl;
162
163 std::cerr << "org_A = " << org_A << std::endl;
164 ASSERT(0);
165 }
166 # endif
167 }
168
169 //-- forwarding ----------------------------------------------------------------
170 template <typename MA, typename VP>
171 void
172 laswp(MA &&A, const VP &&piv)
173 {
174 CHECKPOINT_ENTER;
175 laswp(A, piv);
176 CHECKPOINT_LEAVE;
177 }
178
179 } } // namespace lapack, flens
180
181 #endif // FLENS_LAPACK_AUX_LASWP_TCC