1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /*
 34        SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
 35  *
 36  *  -- LAPACK auxiliary routine (version 3.2) --
 37  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 38  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 39  *     November 2006
 40  */
 41 
 42 #ifndef FLENS_LAPACK_AUX_LASWP_TCC
 43 #define FLENS_LAPACK_AUX_LASWP_TCC 1
 44 
 45 #include <flens/blas/blas.h>
 46 #include <flens/lapack/lapack.h>
 47 
 48 namespace flens { namespace lapack {
 49 
 50 //== generic lapack implementation =============================================
 51 
 52 template <typename MA, typename VP>
 53 void
 54 laswp_generic(GeMatrix<MA> &A, const DenseVector<VP> &piv)
 55 {
 56     typedef typename GeMatrix<MA>::IndexType IndexType;
 57     typedef Range<IndexType>                 Range;
 58 
 59     const IndexType n = A.numCols();
 60 
 61 //
 62 //  Interchange row i with row piv(i) for each of rows K1 through K2.
 63 //
 64     const IndexType pBeg = piv.firstIndex();
 65     const IndexType pEnd = piv.endIndex();
 66     const IndexType pInc = piv.inc();
 67 
 68     const IndexType bs = 32;
 69     const IndexType nbs = (n/bs)*bs;
 70 
 71     if (nbs!=0) {
 72         for (IndexType j=1; j<=nbs; j+=bs) {
 73             const Range cols(j,j+bs-1);
 74             for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
 75                 const IndexType iP = piv(i);
 76                 if (iP!=i) {
 77                     blas::swap(A(i, cols), A(iP, cols));
 78                 }
 79             }
 80         }
 81     }
 82     if (nbs!=n) {
 83         const Range cols(nbs+1,n);
 84         for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
 85             const IndexType iP = piv(i);
 86             if (iP!=i) {
 87                 blas::swap(A(i, cols), A(iP, cols));
 88             }
 89         }
 90     }
 91 }
 92 
 93 //== interface for native lapack ===============================================
 94 
 95 #ifdef CHECK_CXXLAPACK
 96 
 97 template <typename MA, typename VP>
 98 void
 99 laswp_native(GeMatrix<MA> &A, const DenseVector<VP> &piv)
100 {
101     using std::max;
102     using std::min;
103 
104     const INTEGER    k1 = min(piv.firstIndex(), piv.lastIndex());
105     const INTEGER    k2 = max(piv.firstIndex(), piv.lastIndex());
106 
107     // set pointer IPIV such that IPIV[K1] points to piv.data()
108     cxxlapack::laswp<INTEGER>(A.numCols(), A.data(), A.leadingDimension(),
109                               k1, k2,
110                               piv.data()-k1+1, piv.inc());
111 }
112 
113 #endif // CHECK_CXXLAPACK
114 
115 //== public interface ==========================================================
116 
117 template <typename MA, typename VP>
118 void
119 laswp(GeMatrix<MA> &A, const DenseVector<VP> &piv)
120 {
121 //
122 //  Test the input parameters
123 //
124     ASSERT(A.firstRow()==1);
125     ASSERT(A.firstCol()==1);
126     ASSERT((piv.inc()>0 && piv.firstIndex()>=A.firstRow())
127         || (piv.inc()<0 && piv.firstIndex()<=A.numRows()));
128     ASSERT((piv.inc()>0 && piv.lastIndex()<=A.numRows())
129         || (piv.inc()<0 && piv.lastIndex()>=A.firstRow()));
130 
131 #   ifdef CHECK_CXXLAPACK
132 //
133 //  Make copies of output arguments
134 //
135     typename GeMatrix<MA>::NoView       org_A   = A;
136     typename GeMatrix<MA>::NoView       _A      = A;
137 #   endif
138 
139 //
140 //  Call implementation
141 //
142     laswp_generic(A, piv);
143 
144 #   ifdef CHECK_CXXLAPACK
145 //
146 //  Compare results
147 //
148     laswp_native(_A, piv);
149 
150     bool failed = false;
151     if (! isIdentical(A, _A, " A""_A")) {
152         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
153         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
154         failed = true;
155     }
156 
157     if (failed) {
158         std::cerr << "piv.firstIndex() = " << piv.firstIndex() << std::endl;
159         std::cerr << "piv.lastIndex() = " << piv.lastIndex() << std::endl;
160         std::cerr << "piv.inc() = " << piv.inc() << std::endl;
161         std::cerr << "piv = " << piv << std::endl;
162 
163         std::cerr << "org_A = " << org_A << std::endl;
164         ASSERT(0);
165     }
166 #   endif
167 }
168 
169 //-- forwarding ----------------------------------------------------------------
170 template <typename MA, typename VP>
171 void
172 laswp(MA &&A, const VP &&piv)
173 {
174     CHECKPOINT_ENTER;
175     laswp(A, piv);
176     CHECKPOINT_LEAVE;
177 }
178 
179 } } // namespace lapack, flens
180 
181 #endif // FLENS_LAPACK_AUX_LASWP_TCC