1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /*
 34        SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
 35  *
 36  *  -- LAPACK auxiliary routine (version 3.2) --
 37  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 38  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 39  *     November 2006
 40  */
 41 
 42 #ifndef FLENS_LAPACK_AUX_LASWP_TCC
 43 #define FLENS_LAPACK_AUX_LASWP_TCC 1
 44 
 45 #include <flens/blas/blas.h>
 46 #include <flens/lapack/lapack.h>
 47 
 48 namespace flens { namespace lapack {
 49 
 50 //== generic lapack implementation =============================================
 51 
 52 template <typename MA, typename VP>
 53 void
 54 laswp_generic(GeMatrix<MA> &A, const DenseVector<VP> &piv)
 55 {
 56     typedef typename GeMatrix<MA>::IndexType IndexType;
 57     typedef Range<IndexType>                 Range;
 58 
 59     const IndexType n = A.numCols();
 60 
 61 //
 62 //  Interchange row i with row piv(i) for each of rows K1 through K2.
 63 //
 64     const IndexType pBeg = piv.firstIndex();
 65     const IndexType pEnd = piv.endIndex();
 66     const IndexType pInc = piv.inc();
 67 
 68     const IndexType bs = 32;
 69     const IndexType nbs = (n/bs)*bs;
 70 
 71     if (nbs!=0) {
 72         for (IndexType j=1; j<=nbs; j+=bs) {
 73             const Range cols(j,j+bs-1);
 74             for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
 75                 const IndexType iP = piv(i);
 76                 if (iP!=i) {
 77                     blas::swap(A(i, cols), A(iP, cols));
 78                 }
 79             }
 80         }
 81     }
 82     if (nbs!=n) {
 83         const Range cols(nbs+1,n);
 84         for (IndexType i=pBeg; i!=pEnd; i+=pInc) {
 85             const IndexType iP = piv(i);
 86             if (iP!=i) {
 87                 blas::swap(A(i, cols), A(iP, cols));
 88             }
 89         }
 90     }
 91 }
 92 
 93 //== interface for native lapack ===============================================
 94 
 95 #ifdef CHECK_CXXLAPACK
 96 
 97 template <typename MA, typename VP>
 98 void
 99 laswp_native(GeMatrix<MA> &_A, const DenseVector<VP> &piv)
100 {
101     using std::max;
102     using std::min;
103 
104     typedef typename GeMatrix<MA>::ElementType  T;
105 
106     const INTEGER    N = _A.numCols();
107     T               *A = _A.data();
108     const INTEGER    LDA = _A.leadingDimension();
109     const INTEGER    K1 = min(piv.firstIndex(), piv.lastIndex());
110     const INTEGER    K2 = max(piv.firstIndex(), piv.lastIndex());
111 
112     // set pointer IPIV such that IPIV[K1] points to piv.data()
113     // (assumes an index base of 1)
114     const INTEGER   *IPIV = piv.data() - K1 + 1;
115     INTEGER          INCX = piv.inc();
116 
117     if (IsSame<T, DOUBLE>::value) {
118         LAPACK_IMPL(dlaswp)(&N, A, &LDA, &K1, &K2, IPIV, &INCX);
119     } else {
120         ASSERT(0);
121     }
122 }
123 
124 #endif // CHECK_CXXLAPACK
125 
126 //== public interface ==========================================================
127 
128 template <typename MA, typename VP>
129 void
130 laswp(GeMatrix<MA> &A, const DenseVector<VP> &piv)
131 {
132 //
133 //  Test the input parameters
134 //
135     ASSERT(A.firstRow()==1);
136     ASSERT(A.firstCol()==1);
137     ASSERT((piv.inc()>0 && piv.firstIndex()>=A.firstRow())
138         || (piv.inc()<0 && piv.firstIndex()<=A.numRows()));
139     ASSERT((piv.inc()>0 && piv.lastIndex()<=A.numRows())
140         || (piv.inc()<0 && piv.lastIndex()>=A.firstRow()));
141 
142 #   ifdef CHECK_CXXLAPACK
143 //
144 //  Make copies of output arguments
145 //
146     typename GeMatrix<MA>::NoView       org_A   = A;
147     typename GeMatrix<MA>::NoView       _A      = A;
148 #   endif
149 
150 //
151 //  Call implementation
152 //
153     laswp_generic(A, piv);
154 
155 #   ifdef CHECK_CXXLAPACK
156 //
157 //  Compare results
158 //
159     laswp_native(_A, piv);
160 
161     bool failed = false;
162     if (! isIdentical(A, _A, " A""_A")) {
163         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
164         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
165         failed = true;
166     }
167 
168     if (failed) {
169         std::cerr << "piv.firstIndex() = " << piv.firstIndex() << std::endl;
170         std::cerr << "piv.lastIndex() = " << piv.lastIndex() << std::endl;
171         std::cerr << "piv.inc() = " << piv.inc() << std::endl;
172         std::cerr << "piv = " << piv << std::endl;
173 
174         std::cerr << "org_A = " << org_A << std::endl;
175         ASSERT(0);
176     }
177 #   endif
178 }
179 
180 //-- forwarding ----------------------------------------------------------------
181 template <typename MA, typename VP>
182 void
183 laswp(MA &&A, const VP &&piv)
184 {
185     CHECKPOINT_ENTER;
186     laswp(A, piv);
187     CHECKPOINT_LEAVE;
188 }
189 
190 } } // namespace lapack, flens
191 
192 #endif // FLENS_LAPACK_AUX_LASWP_TCC