1 /*
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef FLENS_BLAS_LEVEL1_RAXPY_TCC
34 #define FLENS_BLAS_LEVEL1_RAXPY_TCC 1
35
36 #include <cxxblas/cxxblas.h>
37 #include <flens/aux/macros.h>
38 #include <flens/typedefs.h>
39
40 #ifdef FLENS_DEBUG_CLOSURES
41 # include <flens/blas/blaslogon.h>
42 #else
43 # include <flens/blas/blaslogoff.h>
44 #endif
45
46 namespace flens { namespace blas {
47
48 //-- raxpy
49 template <typename ALPHA, typename VX, typename VY>
50 void
51 raxpy(const ALPHA &alpha, const DenseVector<VX> &x, DenseVector<VY> &y)
52 {
53 FLENS_BLASLOG_SETTAG("--> ");
54 FLENS_BLASLOG_BEGIN_RAXPY(alpha, x, y);
55
56 if (y.length()==0) {
57 //
58 // So we allow y += 1/alpha*x for an empty vector y
59 //
60 typedef typename DenseVector<VY>::ElementType T;
61 const T Zero(0);
62
63 y.resize(x, Zero);
64 }
65 ASSERT(y.length()==x.length());
66
67 # ifdef HAVE_CXXBLAS_RAXPY
68 cxxblas::raxpy(x.length(), alpha,
69 x.data(), x.stride(),
70 y.data(), y.stride());
71 # else
72 ASSERT(0);
73 # endif
74 FLENS_BLASLOG_END;
75 FLENS_BLASLOG_UNSETTAG;
76 }
77
78 //-- geraxpy
79 //
80 // B += A/alpha
81 //
82 template <typename ALPHA, typename MA, typename MB>
83 void
84 raxpy(Transpose trans,
85 const ALPHA &alpha, const GeMatrix<MA> &A, GeMatrix<MB> &B)
86 {
87 if (B.numRows()==0 || B.numCols()==0) {
88 //
89 // So we allow B += 1/alpha*A for an empty matrix B
90 //
91 typedef typename GeMatrix<MB>::ElementType T;
92 const T Zero(0);
93
94 if ((trans==NoTrans) || (trans==Conj)) {
95 B.resize(A.numRows(), A.numCols(), Zero);
96 } else {
97 B.resize(A.numCols(), A.numRows(), Zero);
98 }
99 }
100
101 # ifndef NDEBUG
102 if ((trans==NoTrans) || (trans==Conj)) {
103 ASSERT((A.numRows()==B.numRows()) && (A.numCols()==B.numCols()));
104 } else {
105 ASSERT((A.numRows()==B.numCols()) && (A.numCols()==B.numRows()));
106 }
107 # endif
108
109
110 trans = (MA::order==MB::order)
111 ? Transpose(trans ^ NoTrans)
112 : Transpose(trans ^ Trans);
113
114
115 # ifndef FLENS_DEBUG_CLOSURES
116 # ifndef NDEBUG
117 if (trans==Trans || trans==ConjTrans) {
118 ASSERT(!DEBUGCLOSURE::identical(A, B));
119 }
120 # endif
121 # else
122 //
123 // If A and B are identical a temporary is needed if we want to use axpy
124 // for B += A^T/alpha or B+= A^H/alpha
125 //
126 if ((trans==Trans || trans==ConjTrans) && DEBUGCLOSURE::identical(A, B)) {
127 typename Result<GeMatrix<MA> >::Type _A = A;
128 FLENS_BLASLOG_TMP_ADD(_A);
129
130 axpy(trans, alpha, _A, B);
131
132 FLENS_BLASLOG_TMP_REMOVE(_A, A);
133 return;
134 }
135 # endif
136
137 FLENS_BLASLOG_SETTAG("--> ");
138 FLENS_BLASLOG_BEGIN_MRAXPY(trans, alpha, A, B);
139
140 # ifdef HAVE_CXXBLAS_GERAXPY
141 geraxpy(MB::order, trans,
142 B.numRows(), B.numCols(), alpha,
143 A.data(), A.leadingDimension(),
144 B.data(), B.leadingDimension());
145 # else
146 ASSERT(0);
147 # endif
148 FLENS_BLASLOG_END;
149 FLENS_BLASLOG_UNSETTAG;
150 }
151
152
153 } } // namespace blas, flens
154
155 #endif // FLENS_BLAS_LEVEL1_RAXPY_TCC
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef FLENS_BLAS_LEVEL1_RAXPY_TCC
34 #define FLENS_BLAS_LEVEL1_RAXPY_TCC 1
35
36 #include <cxxblas/cxxblas.h>
37 #include <flens/aux/macros.h>
38 #include <flens/typedefs.h>
39
40 #ifdef FLENS_DEBUG_CLOSURES
41 # include <flens/blas/blaslogon.h>
42 #else
43 # include <flens/blas/blaslogoff.h>
44 #endif
45
46 namespace flens { namespace blas {
47
48 //-- raxpy
49 template <typename ALPHA, typename VX, typename VY>
50 void
51 raxpy(const ALPHA &alpha, const DenseVector<VX> &x, DenseVector<VY> &y)
52 {
53 FLENS_BLASLOG_SETTAG("--> ");
54 FLENS_BLASLOG_BEGIN_RAXPY(alpha, x, y);
55
56 if (y.length()==0) {
57 //
58 // So we allow y += 1/alpha*x for an empty vector y
59 //
60 typedef typename DenseVector<VY>::ElementType T;
61 const T Zero(0);
62
63 y.resize(x, Zero);
64 }
65 ASSERT(y.length()==x.length());
66
67 # ifdef HAVE_CXXBLAS_RAXPY
68 cxxblas::raxpy(x.length(), alpha,
69 x.data(), x.stride(),
70 y.data(), y.stride());
71 # else
72 ASSERT(0);
73 # endif
74 FLENS_BLASLOG_END;
75 FLENS_BLASLOG_UNSETTAG;
76 }
77
78 //-- geraxpy
79 //
80 // B += A/alpha
81 //
82 template <typename ALPHA, typename MA, typename MB>
83 void
84 raxpy(Transpose trans,
85 const ALPHA &alpha, const GeMatrix<MA> &A, GeMatrix<MB> &B)
86 {
87 if (B.numRows()==0 || B.numCols()==0) {
88 //
89 // So we allow B += 1/alpha*A for an empty matrix B
90 //
91 typedef typename GeMatrix<MB>::ElementType T;
92 const T Zero(0);
93
94 if ((trans==NoTrans) || (trans==Conj)) {
95 B.resize(A.numRows(), A.numCols(), Zero);
96 } else {
97 B.resize(A.numCols(), A.numRows(), Zero);
98 }
99 }
100
101 # ifndef NDEBUG
102 if ((trans==NoTrans) || (trans==Conj)) {
103 ASSERT((A.numRows()==B.numRows()) && (A.numCols()==B.numCols()));
104 } else {
105 ASSERT((A.numRows()==B.numCols()) && (A.numCols()==B.numRows()));
106 }
107 # endif
108
109
110 trans = (MA::order==MB::order)
111 ? Transpose(trans ^ NoTrans)
112 : Transpose(trans ^ Trans);
113
114
115 # ifndef FLENS_DEBUG_CLOSURES
116 # ifndef NDEBUG
117 if (trans==Trans || trans==ConjTrans) {
118 ASSERT(!DEBUGCLOSURE::identical(A, B));
119 }
120 # endif
121 # else
122 //
123 // If A and B are identical a temporary is needed if we want to use axpy
124 // for B += A^T/alpha or B+= A^H/alpha
125 //
126 if ((trans==Trans || trans==ConjTrans) && DEBUGCLOSURE::identical(A, B)) {
127 typename Result<GeMatrix<MA> >::Type _A = A;
128 FLENS_BLASLOG_TMP_ADD(_A);
129
130 axpy(trans, alpha, _A, B);
131
132 FLENS_BLASLOG_TMP_REMOVE(_A, A);
133 return;
134 }
135 # endif
136
137 FLENS_BLASLOG_SETTAG("--> ");
138 FLENS_BLASLOG_BEGIN_MRAXPY(trans, alpha, A, B);
139
140 # ifdef HAVE_CXXBLAS_GERAXPY
141 geraxpy(MB::order, trans,
142 B.numRows(), B.numCols(), alpha,
143 A.data(), A.leadingDimension(),
144 B.data(), B.leadingDimension());
145 # else
146 ASSERT(0);
147 # endif
148 FLENS_BLASLOG_END;
149 FLENS_BLASLOG_UNSETTAG;
150 }
151
152
153 } } // namespace blas, flens
154
155 #endif // FLENS_BLAS_LEVEL1_RAXPY_TCC