1 /*
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef FLENS_BLAS_LEVEL1_AXPY_TCC
34 #define FLENS_BLAS_LEVEL1_AXPY_TCC 1
35
36 #include <cxxblas/cxxblas.h>
37 #include <flens/aux/macros.h>
38 #include <flens/blas/closures/debugclosure.h>
39 #include <flens/typedefs.h>
40
41 #ifdef FLENS_DEBUG_CLOSURES
42 # include <flens/blas/blaslogon.h>
43 #else
44 # include <flens/blas/blaslogoff.h>
45 #endif
46
47 namespace flens { namespace blas {
48
49 //-- axpy
50 template <typename ALPHA, typename VX, typename VY>
51 void
52 axpy(const ALPHA &alpha, const DenseVector<VX> &x, DenseVector<VY> &y)
53 {
54 FLENS_BLASLOG_SETTAG("--> ");
55 FLENS_BLASLOG_BEGIN_AXPY(alpha, x, y);
56
57 if (y.length()==0) {
58 //
59 // So we allow y += alpha*x for an empty vector y
60 //
61 typedef typename DenseVector<VY>::ElementType T;
62 const T Zero(0);
63
64 y.resize(x, Zero);
65 }
66 ASSERT(y.length()==x.length());
67
68 # ifdef HAVE_CXXBLAS_AXPY
69 cxxblas::axpy(x.length(), alpha,
70 x.data(), x.stride(),
71 y.data(), y.stride());
72 # else
73 ASSERT(0);
74 # endif
75
76 FLENS_BLASLOG_END;
77 FLENS_BLASLOG_UNSETTAG;
78 }
79
80 //-- geaxpy
81 template <typename ALPHA, typename MA, typename MB>
82 void
83 axpy(Transpose trans,
84 const ALPHA &alpha, const GeMatrix<MA> &A, GeMatrix<MB> &B)
85 {
86 if (B.numRows()==0 || B.numCols()==0) {
87 //
88 // So we allow B += alpha*A for an empty matrix B
89 //
90 typedef typename GeMatrix<MB>::ElementType T;
91 const T Zero(0);
92
93 if ((trans==NoTrans) || (trans==Conj)) {
94 B.resize(A.numRows(), A.numCols(),
95 A.firstRow(), A.firstCol(),
96 Zero);
97 } else {
98 B.resize(A.numCols(), A.numRows(),
99 A.firstCol(), A.firstRow(),
100 Zero);
101 }
102 }
103
104 # ifndef NDEBUG
105 if ((trans==NoTrans) || (trans==Conj)) {
106 ASSERT((A.numRows()==B.numRows()) && (A.numCols()==B.numCols()));
107 } else {
108 ASSERT((A.numRows()==B.numCols()) && (A.numCols()==B.numRows()));
109 }
110 # endif
111
112
113 trans = (MA::order==MB::order)
114 ? Transpose(trans ^ NoTrans)
115 : Transpose(trans ^ Trans);
116
117
118 # ifndef FLENS_DEBUG_CLOSURES
119 # ifndef NDEBUG
120 if (trans==Trans || trans==ConjTrans) {
121 ASSERT(!DEBUGCLOSURE::identical(A, B));
122 }
123 # endif
124 # else
125 //
126 // If A and B are identical a temporary is needed if we want to use axpy
127 // for B += alpha*A^T or B+= alpha*A^H
128 //
129 if ((trans==Trans || trans==ConjTrans) && DEBUGCLOSURE::identical(A, B)) {
130 typename GeMatrix<MA>::NoView _A;
131 FLENS_BLASLOG_TMP_ADD(_A);
132
133 copy(trans, A, _A);
134 axpy(NoTrans, alpha, A, B);
135
136 FLENS_BLASLOG_TMP_REMOVE(_A, A);
137 return;
138 }
139 # endif
140
141
142 FLENS_BLASLOG_SETTAG("--> ");
143 FLENS_BLASLOG_BEGIN_MAXPY(trans, alpha, A, B);
144
145 # ifdef HAVE_CXXBLAS_GEAXPY
146 geaxpy(MB::order, trans,
147 B.numRows(), B.numCols(), alpha,
148 A.data(), A.leadingDimension(),
149 B.data(), B.leadingDimension());
150 # else
151 ASSERT(0);
152 # endif
153 FLENS_BLASLOG_END;
154 FLENS_BLASLOG_UNSETTAG;
155 }
156
157 } } // namespace blas, flens
158
159 #endif // FLENS_BLAS_LEVEL1_AXPY_TCC
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef FLENS_BLAS_LEVEL1_AXPY_TCC
34 #define FLENS_BLAS_LEVEL1_AXPY_TCC 1
35
36 #include <cxxblas/cxxblas.h>
37 #include <flens/aux/macros.h>
38 #include <flens/blas/closures/debugclosure.h>
39 #include <flens/typedefs.h>
40
41 #ifdef FLENS_DEBUG_CLOSURES
42 # include <flens/blas/blaslogon.h>
43 #else
44 # include <flens/blas/blaslogoff.h>
45 #endif
46
47 namespace flens { namespace blas {
48
49 //-- axpy
50 template <typename ALPHA, typename VX, typename VY>
51 void
52 axpy(const ALPHA &alpha, const DenseVector<VX> &x, DenseVector<VY> &y)
53 {
54 FLENS_BLASLOG_SETTAG("--> ");
55 FLENS_BLASLOG_BEGIN_AXPY(alpha, x, y);
56
57 if (y.length()==0) {
58 //
59 // So we allow y += alpha*x for an empty vector y
60 //
61 typedef typename DenseVector<VY>::ElementType T;
62 const T Zero(0);
63
64 y.resize(x, Zero);
65 }
66 ASSERT(y.length()==x.length());
67
68 # ifdef HAVE_CXXBLAS_AXPY
69 cxxblas::axpy(x.length(), alpha,
70 x.data(), x.stride(),
71 y.data(), y.stride());
72 # else
73 ASSERT(0);
74 # endif
75
76 FLENS_BLASLOG_END;
77 FLENS_BLASLOG_UNSETTAG;
78 }
79
80 //-- geaxpy
81 template <typename ALPHA, typename MA, typename MB>
82 void
83 axpy(Transpose trans,
84 const ALPHA &alpha, const GeMatrix<MA> &A, GeMatrix<MB> &B)
85 {
86 if (B.numRows()==0 || B.numCols()==0) {
87 //
88 // So we allow B += alpha*A for an empty matrix B
89 //
90 typedef typename GeMatrix<MB>::ElementType T;
91 const T Zero(0);
92
93 if ((trans==NoTrans) || (trans==Conj)) {
94 B.resize(A.numRows(), A.numCols(),
95 A.firstRow(), A.firstCol(),
96 Zero);
97 } else {
98 B.resize(A.numCols(), A.numRows(),
99 A.firstCol(), A.firstRow(),
100 Zero);
101 }
102 }
103
104 # ifndef NDEBUG
105 if ((trans==NoTrans) || (trans==Conj)) {
106 ASSERT((A.numRows()==B.numRows()) && (A.numCols()==B.numCols()));
107 } else {
108 ASSERT((A.numRows()==B.numCols()) && (A.numCols()==B.numRows()));
109 }
110 # endif
111
112
113 trans = (MA::order==MB::order)
114 ? Transpose(trans ^ NoTrans)
115 : Transpose(trans ^ Trans);
116
117
118 # ifndef FLENS_DEBUG_CLOSURES
119 # ifndef NDEBUG
120 if (trans==Trans || trans==ConjTrans) {
121 ASSERT(!DEBUGCLOSURE::identical(A, B));
122 }
123 # endif
124 # else
125 //
126 // If A and B are identical a temporary is needed if we want to use axpy
127 // for B += alpha*A^T or B+= alpha*A^H
128 //
129 if ((trans==Trans || trans==ConjTrans) && DEBUGCLOSURE::identical(A, B)) {
130 typename GeMatrix<MA>::NoView _A;
131 FLENS_BLASLOG_TMP_ADD(_A);
132
133 copy(trans, A, _A);
134 axpy(NoTrans, alpha, A, B);
135
136 FLENS_BLASLOG_TMP_REMOVE(_A, A);
137 return;
138 }
139 # endif
140
141
142 FLENS_BLASLOG_SETTAG("--> ");
143 FLENS_BLASLOG_BEGIN_MAXPY(trans, alpha, A, B);
144
145 # ifdef HAVE_CXXBLAS_GEAXPY
146 geaxpy(MB::order, trans,
147 B.numRows(), B.numCols(), alpha,
148 A.data(), A.leadingDimension(),
149 B.data(), B.leadingDimension());
150 # else
151 ASSERT(0);
152 # endif
153 FLENS_BLASLOG_END;
154 FLENS_BLASLOG_UNSETTAG;
155 }
156
157 } } // namespace blas, flens
158
159 #endif // FLENS_BLAS_LEVEL1_AXPY_TCC