1 /*
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TRMV_TCC
34 #define CXXBLAS_LEVEL2_TRMV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 trmv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A, IndexType ldA,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Upper) ? Lower : Upper;
49 trmv_generic(RowMajor, upLo, transA, diag, n, A, ldA, x, incX);
50 return;
51 }
52 if (transA==NoTrans) {
53 if (upLo==Upper) {
54 if (diag==NonUnit) {
55 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
56 VX _x;
57 dotu_generic(n-i, A+i*(ldA+1), IndexType(1),
58 x+iX, incX, _x);
59 x[iX] = _x;
60 }
61 } else { /* diag==Unit */
62 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
63 VX _x;
64 dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
65 x+iX+incX, incX, _x);
66 x[iX] += _x;
67 }
68 }
69 } else { /* upLo==Lower */
70 if (diag==NonUnit) {
71 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
72 VX _x;
73 dotu_generic(i+1, A+i*ldA, IndexType(1),
74 x, incX, _x);
75 x[iX] = _x;
76 }
77 } else { /* diag==Unit */
78 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
79 VX _x;
80 dotu_generic(i, A+i*ldA, IndexType(1),
81 x, incX, _x);
82 x[iX] += _x;
83 }
84 }
85 }
86 }
87 if (transA==Conj) {
88 if (upLo==Upper) {
89 if (diag==NonUnit) {
90 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
91 VX _x;
92 dot_generic(n-i, A+i*(ldA+1), IndexType(1),
93 x+iX, incX, _x);
94 x[iX] = _x;
95 }
96 } else { /* diag==Unit */
97 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
98 VX _x;
99 dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
100 x+iX+incX, incX, _x);
101 x[iX] += _x;
102 }
103 }
104 } else { /* upLo==Lower */
105 if (diag==NonUnit) {
106 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
107 VX _x;
108 dot_generic(i+1, A+i*ldA, IndexType(1),
109 x, incX, _x);
110 x[iX] = _x;
111 }
112 } else { /* diag==Unit */
113 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
114 VX _x;
115 dot_generic(i, A+i*ldA, IndexType(1),
116 x, incX, _x);
117 x[iX] += _x;
118 }
119 }
120 }
121 }
122 if (transA==Trans) {
123 if (upLo==Upper) {
124 if (diag==NonUnit) {
125 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
126 VX _x;
127 dotu_generic(i+1, A+i, IndexType(ldA),
128 x, incX, _x);
129 x[iX] = _x;
130 }
131 } else { /* diag==Unit */
132 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
133 VX _x;
134 dotu_generic(i, A+i, IndexType(ldA),
135 x, incX, _x);
136 x[iX] += _x;
137 }
138 }
139 } else { /* upLo==Lower */
140 if (diag==NonUnit) {
141 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
142 VX _x;
143 dotu_generic(n-i, A+i*(ldA+1), IndexType(ldA),
144 x+iX, incX, _x);
145 x[iX] = _x;
146 }
147 } else {
148 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
149 VX _x;
150 dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
151 x+iX+incX, incX, _x);
152 x[iX] += _x;
153 }
154 }
155 }
156 }
157 if (transA==ConjTrans) {
158 if (upLo==Upper) {
159 if (diag==NonUnit) {
160 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
161 VX _x;
162 dot_generic(i+1, A+i, IndexType(ldA),
163 x, incX, _x);
164 x[iX] = _x;
165 }
166 } else { /* diag==Unit */
167 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
168 VX _x;
169 dot_generic(i, A+i, IndexType(ldA),
170 x, incX, _x);
171 x[iX] += _x;
172 }
173 }
174 } else { /* upLo==Lower */
175 if (diag==NonUnit) {
176 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
177 VX _x;
178 dot_generic(n-i, A+i*(ldA+1), IndexType(ldA),
179 x+iX, incX, _x);
180 x[iX] = _x;
181 }
182 } else {
183 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
184 VX _x;
185 dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
186 x+iX+incX, incX, _x);
187 x[iX] += _x;
188 }
189 }
190 }
191 }
192 }
193
194 template <typename IndexType, typename MA, typename VX>
195 void
196 trmv(StorageOrder order, StorageUpLo upLo,
197 Transpose transA, Diag diag,
198 IndexType n,
199 const MA *A, IndexType ldA,
200 VX *x, IndexType incX)
201 {
202 CXXBLAS_DEBUG_OUT("trmv_generic");
203
204 if (incX<0) {
205 x -= incX*(n-1);
206 }
207 trmv_generic(order, upLo, transA, diag, n, A, ldA, x, incX);
208 }
209
210 #ifdef HAVE_CBLAS
211
212 // strmv
213 template <typename IndexType>
214 typename If<IndexType>::isBlasCompatibleInteger
215 trmv(StorageOrder order, StorageUpLo upLo,
216 Transpose transA, Diag diag,
217 IndexType n,
218 const float *A, IndexType ldA,
219 float *x, IndexType incX)
220 {
221 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_strmv");
222
223 cblas_strmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
224 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
225 n,
226 A, ldA,
227 x, incX);
228 }
229
230 // dtrmv
231 template <typename IndexType>
232 typename If<IndexType>::isBlasCompatibleInteger
233 trmv(StorageOrder order, StorageUpLo upLo,
234 Transpose transA, Diag diag,
235 IndexType n,
236 const double *A, IndexType ldA,
237 double *x, IndexType incX)
238 {
239 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dtrmv");
240
241 cblas_dtrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
242 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
243 n,
244 A, ldA,
245 x, incX);
246 }
247
248 // ctrmv
249 template <typename IndexType>
250 typename If<IndexType>::isBlasCompatibleInteger
251 trmv(StorageOrder order, StorageUpLo upLo,
252 Transpose transA, Diag diag,
253 IndexType n,
254 const ComplexFloat *A, IndexType ldA,
255 ComplexFloat *x, IndexType incX)
256 {
257 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ctrmv");
258
259 cblas_ctrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
260 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
261 n,
262 reinterpret_cast<const float *>(A), ldA,
263 reinterpret_cast<const float *>(x), incX);
264 }
265
266 // ztrmv
267 template <typename IndexType>
268 typename If<IndexType>::isBlasCompatibleInteger
269 trmv(StorageOrder order, StorageUpLo upLo,
270 Transpose transA, Diag diag,
271 IndexType n,
272 const ComplexDouble *A, IndexType ldA,
273 ComplexDouble *x, IndexType incX)
274 {
275 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ztrmv");
276
277 cblas_ztrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
278 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
279 n,
280 reinterpret_cast<const double *>(A), ldA,
281 reinterpret_cast<const double *>(x), incX);
282 }
283
284 #endif // HAVE_CBLAS
285
286 } // namespace flens
287
288 #endif // CXXBLAS_LEVEL2_TRMV_TCC
289
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TRMV_TCC
34 #define CXXBLAS_LEVEL2_TRMV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 trmv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A, IndexType ldA,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Upper) ? Lower : Upper;
49 trmv_generic(RowMajor, upLo, transA, diag, n, A, ldA, x, incX);
50 return;
51 }
52 if (transA==NoTrans) {
53 if (upLo==Upper) {
54 if (diag==NonUnit) {
55 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
56 VX _x;
57 dotu_generic(n-i, A+i*(ldA+1), IndexType(1),
58 x+iX, incX, _x);
59 x[iX] = _x;
60 }
61 } else { /* diag==Unit */
62 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
63 VX _x;
64 dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
65 x+iX+incX, incX, _x);
66 x[iX] += _x;
67 }
68 }
69 } else { /* upLo==Lower */
70 if (diag==NonUnit) {
71 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
72 VX _x;
73 dotu_generic(i+1, A+i*ldA, IndexType(1),
74 x, incX, _x);
75 x[iX] = _x;
76 }
77 } else { /* diag==Unit */
78 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
79 VX _x;
80 dotu_generic(i, A+i*ldA, IndexType(1),
81 x, incX, _x);
82 x[iX] += _x;
83 }
84 }
85 }
86 }
87 if (transA==Conj) {
88 if (upLo==Upper) {
89 if (diag==NonUnit) {
90 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
91 VX _x;
92 dot_generic(n-i, A+i*(ldA+1), IndexType(1),
93 x+iX, incX, _x);
94 x[iX] = _x;
95 }
96 } else { /* diag==Unit */
97 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
98 VX _x;
99 dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
100 x+iX+incX, incX, _x);
101 x[iX] += _x;
102 }
103 }
104 } else { /* upLo==Lower */
105 if (diag==NonUnit) {
106 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
107 VX _x;
108 dot_generic(i+1, A+i*ldA, IndexType(1),
109 x, incX, _x);
110 x[iX] = _x;
111 }
112 } else { /* diag==Unit */
113 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
114 VX _x;
115 dot_generic(i, A+i*ldA, IndexType(1),
116 x, incX, _x);
117 x[iX] += _x;
118 }
119 }
120 }
121 }
122 if (transA==Trans) {
123 if (upLo==Upper) {
124 if (diag==NonUnit) {
125 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
126 VX _x;
127 dotu_generic(i+1, A+i, IndexType(ldA),
128 x, incX, _x);
129 x[iX] = _x;
130 }
131 } else { /* diag==Unit */
132 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
133 VX _x;
134 dotu_generic(i, A+i, IndexType(ldA),
135 x, incX, _x);
136 x[iX] += _x;
137 }
138 }
139 } else { /* upLo==Lower */
140 if (diag==NonUnit) {
141 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
142 VX _x;
143 dotu_generic(n-i, A+i*(ldA+1), IndexType(ldA),
144 x+iX, incX, _x);
145 x[iX] = _x;
146 }
147 } else {
148 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
149 VX _x;
150 dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
151 x+iX+incX, incX, _x);
152 x[iX] += _x;
153 }
154 }
155 }
156 }
157 if (transA==ConjTrans) {
158 if (upLo==Upper) {
159 if (diag==NonUnit) {
160 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
161 VX _x;
162 dot_generic(i+1, A+i, IndexType(ldA),
163 x, incX, _x);
164 x[iX] = _x;
165 }
166 } else { /* diag==Unit */
167 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
168 VX _x;
169 dot_generic(i, A+i, IndexType(ldA),
170 x, incX, _x);
171 x[iX] += _x;
172 }
173 }
174 } else { /* upLo==Lower */
175 if (diag==NonUnit) {
176 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
177 VX _x;
178 dot_generic(n-i, A+i*(ldA+1), IndexType(ldA),
179 x+iX, incX, _x);
180 x[iX] = _x;
181 }
182 } else {
183 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
184 VX _x;
185 dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
186 x+iX+incX, incX, _x);
187 x[iX] += _x;
188 }
189 }
190 }
191 }
192 }
193
194 template <typename IndexType, typename MA, typename VX>
195 void
196 trmv(StorageOrder order, StorageUpLo upLo,
197 Transpose transA, Diag diag,
198 IndexType n,
199 const MA *A, IndexType ldA,
200 VX *x, IndexType incX)
201 {
202 CXXBLAS_DEBUG_OUT("trmv_generic");
203
204 if (incX<0) {
205 x -= incX*(n-1);
206 }
207 trmv_generic(order, upLo, transA, diag, n, A, ldA, x, incX);
208 }
209
210 #ifdef HAVE_CBLAS
211
212 // strmv
213 template <typename IndexType>
214 typename If<IndexType>::isBlasCompatibleInteger
215 trmv(StorageOrder order, StorageUpLo upLo,
216 Transpose transA, Diag diag,
217 IndexType n,
218 const float *A, IndexType ldA,
219 float *x, IndexType incX)
220 {
221 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_strmv");
222
223 cblas_strmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
224 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
225 n,
226 A, ldA,
227 x, incX);
228 }
229
230 // dtrmv
231 template <typename IndexType>
232 typename If<IndexType>::isBlasCompatibleInteger
233 trmv(StorageOrder order, StorageUpLo upLo,
234 Transpose transA, Diag diag,
235 IndexType n,
236 const double *A, IndexType ldA,
237 double *x, IndexType incX)
238 {
239 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dtrmv");
240
241 cblas_dtrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
242 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
243 n,
244 A, ldA,
245 x, incX);
246 }
247
248 // ctrmv
249 template <typename IndexType>
250 typename If<IndexType>::isBlasCompatibleInteger
251 trmv(StorageOrder order, StorageUpLo upLo,
252 Transpose transA, Diag diag,
253 IndexType n,
254 const ComplexFloat *A, IndexType ldA,
255 ComplexFloat *x, IndexType incX)
256 {
257 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ctrmv");
258
259 cblas_ctrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
260 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
261 n,
262 reinterpret_cast<const float *>(A), ldA,
263 reinterpret_cast<const float *>(x), incX);
264 }
265
266 // ztrmv
267 template <typename IndexType>
268 typename If<IndexType>::isBlasCompatibleInteger
269 trmv(StorageOrder order, StorageUpLo upLo,
270 Transpose transA, Diag diag,
271 IndexType n,
272 const ComplexDouble *A, IndexType ldA,
273 ComplexDouble *x, IndexType incX)
274 {
275 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ztrmv");
276
277 cblas_ztrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
278 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
279 n,
280 reinterpret_cast<const double *>(A), ldA,
281 reinterpret_cast<const double *>(x), incX);
282 }
283
284 #endif // HAVE_CBLAS
285
286 } // namespace flens
287
288 #endif // CXXBLAS_LEVEL2_TRMV_TCC
289