1 /*
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TRSV_TCC
34 #define CXXBLAS_LEVEL2_TRSV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 trsv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A, IndexType ldA,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Lower) ? Upper : Lower;
49 }
50 if (transA==NoTrans) {
51 if (upLo==Upper) {
52 if (diag==NonUnit) {
53 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
54 VX _x;
55 dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
56 x+iX+incX, incX, _x);
57 x[iX] -= _x;
58 x[iX] /= A[i*(ldA+1)];
59 }
60 } else { /* diag==Unit */
61 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
62 VX _x;
63 dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
64 x+iX+incX, incX, _x);
65 x[iX] -= _x;
66 }
67 }
68 } else { /* upLo==Lower */
69 if (diag==NonUnit) {
70 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
71 VX _x;
72 dotu_generic(i, A+i*ldA, IndexType(1),
73 x, incX, _x);
74 x[iX] -= _x;
75 x[iX] /= A[i*(ldA+1)];
76 }
77 } else { /* diag==Unit */
78 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
79 VX _x;
80 dotu_generic(i, A+i*ldA, IndexType(1),
81 x, incX, _x);
82 x[iX] -= _x;
83 }
84 }
85 }
86 }
87 if (transA==Conj) {
88 if (upLo==Upper) {
89 if (diag==NonUnit) {
90 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
91 VX _x;
92 dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
93 x+iX+incX, incX, _x);
94 x[iX] -= _x;
95 x[iX] /= conjugate(A[i*(ldA+1)]);
96 }
97 } else { /* diag==Unit */
98 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
99 VX _x;
100 dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
101 x+iX+incX, incX, _x);
102 x[iX] -= _x;
103 }
104 }
105 } else { /* upLo==Lower */
106 if (diag==NonUnit) {
107 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
108 VX _x;
109 dot_generic(i, A+i*ldA, IndexType(1),
110 x, incX, _x);
111 x[iX] -= _x;
112 x[iX] /= conjugate(A[i*(ldA+1)]);
113 }
114 } else { /* diag==Unit */
115 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
116 VX _x;
117 dot_generic(i, A+i*ldA, IndexType(1),
118 x, incX, _x);
119 x[iX] -= _x;
120 }
121 }
122 }
123 }
124 if (transA==Trans) {
125 if (upLo==Upper) {
126 if (diag==NonUnit) {
127 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
128 VX _x;
129 dotu_generic(i, A+i, IndexType(ldA),
130 x, incX, _x);
131 x[iX] -= _x;
132 x[iX] /= A[i*(ldA+1)];
133 }
134 } else { /* diag==Unit */
135 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
136 VX _x;
137 dotu_generic(i, A+i, IndexType(ldA),
138 x, incX, _x);
139 x[iX] -= _x;
140 }
141 }
142 } else { /* upLo==Lower */
143 if (diag==NonUnit) {
144 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
145 VX _x;
146 dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
147 x+iX+incX, incX, _x);
148 x[iX] -= _x;
149 x[iX] /= A[i*(ldA+1)];
150 }
151 } else {
152 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
153 VX _x;
154 dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
155 x+iX+incX, incX, _x);
156 x[iX] -= _x;
157 }
158 }
159 }
160 }
161 if (transA==ConjTrans) {
162 if (upLo==Upper) {
163 if (diag==NonUnit) {
164 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
165 VX _x;
166 dot_generic(i, A+i, IndexType(ldA),
167 x, incX, _x);
168 x[iX] -= _x;
169 x[iX] /= conjugate(A[i*(ldA+1)]);
170 }
171 } else { /* diag==Unit */
172 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
173 VX _x;
174 dot_generic(i, A+i, IndexType(ldA),
175 x, incX, _x);
176 x[iX] -= _x;
177 }
178 }
179 } else { /* upLo==Lower */
180 if (diag==NonUnit) {
181 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
182 VX _x;
183 dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
184 x+iX+incX, incX, _x);
185 x[iX] -= _x;
186 x[iX] /= conjugate(A[i*(ldA+1)]);
187 }
188 } else {
189 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
190 VX _x;
191 dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
192 x+iX+incX, incX, _x);
193 x[iX] -= _x;
194 }
195 }
196 }
197 }
198 }
199
200 template <typename IndexType, typename MA, typename VX>
201 void
202 trsv(StorageOrder order, StorageUpLo upLo,
203 Transpose transA, Diag diag,
204 IndexType n,
205 const MA *A, IndexType ldA,
206 VX *x, IndexType incX)
207 {
208 CXXBLAS_DEBUG_OUT("trsv_generic");
209
210 if (incX<0) {
211 x -= incX*(n-1);
212 }
213 trsv_generic(order, upLo, transA, diag, n, A, ldA, x, incX);
214 }
215
216 #ifdef HAVE_CBLAS
217
218 // strsv
219 template <typename IndexType>
220 typename If<IndexType>::isBlasCompatibleInteger
221 trsv(StorageOrder order, StorageUpLo upLo,
222 Transpose transA, Diag diag,
223 IndexType n,
224 const float *A, IndexType ldA,
225 float *x, IndexType incX)
226 {
227 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_strsv");
228
229 cblas_strsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
230 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
231 n,
232 A, ldA,
233 x, incX);
234 }
235
236 // dtrsv
237 template <typename IndexType>
238 typename If<IndexType>::isBlasCompatibleInteger
239 trsv(StorageOrder order, StorageUpLo upLo,
240 Transpose transA, Diag diag,
241 IndexType n,
242 const double *A, IndexType ldA,
243 double *x, IndexType incX)
244 {
245 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dtrsv");
246
247 cblas_dtrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
248 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
249 n,
250 A, ldA,
251 x, incX);
252 }
253
254 // ctrsv
255 template <typename IndexType>
256 typename If<IndexType>::isBlasCompatibleInteger
257 trsv(StorageOrder order, StorageUpLo upLo,
258 Transpose transA, Diag diag,
259 IndexType n,
260 const ComplexFloat *A, IndexType ldA,
261 ComplexFloat *x, IndexType incX)
262 {
263 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ctrsv");
264
265 cblas_ctrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
266 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
267 n,
268 reinterpret_cast<const float *>(A), ldA,
269 reinterpret_cast<const float *>(x), incX);
270 }
271
272 // ztrsv
273 template <typename IndexType>
274 typename If<IndexType>::isBlasCompatibleInteger
275 trsv(StorageOrder order, StorageUpLo upLo,
276 Transpose transA, Diag diag,
277 IndexType n,
278 const ComplexDouble *A, IndexType ldA,
279 ComplexDouble *x, IndexType incX)
280 {
281 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ztrsv");
282
283 cblas_ztrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
284 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
285 n,
286 reinterpret_cast<const double *>(A), ldA,
287 reinterpret_cast<const double *>(x), incX);
288 }
289
290 #endif // HAVE_CBLAS
291
292 } // namespace cxxblas
293
294 #endif // CXXBLAS_LEVEL2_TRSV_TCC
295
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TRSV_TCC
34 #define CXXBLAS_LEVEL2_TRSV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 trsv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A, IndexType ldA,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Lower) ? Upper : Lower;
49 }
50 if (transA==NoTrans) {
51 if (upLo==Upper) {
52 if (diag==NonUnit) {
53 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
54 VX _x;
55 dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
56 x+iX+incX, incX, _x);
57 x[iX] -= _x;
58 x[iX] /= A[i*(ldA+1)];
59 }
60 } else { /* diag==Unit */
61 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
62 VX _x;
63 dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
64 x+iX+incX, incX, _x);
65 x[iX] -= _x;
66 }
67 }
68 } else { /* upLo==Lower */
69 if (diag==NonUnit) {
70 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
71 VX _x;
72 dotu_generic(i, A+i*ldA, IndexType(1),
73 x, incX, _x);
74 x[iX] -= _x;
75 x[iX] /= A[i*(ldA+1)];
76 }
77 } else { /* diag==Unit */
78 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
79 VX _x;
80 dotu_generic(i, A+i*ldA, IndexType(1),
81 x, incX, _x);
82 x[iX] -= _x;
83 }
84 }
85 }
86 }
87 if (transA==Conj) {
88 if (upLo==Upper) {
89 if (diag==NonUnit) {
90 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
91 VX _x;
92 dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
93 x+iX+incX, incX, _x);
94 x[iX] -= _x;
95 x[iX] /= conjugate(A[i*(ldA+1)]);
96 }
97 } else { /* diag==Unit */
98 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
99 VX _x;
100 dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
101 x+iX+incX, incX, _x);
102 x[iX] -= _x;
103 }
104 }
105 } else { /* upLo==Lower */
106 if (diag==NonUnit) {
107 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
108 VX _x;
109 dot_generic(i, A+i*ldA, IndexType(1),
110 x, incX, _x);
111 x[iX] -= _x;
112 x[iX] /= conjugate(A[i*(ldA+1)]);
113 }
114 } else { /* diag==Unit */
115 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
116 VX _x;
117 dot_generic(i, A+i*ldA, IndexType(1),
118 x, incX, _x);
119 x[iX] -= _x;
120 }
121 }
122 }
123 }
124 if (transA==Trans) {
125 if (upLo==Upper) {
126 if (diag==NonUnit) {
127 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
128 VX _x;
129 dotu_generic(i, A+i, IndexType(ldA),
130 x, incX, _x);
131 x[iX] -= _x;
132 x[iX] /= A[i*(ldA+1)];
133 }
134 } else { /* diag==Unit */
135 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
136 VX _x;
137 dotu_generic(i, A+i, IndexType(ldA),
138 x, incX, _x);
139 x[iX] -= _x;
140 }
141 }
142 } else { /* upLo==Lower */
143 if (diag==NonUnit) {
144 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
145 VX _x;
146 dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
147 x+iX+incX, incX, _x);
148 x[iX] -= _x;
149 x[iX] /= A[i*(ldA+1)];
150 }
151 } else {
152 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
153 VX _x;
154 dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
155 x+iX+incX, incX, _x);
156 x[iX] -= _x;
157 }
158 }
159 }
160 }
161 if (transA==ConjTrans) {
162 if (upLo==Upper) {
163 if (diag==NonUnit) {
164 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
165 VX _x;
166 dot_generic(i, A+i, IndexType(ldA),
167 x, incX, _x);
168 x[iX] -= _x;
169 x[iX] /= conjugate(A[i*(ldA+1)]);
170 }
171 } else { /* diag==Unit */
172 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
173 VX _x;
174 dot_generic(i, A+i, IndexType(ldA),
175 x, incX, _x);
176 x[iX] -= _x;
177 }
178 }
179 } else { /* upLo==Lower */
180 if (diag==NonUnit) {
181 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
182 VX _x;
183 dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
184 x+iX+incX, incX, _x);
185 x[iX] -= _x;
186 x[iX] /= conjugate(A[i*(ldA+1)]);
187 }
188 } else {
189 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
190 VX _x;
191 dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
192 x+iX+incX, incX, _x);
193 x[iX] -= _x;
194 }
195 }
196 }
197 }
198 }
199
200 template <typename IndexType, typename MA, typename VX>
201 void
202 trsv(StorageOrder order, StorageUpLo upLo,
203 Transpose transA, Diag diag,
204 IndexType n,
205 const MA *A, IndexType ldA,
206 VX *x, IndexType incX)
207 {
208 CXXBLAS_DEBUG_OUT("trsv_generic");
209
210 if (incX<0) {
211 x -= incX*(n-1);
212 }
213 trsv_generic(order, upLo, transA, diag, n, A, ldA, x, incX);
214 }
215
216 #ifdef HAVE_CBLAS
217
218 // strsv
219 template <typename IndexType>
220 typename If<IndexType>::isBlasCompatibleInteger
221 trsv(StorageOrder order, StorageUpLo upLo,
222 Transpose transA, Diag diag,
223 IndexType n,
224 const float *A, IndexType ldA,
225 float *x, IndexType incX)
226 {
227 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_strsv");
228
229 cblas_strsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
230 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
231 n,
232 A, ldA,
233 x, incX);
234 }
235
236 // dtrsv
237 template <typename IndexType>
238 typename If<IndexType>::isBlasCompatibleInteger
239 trsv(StorageOrder order, StorageUpLo upLo,
240 Transpose transA, Diag diag,
241 IndexType n,
242 const double *A, IndexType ldA,
243 double *x, IndexType incX)
244 {
245 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dtrsv");
246
247 cblas_dtrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
248 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
249 n,
250 A, ldA,
251 x, incX);
252 }
253
254 // ctrsv
255 template <typename IndexType>
256 typename If<IndexType>::isBlasCompatibleInteger
257 trsv(StorageOrder order, StorageUpLo upLo,
258 Transpose transA, Diag diag,
259 IndexType n,
260 const ComplexFloat *A, IndexType ldA,
261 ComplexFloat *x, IndexType incX)
262 {
263 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ctrsv");
264
265 cblas_ctrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
266 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
267 n,
268 reinterpret_cast<const float *>(A), ldA,
269 reinterpret_cast<const float *>(x), incX);
270 }
271
272 // ztrsv
273 template <typename IndexType>
274 typename If<IndexType>::isBlasCompatibleInteger
275 trsv(StorageOrder order, StorageUpLo upLo,
276 Transpose transA, Diag diag,
277 IndexType n,
278 const ComplexDouble *A, IndexType ldA,
279 ComplexDouble *x, IndexType incX)
280 {
281 CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ztrsv");
282
283 cblas_ztrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
284 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
285 n,
286 reinterpret_cast<const double *>(A), ldA,
287 reinterpret_cast<const double *>(x), incX);
288 }
289
290 #endif // HAVE_CBLAS
291
292 } // namespace cxxblas
293
294 #endif // CXXBLAS_LEVEL2_TRSV_TCC
295