1 /*
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TBSV_TCC
34 #define CXXBLAS_LEVEL2_TBSV_TCC 1
35
36 #include <complex>
37 #include <cxxblas/level1/level1.h>
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename MA, typename VX>
42 void
43 tbsv_generic(StorageOrder order, StorageUpLo upLo,
44 Transpose transA, Diag diag,
45 IndexType n, IndexType k,
46 const MA *A, IndexType ldA,
47 VX *x, IndexType incX)
48 {
49 using std::max;
50 using std::min;
51
52 if (order==ColMajor) {
53 transA = Transpose(transA^Trans);
54 upLo = (upLo==Upper) ? Lower : Upper;
55 }
56
57 if (transA==NoTrans) {
58 if (upLo==Upper) {
59 if (diag==NonUnit) {
60 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
61 IndexType len = min(k+1, n-i);
62
63 VX _x;
64 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
65 x+iX+incX, IndexType(incX),
66 _x);
67 x[iX] -= _x;
68 x[iX] /= *(A+ldA*i);
69 }
70 } else { /* diag==Unit */
71 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
72 IndexType len = min(k+1, n-i);
73
74 VX _x;
75 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
76 x+iX+incX, IndexType(incX),
77 _x);
78 x[iX] -= _x;
79 }
80 }
81 } else { /* upLo==Lower */
82 if (diag==NonUnit) {
83 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
84 IndexType iA = max(k-i, IndexType(0));
85 IndexType len = min(k, i) + 1;
86 IndexType _i = max(i-k, IndexType(0));
87
88 VX _x;
89 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
90 x+_i*incX, IndexType(incX),
91 _x);
92 x[iX] -= _x;
93 x[iX] /= *(A+ldA*i+iA + len-1);
94 }
95 } else { /* diag==Unit */
96 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
97 IndexType iA = max(k-i, IndexType(0));
98 IndexType len = min(k, i) + 1;
99 IndexType _i = max(i-k, IndexType(0));
100
101 VX _x;
102 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
103 x+_i*incX, IndexType(incX),
104 _x);
105 x[iX] -= _x;
106 }
107 }
108 }
109 }
110 if (transA==Conj) {
111 if (upLo==Upper) {
112 if (diag==NonUnit) {
113 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
114 IndexType len = min(k+1, n-i);
115
116 VX _x;
117 dot_generic(len-1, A+ldA*i+1, IndexType(1),
118 x+iX+incX, IndexType(incX),
119 _x);
120 x[iX] -= _x;
121 x[iX] /= conjugate(A[ldA*i]);
122 }
123 } else { /* diag==Unit */
124 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
125 IndexType len = min(k+1, n-i);
126
127 VX _x;
128 dot_generic(len-1, A+ldA*i+1, IndexType(1),
129 x+iX+incX, IndexType(incX),
130 _x);
131 x[iX] -= _x;
132 }
133 }
134 } else { /* upLo==Lower */
135 if (diag==NonUnit) {
136 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
137 IndexType iA = max(k-i, IndexType(0));
138 IndexType len = min(k, i) + 1;
139 IndexType _i = max(i-k, IndexType(0));
140
141 VX _x;
142 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
143 x+_i*incX, IndexType(incX),
144 _x);
145 x[iX] -= _x;
146 x[iX] /= conjugate(A[ldA*i+iA + len-1]);
147 }
148 } else { /* diag==Unit */
149 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
150 IndexType iA = max(k-i, IndexType(0));
151 IndexType len = min(k, i) + 1;
152 IndexType _i = max(i-k, IndexType(0));
153
154 VX _x;
155 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
156 x+_i*incX, IndexType(incX),
157 _x);
158 x[iX] -= _x;
159 }
160 }
161 }
162 }
163 if (transA==Trans) {
164 if (upLo==Upper) {
165 if (diag==NonUnit) {
166 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
167 IndexType len = min(k+1, n-i);
168
169 x[iX] /= *(A+ldA*i);
170 axpy_generic(len-1, -x[iX],
171 A+ldA*i+1, IndexType(1),
172 x+iX+incX, incX);
173 }
174 } else { /* diag==Unit */
175 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
176 IndexType len = min(k+1, n-i);
177
178 axpy_generic(len-1, -x[iX],
179 A+ldA*i+1, IndexType(1),
180 x+iX+incX, incX);
181 }
182 }
183 } else { /* upLo==Lower */
184 if (diag==NonUnit) {
185 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
186 IndexType iA = max(k-i, IndexType(0));
187 IndexType len = min(k, i) + 1;
188 IndexType iY = max(i-k, IndexType(0))*incX;
189
190 x[iX] /= *(A+ldA*i+iA+len-1);
191 axpy_generic(len-1, -x[iX],
192 A+ldA*i+iA, IndexType(1),
193 x+iY, incX);
194 }
195 } else {
196 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
197 IndexType iA = max(k-i, IndexType(0));
198 IndexType len = min(k, i) + 1;
199 IndexType iY = max(i-k, IndexType(0))*incX;
200
201 axpy_generic(len-1, -x[iX],
202 A+ldA*i+iA, IndexType(1),
203 x+iY, incX);
204 }
205 }
206 }
207 }
208 if (transA==ConjTrans) {
209 if (upLo==Upper) {
210 if (diag==NonUnit) {
211 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
212 IndexType len = min(k+1, n-i);
213
214 x[iX] /= conjugate(A[ldA*i]);
215 acxpy_generic(len-1, -x[iX],
216 A+ldA*i+1, IndexType(1),
217 x+iX+incX, incX);
218 }
219 } else { /* diag==Unit */
220 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
221 IndexType len = min(k+1, n-i);
222
223 acxpy_generic(len-1, -x[iX],
224 A+ldA*i+1, IndexType(1),
225 x+iX+incX, incX);
226 }
227 }
228 } else { /* upLo==Lower */
229 if (diag==NonUnit) {
230 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
231 IndexType iA = max(k-i, IndexType(0));
232 IndexType len = min(k, i) + 1;
233 IndexType iY = max(i-k, IndexType(0))*incX;
234
235 x[iX] /= conjugate(A[ldA*i+iA+len-1]);
236 acxpy_generic(len-1, -x[iX],
237 A+ldA*i+iA, IndexType(1),
238 x+iY, incX);
239 }
240 } else {
241 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
242 IndexType iA = max(k-i, IndexType(0));
243 IndexType len = min(k, i) + 1;
244 IndexType iY = max(i-k, IndexType(0))*incX;
245
246 acxpy_generic(len-1, -x[iX],
247 A+ldA*i+iA, IndexType(1),
248 x+iY, incX);
249 }
250 }
251 }
252 }
253 }
254
255 //------------------------------------------------------------------------------
256
257 template <typename IndexType, typename MA, typename VX>
258 void
259 tbsv(StorageOrder order, StorageUpLo upLo,
260 Transpose transA, Diag diag,
261 IndexType n, IndexType k,
262 const MA *A, IndexType ldA,
263 VX *x, IndexType incX)
264 {
265 CXXBLAS_DEBUG_OUT("tbsv_generic");
266
267 if (n==0) {
268 return;
269 }
270 if (incX<0) {
271 x -= incX*(n-1);
272 }
273 tbsv_generic(order, upLo, transA, diag, n, k, A, ldA, x, incX);
274 }
275
276 } // namespace cxxblas
277
278 #endif // CXXBLAS_LEVEL2_TBSV_TCC
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TBSV_TCC
34 #define CXXBLAS_LEVEL2_TBSV_TCC 1
35
36 #include <complex>
37 #include <cxxblas/level1/level1.h>
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename MA, typename VX>
42 void
43 tbsv_generic(StorageOrder order, StorageUpLo upLo,
44 Transpose transA, Diag diag,
45 IndexType n, IndexType k,
46 const MA *A, IndexType ldA,
47 VX *x, IndexType incX)
48 {
49 using std::max;
50 using std::min;
51
52 if (order==ColMajor) {
53 transA = Transpose(transA^Trans);
54 upLo = (upLo==Upper) ? Lower : Upper;
55 }
56
57 if (transA==NoTrans) {
58 if (upLo==Upper) {
59 if (diag==NonUnit) {
60 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
61 IndexType len = min(k+1, n-i);
62
63 VX _x;
64 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
65 x+iX+incX, IndexType(incX),
66 _x);
67 x[iX] -= _x;
68 x[iX] /= *(A+ldA*i);
69 }
70 } else { /* diag==Unit */
71 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
72 IndexType len = min(k+1, n-i);
73
74 VX _x;
75 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
76 x+iX+incX, IndexType(incX),
77 _x);
78 x[iX] -= _x;
79 }
80 }
81 } else { /* upLo==Lower */
82 if (diag==NonUnit) {
83 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
84 IndexType iA = max(k-i, IndexType(0));
85 IndexType len = min(k, i) + 1;
86 IndexType _i = max(i-k, IndexType(0));
87
88 VX _x;
89 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
90 x+_i*incX, IndexType(incX),
91 _x);
92 x[iX] -= _x;
93 x[iX] /= *(A+ldA*i+iA + len-1);
94 }
95 } else { /* diag==Unit */
96 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
97 IndexType iA = max(k-i, IndexType(0));
98 IndexType len = min(k, i) + 1;
99 IndexType _i = max(i-k, IndexType(0));
100
101 VX _x;
102 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
103 x+_i*incX, IndexType(incX),
104 _x);
105 x[iX] -= _x;
106 }
107 }
108 }
109 }
110 if (transA==Conj) {
111 if (upLo==Upper) {
112 if (diag==NonUnit) {
113 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
114 IndexType len = min(k+1, n-i);
115
116 VX _x;
117 dot_generic(len-1, A+ldA*i+1, IndexType(1),
118 x+iX+incX, IndexType(incX),
119 _x);
120 x[iX] -= _x;
121 x[iX] /= conjugate(A[ldA*i]);
122 }
123 } else { /* diag==Unit */
124 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
125 IndexType len = min(k+1, n-i);
126
127 VX _x;
128 dot_generic(len-1, A+ldA*i+1, IndexType(1),
129 x+iX+incX, IndexType(incX),
130 _x);
131 x[iX] -= _x;
132 }
133 }
134 } else { /* upLo==Lower */
135 if (diag==NonUnit) {
136 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
137 IndexType iA = max(k-i, IndexType(0));
138 IndexType len = min(k, i) + 1;
139 IndexType _i = max(i-k, IndexType(0));
140
141 VX _x;
142 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
143 x+_i*incX, IndexType(incX),
144 _x);
145 x[iX] -= _x;
146 x[iX] /= conjugate(A[ldA*i+iA + len-1]);
147 }
148 } else { /* diag==Unit */
149 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
150 IndexType iA = max(k-i, IndexType(0));
151 IndexType len = min(k, i) + 1;
152 IndexType _i = max(i-k, IndexType(0));
153
154 VX _x;
155 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
156 x+_i*incX, IndexType(incX),
157 _x);
158 x[iX] -= _x;
159 }
160 }
161 }
162 }
163 if (transA==Trans) {
164 if (upLo==Upper) {
165 if (diag==NonUnit) {
166 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
167 IndexType len = min(k+1, n-i);
168
169 x[iX] /= *(A+ldA*i);
170 axpy_generic(len-1, -x[iX],
171 A+ldA*i+1, IndexType(1),
172 x+iX+incX, incX);
173 }
174 } else { /* diag==Unit */
175 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
176 IndexType len = min(k+1, n-i);
177
178 axpy_generic(len-1, -x[iX],
179 A+ldA*i+1, IndexType(1),
180 x+iX+incX, incX);
181 }
182 }
183 } else { /* upLo==Lower */
184 if (diag==NonUnit) {
185 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
186 IndexType iA = max(k-i, IndexType(0));
187 IndexType len = min(k, i) + 1;
188 IndexType iY = max(i-k, IndexType(0))*incX;
189
190 x[iX] /= *(A+ldA*i+iA+len-1);
191 axpy_generic(len-1, -x[iX],
192 A+ldA*i+iA, IndexType(1),
193 x+iY, incX);
194 }
195 } else {
196 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
197 IndexType iA = max(k-i, IndexType(0));
198 IndexType len = min(k, i) + 1;
199 IndexType iY = max(i-k, IndexType(0))*incX;
200
201 axpy_generic(len-1, -x[iX],
202 A+ldA*i+iA, IndexType(1),
203 x+iY, incX);
204 }
205 }
206 }
207 }
208 if (transA==ConjTrans) {
209 if (upLo==Upper) {
210 if (diag==NonUnit) {
211 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
212 IndexType len = min(k+1, n-i);
213
214 x[iX] /= conjugate(A[ldA*i]);
215 acxpy_generic(len-1, -x[iX],
216 A+ldA*i+1, IndexType(1),
217 x+iX+incX, incX);
218 }
219 } else { /* diag==Unit */
220 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
221 IndexType len = min(k+1, n-i);
222
223 acxpy_generic(len-1, -x[iX],
224 A+ldA*i+1, IndexType(1),
225 x+iX+incX, incX);
226 }
227 }
228 } else { /* upLo==Lower */
229 if (diag==NonUnit) {
230 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
231 IndexType iA = max(k-i, IndexType(0));
232 IndexType len = min(k, i) + 1;
233 IndexType iY = max(i-k, IndexType(0))*incX;
234
235 x[iX] /= conjugate(A[ldA*i+iA+len-1]);
236 acxpy_generic(len-1, -x[iX],
237 A+ldA*i+iA, IndexType(1),
238 x+iY, incX);
239 }
240 } else {
241 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
242 IndexType iA = max(k-i, IndexType(0));
243 IndexType len = min(k, i) + 1;
244 IndexType iY = max(i-k, IndexType(0))*incX;
245
246 acxpy_generic(len-1, -x[iX],
247 A+ldA*i+iA, IndexType(1),
248 x+iY, incX);
249 }
250 }
251 }
252 }
253 }
254
255 //------------------------------------------------------------------------------
256
257 template <typename IndexType, typename MA, typename VX>
258 void
259 tbsv(StorageOrder order, StorageUpLo upLo,
260 Transpose transA, Diag diag,
261 IndexType n, IndexType k,
262 const MA *A, IndexType ldA,
263 VX *x, IndexType incX)
264 {
265 CXXBLAS_DEBUG_OUT("tbsv_generic");
266
267 if (n==0) {
268 return;
269 }
270 if (incX<0) {
271 x -= incX*(n-1);
272 }
273 tbsv_generic(order, upLo, transA, diag, n, k, A, ldA, x, incX);
274 }
275
276 } // namespace cxxblas
277
278 #endif // CXXBLAS_LEVEL2_TBSV_TCC