1 /*
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TBMV_TCC
34 #define CXXBLAS_LEVEL2_TBMV_TCC 1
35
36 #include <complex>
37 #include <cxxblas/level1/level1.h>
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename MA, typename VX>
42 void
43 tbmv_generic(StorageOrder order, StorageUpLo upLo,
44 Transpose transA, Diag diag,
45 IndexType n, IndexType k,
46 const MA *A, IndexType ldA,
47 VX *x, IndexType incX)
48 {
49 using std::max;
50 using std::min;
51
52 if (order==ColMajor) {
53 transA = Transpose(transA^Trans);
54 upLo = (upLo==Upper) ? Lower : Upper;
55 tbmv_generic(RowMajor, upLo, transA, diag, n, k, A, ldA, x, incX);
56 return;
57 }
58
59 if (transA==NoTrans) {
60 if (upLo==Upper) {
61 if (diag==NonUnit) {
62 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
63 IndexType len = min(k+1, n-i);
64
65 VX _x;
66 dotu_generic(len, A+ldA*i, IndexType(1),
67 x+iX, IndexType(incX),
68 _x);
69 x[iX] = _x;
70 }
71 } else { /* diag==Unit */
72 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
73 IndexType len = min(k+1, n-i);
74
75 VX _x;
76 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
77 x+iX+incX, IndexType(incX),
78 _x);
79 x[iX] += _x;
80 }
81 }
82 } else { /* upLo==Lower */
83 if (diag==NonUnit) {
84 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
85 IndexType iA = max(k-i, IndexType(0));
86 IndexType len = min(k, i) + 1;
87 IndexType _i = max(i-k, IndexType(0));
88
89 VX _x;
90 dotu_generic(len, A+ldA*i+iA, IndexType(1),
91 x+_i*incX, IndexType(incX),
92 _x);
93 x[iX] = _x;
94 }
95 } else { /* diag==Unit */
96 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
97 IndexType iA = max(k-i, IndexType(0));
98 IndexType len = min(k, i) + 1;
99 IndexType _i = max(i-k, IndexType(0));
100
101 VX _x;
102 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
103 x+_i*incX, IndexType(incX),
104 _x);
105 x[iX] += _x;
106 }
107 }
108 }
109 }
110 if (transA==Conj) {
111 if (upLo==Upper) {
112 if (diag==NonUnit) {
113 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
114 IndexType len = min(k+1, n-i);
115
116 VX _x;
117 dot_generic(len, A+ldA*i, IndexType(1),
118 x+iX, IndexType(incX),
119 _x);
120 x[iX] = _x;
121 }
122 } else { /* diag==Unit */
123 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
124 IndexType len = min(k+1, n-i);
125
126 VX _x;
127 dot_generic(len-1, A+ldA*i+1, IndexType(1),
128 x+iX+incX, IndexType(incX),
129 _x);
130 x[iX] += _x;
131 }
132 }
133 } else { /* upLo==Lower */
134 if (diag==NonUnit) {
135 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
136 IndexType iA = max(k-i, IndexType(0));
137 IndexType len = min(k, i) + 1;
138 IndexType _i = max(i-k, IndexType(0));
139
140 VX _x;
141 dot_generic(len, A+ldA*i+iA, IndexType(1),
142 x+_i*incX, IndexType(incX),
143 _x);
144 x[iX] = _x;
145 }
146 } else { /* diag==Unit */
147 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
148 IndexType iA = max(k-i, IndexType(0));
149 IndexType len = min(k, i) + 1;
150 IndexType _i = max(i-k, IndexType(0));
151
152 VX _x;
153 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
154 x+_i*incX, IndexType(incX),
155 _x);
156 x[iX] += _x;
157 }
158 }
159 }
160 }
161 if (transA==Trans) {
162 if (upLo==Upper) {
163 if (diag==NonUnit) {
164 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
165 IndexType len = min(k+1, n-i);
166
167 axpy_generic(len-1, x[iX],
168 A+ldA*i+1, IndexType(1),
169 x+iX+incX, incX);
170 x[iX] *= *(A+ldA*i);
171 }
172 } else { /* diag==Unit */
173 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
174 IndexType len = min(k+1, n-i);
175
176 axpy_generic(len-1, x[iX],
177 A+ldA*i+1, IndexType(1),
178 x+iX+incX, incX);
179 }
180 }
181 } else { /* upLo==Lower */
182 if (diag==NonUnit) {
183 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
184 IndexType iA = max(k-i, IndexType(0));
185 IndexType len = min(k, i) + 1;
186 IndexType iY = max(i-k, IndexType(0))*incX;
187
188 axpy_generic(len-1, x[iX],
189 A+ldA*i+iA, IndexType(1),
190 x+iY, incX);
191 x[iX] *= *(A+ldA*i+iA+len-1);
192 }
193 } else {
194 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
195 IndexType iA = max(k-i, IndexType(0));
196 IndexType len = min(k, i) + 1;
197 IndexType iY = max(i-k, IndexType(0))*incX;
198
199 axpy_generic(len-1, x[iX],
200 A+ldA*i+iA, IndexType(1),
201 x+iY, incX);
202 }
203 }
204 }
205 }
206 if (transA==ConjTrans) {
207 if (upLo==Upper) {
208 if (diag==NonUnit) {
209 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
210 IndexType len = min(k+1, n-i);
211
212 acxpy_generic(len-1, x[iX],
213 A+ldA*i+1, IndexType(1),
214 x+iX+incX, incX);
215 x[iX] *= conjugate(A[ldA*i]);
216 }
217 } else { /* diag==Unit */
218 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
219 IndexType len = min(k+1, n-i);
220
221 acxpy_generic(len-1, x[iX],
222 A+ldA*i+1, IndexType(1),
223 x+iX+incX, incX);
224 }
225 }
226 } else { /* upLo==Lower */
227 if (diag==NonUnit) {
228 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
229 IndexType iA = max(k-i, IndexType(0));
230 IndexType len = min(k, i) + 1;
231 IndexType iY = max(i-k, IndexType(0))*incX;
232
233 acxpy_generic(len-1, x[iX],
234 A+ldA*i+iA, IndexType(1),
235 x+iY, incX);
236 x[iX] *= conjugate(A[ldA*i+iA+len-1]);
237 }
238 } else {
239 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
240 IndexType iA = max(k-i, IndexType(0));
241 IndexType len = min(k, i) + 1;
242 IndexType iY = max(i-k, IndexType(0))*incX;
243
244 acxpy_generic(len-1, x[iX],
245 A+ldA*i+iA, IndexType(1),
246 x+iY, incX);
247 }
248 }
249 }
250 }
251 }
252
253 //------------------------------------------------------------------------------
254
255 template <typename IndexType, typename MA, typename VX>
256 void
257 tbmv(StorageOrder order, StorageUpLo upLo,
258 Transpose transA, Diag diag,
259 IndexType n, IndexType k,
260 const MA *A, IndexType ldA,
261 VX *x, IndexType incX)
262 {
263 CXXBLAS_DEBUG_OUT("tbmv_generic");
264
265 if (n==0) {
266 return;
267 }
268 if (incX<0) {
269 x -= incX*(n-1);
270 }
271 tbmv_generic(order, upLo, transA, diag, n, k, A, ldA, x, incX);
272 }
273
274 } // namespace cxxblas
275
276 #endif // CXXBLAS_LEVEL2_TBMV_TCC
2 * Copyright (c) 2009, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TBMV_TCC
34 #define CXXBLAS_LEVEL2_TBMV_TCC 1
35
36 #include <complex>
37 #include <cxxblas/level1/level1.h>
38
39 namespace cxxblas {
40
41 template <typename IndexType, typename MA, typename VX>
42 void
43 tbmv_generic(StorageOrder order, StorageUpLo upLo,
44 Transpose transA, Diag diag,
45 IndexType n, IndexType k,
46 const MA *A, IndexType ldA,
47 VX *x, IndexType incX)
48 {
49 using std::max;
50 using std::min;
51
52 if (order==ColMajor) {
53 transA = Transpose(transA^Trans);
54 upLo = (upLo==Upper) ? Lower : Upper;
55 tbmv_generic(RowMajor, upLo, transA, diag, n, k, A, ldA, x, incX);
56 return;
57 }
58
59 if (transA==NoTrans) {
60 if (upLo==Upper) {
61 if (diag==NonUnit) {
62 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
63 IndexType len = min(k+1, n-i);
64
65 VX _x;
66 dotu_generic(len, A+ldA*i, IndexType(1),
67 x+iX, IndexType(incX),
68 _x);
69 x[iX] = _x;
70 }
71 } else { /* diag==Unit */
72 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
73 IndexType len = min(k+1, n-i);
74
75 VX _x;
76 dotu_generic(len-1, A+ldA*i+1, IndexType(1),
77 x+iX+incX, IndexType(incX),
78 _x);
79 x[iX] += _x;
80 }
81 }
82 } else { /* upLo==Lower */
83 if (diag==NonUnit) {
84 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
85 IndexType iA = max(k-i, IndexType(0));
86 IndexType len = min(k, i) + 1;
87 IndexType _i = max(i-k, IndexType(0));
88
89 VX _x;
90 dotu_generic(len, A+ldA*i+iA, IndexType(1),
91 x+_i*incX, IndexType(incX),
92 _x);
93 x[iX] = _x;
94 }
95 } else { /* diag==Unit */
96 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
97 IndexType iA = max(k-i, IndexType(0));
98 IndexType len = min(k, i) + 1;
99 IndexType _i = max(i-k, IndexType(0));
100
101 VX _x;
102 dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
103 x+_i*incX, IndexType(incX),
104 _x);
105 x[iX] += _x;
106 }
107 }
108 }
109 }
110 if (transA==Conj) {
111 if (upLo==Upper) {
112 if (diag==NonUnit) {
113 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
114 IndexType len = min(k+1, n-i);
115
116 VX _x;
117 dot_generic(len, A+ldA*i, IndexType(1),
118 x+iX, IndexType(incX),
119 _x);
120 x[iX] = _x;
121 }
122 } else { /* diag==Unit */
123 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
124 IndexType len = min(k+1, n-i);
125
126 VX _x;
127 dot_generic(len-1, A+ldA*i+1, IndexType(1),
128 x+iX+incX, IndexType(incX),
129 _x);
130 x[iX] += _x;
131 }
132 }
133 } else { /* upLo==Lower */
134 if (diag==NonUnit) {
135 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
136 IndexType iA = max(k-i, IndexType(0));
137 IndexType len = min(k, i) + 1;
138 IndexType _i = max(i-k, IndexType(0));
139
140 VX _x;
141 dot_generic(len, A+ldA*i+iA, IndexType(1),
142 x+_i*incX, IndexType(incX),
143 _x);
144 x[iX] = _x;
145 }
146 } else { /* diag==Unit */
147 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
148 IndexType iA = max(k-i, IndexType(0));
149 IndexType len = min(k, i) + 1;
150 IndexType _i = max(i-k, IndexType(0));
151
152 VX _x;
153 dot_generic(len-1, A+ldA*i+iA, IndexType(1),
154 x+_i*incX, IndexType(incX),
155 _x);
156 x[iX] += _x;
157 }
158 }
159 }
160 }
161 if (transA==Trans) {
162 if (upLo==Upper) {
163 if (diag==NonUnit) {
164 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
165 IndexType len = min(k+1, n-i);
166
167 axpy_generic(len-1, x[iX],
168 A+ldA*i+1, IndexType(1),
169 x+iX+incX, incX);
170 x[iX] *= *(A+ldA*i);
171 }
172 } else { /* diag==Unit */
173 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
174 IndexType len = min(k+1, n-i);
175
176 axpy_generic(len-1, x[iX],
177 A+ldA*i+1, IndexType(1),
178 x+iX+incX, incX);
179 }
180 }
181 } else { /* upLo==Lower */
182 if (diag==NonUnit) {
183 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
184 IndexType iA = max(k-i, IndexType(0));
185 IndexType len = min(k, i) + 1;
186 IndexType iY = max(i-k, IndexType(0))*incX;
187
188 axpy_generic(len-1, x[iX],
189 A+ldA*i+iA, IndexType(1),
190 x+iY, incX);
191 x[iX] *= *(A+ldA*i+iA+len-1);
192 }
193 } else {
194 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
195 IndexType iA = max(k-i, IndexType(0));
196 IndexType len = min(k, i) + 1;
197 IndexType iY = max(i-k, IndexType(0))*incX;
198
199 axpy_generic(len-1, x[iX],
200 A+ldA*i+iA, IndexType(1),
201 x+iY, incX);
202 }
203 }
204 }
205 }
206 if (transA==ConjTrans) {
207 if (upLo==Upper) {
208 if (diag==NonUnit) {
209 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
210 IndexType len = min(k+1, n-i);
211
212 acxpy_generic(len-1, x[iX],
213 A+ldA*i+1, IndexType(1),
214 x+iX+incX, incX);
215 x[iX] *= conjugate(A[ldA*i]);
216 }
217 } else { /* diag==Unit */
218 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
219 IndexType len = min(k+1, n-i);
220
221 acxpy_generic(len-1, x[iX],
222 A+ldA*i+1, IndexType(1),
223 x+iX+incX, incX);
224 }
225 }
226 } else { /* upLo==Lower */
227 if (diag==NonUnit) {
228 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
229 IndexType iA = max(k-i, IndexType(0));
230 IndexType len = min(k, i) + 1;
231 IndexType iY = max(i-k, IndexType(0))*incX;
232
233 acxpy_generic(len-1, x[iX],
234 A+ldA*i+iA, IndexType(1),
235 x+iY, incX);
236 x[iX] *= conjugate(A[ldA*i+iA+len-1]);
237 }
238 } else {
239 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
240 IndexType iA = max(k-i, IndexType(0));
241 IndexType len = min(k, i) + 1;
242 IndexType iY = max(i-k, IndexType(0))*incX;
243
244 acxpy_generic(len-1, x[iX],
245 A+ldA*i+iA, IndexType(1),
246 x+iY, incX);
247 }
248 }
249 }
250 }
251 }
252
253 //------------------------------------------------------------------------------
254
255 template <typename IndexType, typename MA, typename VX>
256 void
257 tbmv(StorageOrder order, StorageUpLo upLo,
258 Transpose transA, Diag diag,
259 IndexType n, IndexType k,
260 const MA *A, IndexType ldA,
261 VX *x, IndexType incX)
262 {
263 CXXBLAS_DEBUG_OUT("tbmv_generic");
264
265 if (n==0) {
266 return;
267 }
268 if (incX<0) {
269 x -= incX*(n-1);
270 }
271 tbmv_generic(order, upLo, transA, diag, n, k, A, ldA, x, incX);
272 }
273
274 } // namespace cxxblas
275
276 #endif // CXXBLAS_LEVEL2_TBMV_TCC