1 /*
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TPSV_TCC
34 #define CXXBLAS_LEVEL2_TPSV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 tpsv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Upper) ? Lower : Upper;
49 tpsv_generic(RowMajor, upLo, transA, diag, n, A, x, incX);
50 return;
51 }
52 if (transA==NoTrans) {
53 if (upLo==Upper) {
54 if (diag==NonUnit) {
55 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
56 VX _x;
57 dotu_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
58 x+iX+incX, incX, _x);
59 x[iX] -= _x;
60 x[iX] /= *(A+i*(2*n-i+1)/2);
61 }
62 } else { /* diag==Unit */
63 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
64 VX _x;
65 dotu_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
66 x+iX+incX, incX, _x);
67 x[iX] -= _x;
68 }
69 }
70 } else { /* upLo==Lower */
71 if (diag==NonUnit) {
72 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
73 VX _x;
74 dotu_generic(i, A+i*(i+1)/2, IndexType(1),
75 x, incX, _x);
76 x[iX] -= _x;
77 x[iX] /= *(A+i*(i+3)/2);
78 }
79 } else { /* diag==Unit */
80 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
81 VX _x;
82 dotu_generic(i, A+i*(i+1)/2, IndexType(1),
83 x, incX, _x);
84 x[iX] -= _x;
85 }
86 }
87 }
88 }
89 if (transA==Conj) {
90 if (upLo==Upper) {
91 if (diag==NonUnit) {
92 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
93 VX _x;
94 dot_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
95 x+iX+incX, incX, _x);
96 x[iX] -= _x;
97 x[iX] /= conjugate(A[i*(2*n-i+1)/2]);
98 }
99 } else { /* diag==Unit */
100 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
101 VX _x;
102 dot_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
103 x+iX+incX, incX, _x);
104 x[iX] -= _x;
105 }
106 }
107 } else { /* upLo==Lower */
108 if (diag==NonUnit) {
109 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
110 VX _x;
111 dot_generic(i, A+i*(i+1)/2, IndexType(1),
112 x, incX, _x);
113 x[iX] -= _x;
114 x[iX] /= conjugate(A[i*(i+3)/2]);
115 }
116 } else { /* diag==Unit */
117 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
118 VX _x;
119 dot_generic(i, A+i*(i+1)/2, IndexType(1),
120 x, incX, _x);
121 x[iX] -= _x;
122 }
123 }
124 }
125 }
126 if (transA==Trans) {
127 if (upLo==Upper) {
128 if (diag==NonUnit) {
129 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
130 x[iX] /= *(A+i*(2*n-i+1)/2);
131 axpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
132 x+iX+incX, incX);
133 }
134 } else { /* diag==Unit */
135 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
136 axpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
137 x+iX+incX, incX);
138 }
139 }
140 } else { /* upLo==Lower */
141 if (diag==NonUnit) {
142 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
143 x[iX] /= *(A+i*(i+3)/2);
144 axpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
145 x, incX);
146 }
147 } else {
148 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
149 axpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
150 x, incX);
151 }
152 }
153 }
154 }
155 if (transA==ConjTrans) {
156 if (upLo==Upper) {
157 if (diag==NonUnit) {
158 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
159 x[iX] /= conjugate(A[i*(2*n-i+1)/2]);
160 acxpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
161 x+iX+incX, incX);
162 }
163 } else { /* diag==Unit */
164 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
165 acxpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
166 x+iX+incX, incX);
167 }
168 }
169 } else { /* upLo==Lower */
170 if (diag==NonUnit) {
171 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
172 x[iX] /= conjugate(A[i*(i+3)/2]);
173 acxpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
174 x, incX);
175 }
176 } else {
177 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
178 acxpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
179 x, incX);
180 }
181 }
182 }
183 }
184 }
185
186 template <typename IndexType, typename MA, typename VX>
187 void
188 tpsv(StorageOrder order, StorageUpLo upLo,
189 Transpose transA, Diag diag,
190 IndexType n,
191 const MA *A,
192 VX *x, IndexType incX)
193 {
194 CXXBLAS_DEBUG_OUT("tpsv_generic");
195
196 if (incX<0) {
197 x -= incX*(n-1);
198 }
199 tpsv_generic(order, upLo, transA, diag, n, A, x, incX);
200 }
201
202 } // namespace flens
203
204 #endif // CXXBLAS_LEVEL2_TPSV_TCC
205
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TPSV_TCC
34 #define CXXBLAS_LEVEL2_TPSV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 tpsv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Upper) ? Lower : Upper;
49 tpsv_generic(RowMajor, upLo, transA, diag, n, A, x, incX);
50 return;
51 }
52 if (transA==NoTrans) {
53 if (upLo==Upper) {
54 if (diag==NonUnit) {
55 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
56 VX _x;
57 dotu_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
58 x+iX+incX, incX, _x);
59 x[iX] -= _x;
60 x[iX] /= *(A+i*(2*n-i+1)/2);
61 }
62 } else { /* diag==Unit */
63 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
64 VX _x;
65 dotu_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
66 x+iX+incX, incX, _x);
67 x[iX] -= _x;
68 }
69 }
70 } else { /* upLo==Lower */
71 if (diag==NonUnit) {
72 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
73 VX _x;
74 dotu_generic(i, A+i*(i+1)/2, IndexType(1),
75 x, incX, _x);
76 x[iX] -= _x;
77 x[iX] /= *(A+i*(i+3)/2);
78 }
79 } else { /* diag==Unit */
80 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
81 VX _x;
82 dotu_generic(i, A+i*(i+1)/2, IndexType(1),
83 x, incX, _x);
84 x[iX] -= _x;
85 }
86 }
87 }
88 }
89 if (transA==Conj) {
90 if (upLo==Upper) {
91 if (diag==NonUnit) {
92 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
93 VX _x;
94 dot_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
95 x+iX+incX, incX, _x);
96 x[iX] -= _x;
97 x[iX] /= conjugate(A[i*(2*n-i+1)/2]);
98 }
99 } else { /* diag==Unit */
100 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
101 VX _x;
102 dot_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
103 x+iX+incX, incX, _x);
104 x[iX] -= _x;
105 }
106 }
107 } else { /* upLo==Lower */
108 if (diag==NonUnit) {
109 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
110 VX _x;
111 dot_generic(i, A+i*(i+1)/2, IndexType(1),
112 x, incX, _x);
113 x[iX] -= _x;
114 x[iX] /= conjugate(A[i*(i+3)/2]);
115 }
116 } else { /* diag==Unit */
117 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
118 VX _x;
119 dot_generic(i, A+i*(i+1)/2, IndexType(1),
120 x, incX, _x);
121 x[iX] -= _x;
122 }
123 }
124 }
125 }
126 if (transA==Trans) {
127 if (upLo==Upper) {
128 if (diag==NonUnit) {
129 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
130 x[iX] /= *(A+i*(2*n-i+1)/2);
131 axpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
132 x+iX+incX, incX);
133 }
134 } else { /* diag==Unit */
135 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
136 axpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
137 x+iX+incX, incX);
138 }
139 }
140 } else { /* upLo==Lower */
141 if (diag==NonUnit) {
142 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
143 x[iX] /= *(A+i*(i+3)/2);
144 axpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
145 x, incX);
146 }
147 } else {
148 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
149 axpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
150 x, incX);
151 }
152 }
153 }
154 }
155 if (transA==ConjTrans) {
156 if (upLo==Upper) {
157 if (diag==NonUnit) {
158 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
159 x[iX] /= conjugate(A[i*(2*n-i+1)/2]);
160 acxpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
161 x+iX+incX, incX);
162 }
163 } else { /* diag==Unit */
164 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
165 acxpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
166 x+iX+incX, incX);
167 }
168 }
169 } else { /* upLo==Lower */
170 if (diag==NonUnit) {
171 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
172 x[iX] /= conjugate(A[i*(i+3)/2]);
173 acxpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
174 x, incX);
175 }
176 } else {
177 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
178 acxpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
179 x, incX);
180 }
181 }
182 }
183 }
184 }
185
186 template <typename IndexType, typename MA, typename VX>
187 void
188 tpsv(StorageOrder order, StorageUpLo upLo,
189 Transpose transA, Diag diag,
190 IndexType n,
191 const MA *A,
192 VX *x, IndexType incX)
193 {
194 CXXBLAS_DEBUG_OUT("tpsv_generic");
195
196 if (incX<0) {
197 x -= incX*(n-1);
198 }
199 tpsv_generic(order, upLo, transA, diag, n, A, x, incX);
200 }
201
202 } // namespace flens
203
204 #endif // CXXBLAS_LEVEL2_TPSV_TCC
205