1 /*
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TPMV_TCC
34 #define CXXBLAS_LEVEL2_TPMV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 tpmv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Upper) ? Lower : Upper;
49 tpmv_generic(RowMajor, upLo, transA, diag, n, A, x, incX);
50 return;
51 }
52 if (transA==NoTrans) {
53 if (upLo==Upper) {
54 if (diag==NonUnit) {
55 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
56 VX _x;
57 dotu_generic(n-i, A+i*(2*n-i+1)/2, IndexType(1),
58 x+iX, incX, _x);
59 x[iX] = _x;
60 }
61 } else { /* diag==Unit */
62 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
63 VX _x;
64 dotu_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
65 x+iX+incX, incX, _x);
66 x[iX] += _x;
67 }
68 }
69 } else { /* upLo==Lower */
70 if (diag==NonUnit) {
71 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
72 VX _x;
73 dotu_generic(i+1, A+i*(i+1)/2, IndexType(1),
74 x, incX, _x);
75 x[iX] = _x;
76 }
77 } else { /* diag==Unit */
78 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
79 VX _x;
80 dotu_generic(i, A+i*(i+1)/2, IndexType(1),
81 x, incX, _x);
82 x[iX] += _x;
83 }
84 }
85 }
86 }
87 if (transA==Conj) {
88 if (upLo==Upper) {
89 if (diag==NonUnit) {
90 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
91 VX _x;
92 dot_generic(n-i, A+i*(2*n-i+1)/2, IndexType(1),
93 x+iX, incX, _x);
94 x[iX] = _x;
95 }
96 } else { /* diag==Unit */
97 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
98 VX _x;
99 dot_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
100 x+iX+incX, incX, _x);
101 x[iX] += _x;
102 }
103 }
104 } else { /* upLo==Lower */
105 if (diag==NonUnit) {
106 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
107 VX _x;
108 dot_generic(i+1, A+i*(i+1)/2, IndexType(1),
109 x, incX, _x);
110 x[iX] = _x;
111 }
112 } else { /* diag==Unit */
113 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
114 VX _x;
115 dot_generic(i, A+i*(i+1)/2, IndexType(1),
116 x, incX, _x);
117 x[iX] += _x;
118 }
119 }
120 }
121 }
122 if (transA==Trans) {
123 if (upLo==Upper) {
124 if (diag==NonUnit) {
125 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
126 axpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
127 x+iX+incX, incX);
128 x[iX] *= *(A+i*(2*n-i+1)/2);
129 }
130 } else { /* diag==Unit */
131 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
132 axpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
133 x+iX+incX, incX);
134 }
135 }
136 } else { /* upLo==Lower */
137 if (diag==NonUnit) {
138 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
139 axpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
140 x, incX);
141 x[iX] *= *(A+i*(i+3)/2);
142 }
143 } else {
144 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
145 axpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
146 x, incX);
147 }
148 }
149 }
150 }
151 if (transA==ConjTrans) {
152 if (upLo==Upper) {
153 if (diag==NonUnit) {
154 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
155 acxpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
156 x+iX+incX, incX);
157 x[iX] *= conjugate(A[i*(2*n-i+1)/2]);
158 }
159 } else { /* diag==Unit */
160 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
161 acxpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
162 x+iX+incX, incX);
163 }
164 }
165 } else { /* upLo==Lower */
166 if (diag==NonUnit) {
167 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
168 acxpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
169 x, incX);
170 x[iX] *= conjugate(A[i*(i+3)/2]);
171 }
172 } else {
173 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
174 acxpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
175 x, incX);
176 }
177 }
178 }
179 }
180 }
181
182 template <typename IndexType, typename MA, typename VX>
183 void
184 tpmv(StorageOrder order, StorageUpLo upLo,
185 Transpose transA, Diag diag,
186 IndexType n,
187 const MA *A,
188 VX *x, IndexType incX)
189 {
190 CXXBLAS_DEBUG_OUT("tpmv_generic");
191
192 if (incX<0) {
193 x -= incX*(n-1);
194 }
195 tpmv_generic(order, upLo, transA, diag, n, A, x, incX);
196 }
197
198 } // namespace flens
199
200 #endif // CXXBLAS_LEVEL2_TPMV_TCC
201
2 * Copyright (c) 2010, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef CXXBLAS_LEVEL2_TPMV_TCC
34 #define CXXBLAS_LEVEL2_TPMV_TCC 1
35
36 namespace cxxblas {
37
38 template <typename IndexType, typename MA, typename VX>
39 void
40 tpmv_generic(StorageOrder order, StorageUpLo upLo,
41 Transpose transA, Diag diag,
42 IndexType n,
43 const MA *A,
44 VX *x, IndexType incX)
45 {
46 if (order==ColMajor) {
47 transA = Transpose(transA^Trans);
48 upLo = (upLo==Upper) ? Lower : Upper;
49 tpmv_generic(RowMajor, upLo, transA, diag, n, A, x, incX);
50 return;
51 }
52 if (transA==NoTrans) {
53 if (upLo==Upper) {
54 if (diag==NonUnit) {
55 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
56 VX _x;
57 dotu_generic(n-i, A+i*(2*n-i+1)/2, IndexType(1),
58 x+iX, incX, _x);
59 x[iX] = _x;
60 }
61 } else { /* diag==Unit */
62 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
63 VX _x;
64 dotu_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
65 x+iX+incX, incX, _x);
66 x[iX] += _x;
67 }
68 }
69 } else { /* upLo==Lower */
70 if (diag==NonUnit) {
71 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
72 VX _x;
73 dotu_generic(i+1, A+i*(i+1)/2, IndexType(1),
74 x, incX, _x);
75 x[iX] = _x;
76 }
77 } else { /* diag==Unit */
78 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
79 VX _x;
80 dotu_generic(i, A+i*(i+1)/2, IndexType(1),
81 x, incX, _x);
82 x[iX] += _x;
83 }
84 }
85 }
86 }
87 if (transA==Conj) {
88 if (upLo==Upper) {
89 if (diag==NonUnit) {
90 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
91 VX _x;
92 dot_generic(n-i, A+i*(2*n-i+1)/2, IndexType(1),
93 x+iX, incX, _x);
94 x[iX] = _x;
95 }
96 } else { /* diag==Unit */
97 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
98 VX _x;
99 dot_generic(n-i-1, A+i*(2*n-i+1)/2+1, IndexType(1),
100 x+iX+incX, incX, _x);
101 x[iX] += _x;
102 }
103 }
104 } else { /* upLo==Lower */
105 if (diag==NonUnit) {
106 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
107 VX _x;
108 dot_generic(i+1, A+i*(i+1)/2, IndexType(1),
109 x, incX, _x);
110 x[iX] = _x;
111 }
112 } else { /* diag==Unit */
113 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
114 VX _x;
115 dot_generic(i, A+i*(i+1)/2, IndexType(1),
116 x, incX, _x);
117 x[iX] += _x;
118 }
119 }
120 }
121 }
122 if (transA==Trans) {
123 if (upLo==Upper) {
124 if (diag==NonUnit) {
125 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
126 axpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
127 x+iX+incX, incX);
128 x[iX] *= *(A+i*(2*n-i+1)/2);
129 }
130 } else { /* diag==Unit */
131 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
132 axpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
133 x+iX+incX, incX);
134 }
135 }
136 } else { /* upLo==Lower */
137 if (diag==NonUnit) {
138 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
139 axpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
140 x, incX);
141 x[iX] *= *(A+i*(i+3)/2);
142 }
143 } else {
144 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
145 axpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
146 x, incX);
147 }
148 }
149 }
150 }
151 if (transA==ConjTrans) {
152 if (upLo==Upper) {
153 if (diag==NonUnit) {
154 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
155 acxpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
156 x+iX+incX, incX);
157 x[iX] *= conjugate(A[i*(2*n-i+1)/2]);
158 }
159 } else { /* diag==Unit */
160 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
161 acxpy_generic(n-1-i, x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
162 x+iX+incX, incX);
163 }
164 }
165 } else { /* upLo==Lower */
166 if (diag==NonUnit) {
167 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
168 acxpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
169 x, incX);
170 x[iX] *= conjugate(A[i*(i+3)/2]);
171 }
172 } else {
173 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
174 acxpy_generic(i, x[iX], A+i*(i+1)/2, IndexType(1),
175 x, incX);
176 }
177 }
178 }
179 }
180 }
181
182 template <typename IndexType, typename MA, typename VX>
183 void
184 tpmv(StorageOrder order, StorageUpLo upLo,
185 Transpose transA, Diag diag,
186 IndexType n,
187 const MA *A,
188 VX *x, IndexType incX)
189 {
190 CXXBLAS_DEBUG_OUT("tpmv_generic");
191
192 if (incX<0) {
193 x -= incX*(n-1);
194 }
195 tpmv_generic(order, upLo, transA, diag, n, A, x, incX);
196 }
197
198 } // namespace flens
199
200 #endif // CXXBLAS_LEVEL2_TPMV_TCC
201