1 /*
  2  *   Copyright (c) 2010, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_TRSV_TCC
 34 #define CXXBLAS_LEVEL2_TRSV_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename MA, typename VX>
 39 void
 40 trsv_generic(StorageOrder order, StorageUpLo upLo,
 41              Transpose transA, Diag diag,
 42              IndexType n,
 43              const MA *A, IndexType ldA,
 44              VX *x, IndexType incX)
 45 {
 46     if (order==ColMajor) {
 47         transA = Transpose(transA^Trans);
 48         upLo = (upLo==Lower) ? Upper : Lower;
 49     }
 50     if (transA==NoTrans) {
 51         if (upLo==Upper) {
 52             if (diag==NonUnit) {
 53                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 54                     VX _x;
 55                     dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
 56                                         x+iX+incX, incX, _x);
 57                     x[iX] -= _x;
 58                     x[iX] /= A[i*(ldA+1)];
 59                 }
 60             } else { /* diag==Unit */
 61                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 62                     VX _x;
 63                     dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
 64                                         x+iX+incX, incX, _x);
 65                     x[iX] -= _x;
 66                 }
 67             }
 68         } else { /* upLo==Lower */
 69             if (diag==NonUnit) {
 70                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 71                     VX _x;
 72                     dotu_generic(i, A+i*ldA, IndexType(1),
 73                                     x, incX, _x);
 74                     x[iX] -= _x;
 75                     x[iX] /= A[i*(ldA+1)];
 76                 }
 77             } else { /* diag==Unit */
 78                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 79                     VX _x;
 80                     dotu_generic(i, A+i*ldA, IndexType(1),
 81                                     x, incX, _x);
 82                     x[iX] -= _x;
 83                 }
 84             }
 85         }
 86     }
 87     if (transA==Conj) {
 88         if (upLo==Upper) {
 89             if (diag==NonUnit) {
 90                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 91                     VX _x;
 92                     dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
 93                                        x+iX+incX, incX, _x);
 94                     x[iX] -= _x;
 95                     x[iX] /= conjugate(A[i*(ldA+1)]);
 96                 }
 97             } else { /* diag==Unit */
 98                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 99                     VX _x;
100                     dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
101                                        x+iX+incX, incX, _x);
102                     x[iX] -= _x;
103                 }
104             }
105         } else { /* upLo==Lower */
106             if (diag==NonUnit) {
107                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
108                     VX _x;
109                     dot_generic(i, A+i*ldA, IndexType(1),
110                                    x, incX, _x);
111                     x[iX] -= _x;
112                     x[iX] /= conjugate(A[i*(ldA+1)]);
113                 }
114             } else { /* diag==Unit */
115                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
116                     VX _x;
117                     dot_generic(i, A+i*ldA, IndexType(1),
118                                    x, incX, _x);
119                     x[iX] -= _x;
120                 }
121             }
122         }
123     }
124     if (transA==Trans) {
125         if (upLo==Upper) {
126             if (diag==NonUnit) {
127                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
128                     VX _x;
129                     dotu_generic(i, A+i, IndexType(ldA),
130                                     x, incX, _x);
131                     x[iX] -= _x;
132                     x[iX] /= A[i*(ldA+1)];
133                 }
134             } else { /* diag==Unit */
135                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
136                     VX _x;
137                     dotu_generic(i, A+i, IndexType(ldA),
138                                     x, incX, _x);
139                     x[iX] -= _x;
140                 }
141             }
142         } else { /* upLo==Lower */
143             if (diag==NonUnit) {
144                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
145                     VX _x;
146                     dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
147                                         x+iX+incX, incX, _x);
148                     x[iX] -= _x;
149                     x[iX] /= A[i*(ldA+1)];
150                 }
151             } else {
152                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
153                     VX _x;
154                     dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
155                                         x+iX+incX, incX, _x);
156                     x[iX] -= _x;
157                 }
158             }
159         }
160     }
161     if (transA==ConjTrans) {
162         if (upLo==Upper) {
163             if (diag==NonUnit) {
164                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
165                     VX _x;
166                     dot_generic(i, A+i, IndexType(ldA),
167                                    x, incX, _x);
168                     x[iX] -= _x;
169                     x[iX] /= conjugate(A[i*(ldA+1)]);
170                 }
171             } else { /* diag==Unit */
172                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
173                     VX _x;
174                     dot_generic(i, A+i, IndexType(ldA),
175                                    x, incX, _x);
176                     x[iX] -= _x;
177                 }
178             }
179         } else { /* upLo==Lower */
180             if (diag==NonUnit) {
181                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
182                     VX _x;
183                     dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
184                                        x+iX+incX, incX, _x);
185                     x[iX] -= _x;
186                     x[iX] /= conjugate(A[i*(ldA+1)]);
187                 }
188             } else {
189                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
190                     VX _x;
191                     dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
192                                        x+iX+incX, incX, _x);
193                     x[iX] -= _x;
194                 }
195             }
196         }
197     }
198 }
199 
200 template <typename IndexType, typename MA, typename VX>
201 void
202 trsv(StorageOrder order, StorageUpLo upLo,
203      Transpose transA, Diag diag,
204      IndexType n,
205      const MA *A, IndexType ldA,
206      VX *x, IndexType incX)
207 {
208     CXXBLAS_DEBUG_OUT("trsv_generic");
209 
210     if (incX<0) {
211         x -= incX*(n-1);
212     }
213     trsv_generic(order, upLo, transA, diag, n, A, ldA, x, incX);
214 }
215 
216 #ifdef HAVE_CBLAS
217 
218 // strsv
219 template <typename IndexType>
220 typename If<IndexType>::isBlasCompatibleInteger
221 trsv(StorageOrder order, StorageUpLo upLo,
222      Transpose transA, Diag diag,
223      IndexType n,
224      const float *A, IndexType ldA,
225      float *x, IndexType incX)
226 {
227     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_strsv");
228 
229     cblas_strsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
230                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
231                 n,
232                 A, ldA,
233                 x, incX);
234 }
235 
236 // dtrsv
237 template <typename IndexType>
238 typename If<IndexType>::isBlasCompatibleInteger
239 trsv(StorageOrder order, StorageUpLo upLo,
240      Transpose transA, Diag diag,
241      IndexType n,
242      const double *A, IndexType ldA,
243      double *x, IndexType incX)
244 {
245     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dtrsv");
246 
247     cblas_dtrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
248                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
249                 n,
250                 A, ldA,
251                 x, incX);
252 }
253 
254 // ctrsv
255 template <typename IndexType>
256 typename If<IndexType>::isBlasCompatibleInteger
257 trsv(StorageOrder order, StorageUpLo upLo,
258      Transpose transA, Diag diag,
259      IndexType n,
260      const ComplexFloat *A, IndexType ldA,
261      ComplexFloat *x, IndexType incX)
262 {
263     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ctrsv");
264 
265     cblas_ctrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
266                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
267                 n,
268                 reinterpret_cast<const float *>(A), ldA,
269                 reinterpret_cast<const float *>(x), incX);
270 }
271 
272 // ztrsv
273 template <typename IndexType>
274 typename If<IndexType>::isBlasCompatibleInteger
275 trsv(StorageOrder order, StorageUpLo upLo,
276      Transpose transA, Diag diag,
277      IndexType n,
278      const ComplexDouble *A, IndexType ldA,
279      ComplexDouble *x, IndexType incX)
280 {
281     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ztrsv");
282 
283     cblas_ztrsv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
284                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
285                 n,
286                 reinterpret_cast<const double *>(A), ldA,
287                 reinterpret_cast<const double *>(x), incX);
288 }
289 
290 #endif // HAVE_CBLAS
291 
292 // namespace cxxblas
293 
294 #endif // CXXBLAS_LEVEL2_TRSV_TCC
295