1 /*
  2  *   Copyright (c) 2010, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_TRMV_TCC
 34 #define CXXBLAS_LEVEL2_TRMV_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename MA, typename VX>
 39 void
 40 trmv_generic(StorageOrder order, StorageUpLo upLo,
 41              Transpose transA, Diag diag,
 42              IndexType n,
 43              const MA *A, IndexType ldA,
 44              VX *x, IndexType incX)
 45 {
 46     if (order==ColMajor) {
 47         transA = Transpose(transA^Trans);
 48         upLo = (upLo==Upper) ? Lower : Upper;
 49         trmv_generic(RowMajor, upLo, transA, diag, n, A, ldA, x, incX);
 50         return;
 51     }
 52     if (transA==NoTrans) {
 53         if (upLo==Upper) {
 54             if (diag==NonUnit) {
 55                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 56                     VX _x;
 57                     dotu_generic(n-i, A+i*(ldA+1), IndexType(1),
 58                                      x+iX, incX, _x);
 59                     x[iX] = _x;
 60                 }
 61             } else { /* diag==Unit */
 62                 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
 63                     VX _x;
 64                     dotu_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
 65                                        x+iX+incX, incX, _x);
 66                     x[iX] += _x;
 67                 }
 68             }
 69         } else { /* upLo==Lower */
 70             if (diag==NonUnit) {
 71                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 72                     VX _x;
 73                     dotu_generic(i+1, A+i*ldA, IndexType(1),
 74                                       x, incX, _x);
 75                     x[iX] = _x;
 76                 }
 77             } else { /* diag==Unit */
 78                 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
 79                     VX _x;
 80                     dotu_generic(i, A+i*ldA, IndexType(1),
 81                                     x, incX, _x);
 82                     x[iX] += _x;
 83                 }
 84             }
 85         }
 86     }
 87     if (transA==Conj) {
 88         if (upLo==Upper) {
 89             if (diag==NonUnit) {
 90                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 91                     VX _x;
 92                     dot_generic(n-i, A+i*(ldA+1), IndexType(1),
 93                                       x+iX, incX, _x);
 94                     x[iX] = _x;
 95                 }
 96             } else { /* diag==Unit */
 97                 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
 98                     VX _x;
 99                     dot_generic(n-i-1, A+i*(ldA+1)+1, IndexType(1),
100                                         x+iX+incX, incX, _x);
101                     x[iX] += _x;
102                 }
103             }
104         } else { /* upLo==Lower */
105             if (diag==NonUnit) {
106                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
107                     VX _x;
108                     dot_generic(i+1, A+i*ldA, IndexType(1),
109                                      x, incX, _x);
110                     x[iX] = _x;
111                 }
112             } else { /* diag==Unit */
113                 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
114                     VX _x;
115                     dot_generic(i, A+i*ldA, IndexType(1),
116                                    x, incX, _x);
117                     x[iX] += _x;
118                 }
119             }
120         }
121     }
122     if (transA==Trans) {
123         if (upLo==Upper) {
124             if (diag==NonUnit) {
125                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
126                     VX _x;
127                     dotu_generic(i+1, A+i, IndexType(ldA),
128                                      x, incX, _x);
129                     x[iX] = _x;
130                 }
131             } else { /* diag==Unit */
132                 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
133                     VX _x;
134                     dotu_generic(i, A+i, IndexType(ldA),
135                                    x, incX, _x);
136                     x[iX] += _x;
137                 }
138             }
139         } else { /* upLo==Lower */
140             if (diag==NonUnit) {
141                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
142                     VX _x;
143                     dotu_generic(n-i, A+i*(ldA+1), IndexType(ldA),
144                                       x+iX, incX, _x);
145                     x[iX] = _x;
146                 }
147             } else {
148                 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
149                     VX _x;
150                     dotu_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
151                                         x+iX+incX, incX, _x);
152                     x[iX] += _x;
153                 }
154             }
155         }
156     }
157     if (transA==ConjTrans) {
158         if (upLo==Upper) {
159             if (diag==NonUnit) {
160                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
161                     VX _x;
162                     dot_generic(i+1, A+i, IndexType(ldA),
163                                      x, incX, _x);
164                     x[iX] = _x;
165                 }
166             } else { /* diag==Unit */
167                 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
168                     VX _x;
169                     dot_generic(i, A+i, IndexType(ldA),
170                                    x, incX, _x);
171                     x[iX] += _x;
172                 }
173             }
174         } else { /* upLo==Lower */
175             if (diag==NonUnit) {
176                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
177                     VX _x;
178                     dot_generic(n-i, A+i*(ldA+1), IndexType(ldA),
179                                      x+iX, incX, _x);
180                     x[iX] = _x;
181                 }
182             } else {
183                 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
184                     VX _x;
185                     dot_generic(n-i-1, A+i*(ldA+1)+ldA, IndexType(ldA),
186                                        x+iX+incX, incX, _x);
187                     x[iX] += _x;
188                 }
189             }
190         }
191     }
192 }
193 
194 template <typename IndexType, typename MA, typename VX>
195 void
196 trmv(StorageOrder order, StorageUpLo upLo,
197      Transpose transA, Diag diag,
198      IndexType n,
199      const MA *A, IndexType ldA,
200      VX *x, IndexType incX)
201 {
202     CXXBLAS_DEBUG_OUT("trmv_generic");
203 
204     if (incX<0) {
205         x -= incX*(n-1);
206     }
207     trmv_generic(order, upLo, transA, diag, n, A, ldA, x, incX);
208 }
209 
210 #ifdef HAVE_CBLAS
211 
212 // strmv
213 template <typename IndexType>
214 typename If<IndexType>::isBlasCompatibleInteger
215 trmv(StorageOrder order, StorageUpLo upLo,
216      Transpose transA, Diag diag,
217      IndexType n,
218      const float *A, IndexType ldA,
219      float *x, IndexType incX)
220 {
221     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_strmv");
222 
223     cblas_strmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
224                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
225                 n,
226                 A, ldA,
227                 x, incX);
228 }
229 
230 // dtrmv
231 template <typename IndexType>
232 typename If<IndexType>::isBlasCompatibleInteger
233 trmv(StorageOrder order, StorageUpLo upLo,
234      Transpose transA, Diag diag,
235      IndexType n,
236      const double *A, IndexType ldA,
237      double *x, IndexType incX)
238 {
239     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dtrmv");
240 
241     cblas_dtrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
242                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
243                 n,
244                 A, ldA,
245                 x, incX);
246 }
247 
248 // ctrmv
249 template <typename IndexType>
250 typename If<IndexType>::isBlasCompatibleInteger
251 trmv(StorageOrder order, StorageUpLo upLo,
252      Transpose transA, Diag diag,
253      IndexType n,
254      const ComplexFloat *A, IndexType ldA,
255      ComplexFloat *x, IndexType incX)
256 {
257     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ctrmv");
258 
259     cblas_ctrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
260                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
261                 n,
262                 reinterpret_cast<const float *>(A), ldA,
263                 reinterpret_cast<const float *>(x), incX);
264 }
265 
266 // ztrmv
267 template <typename IndexType>
268 typename If<IndexType>::isBlasCompatibleInteger
269 trmv(StorageOrder order, StorageUpLo upLo,
270      Transpose transA, Diag diag,
271      IndexType n,
272      const ComplexDouble *A, IndexType ldA,
273      ComplexDouble *x, IndexType incX)
274 {
275     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ztrmv");
276 
277     cblas_ztrmv(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
278                 CBLAS::getCblasType(transA), CBLAS::getCblasType(diag),
279                 n,
280                 reinterpret_cast<const double *>(A), ldA,
281                 reinterpret_cast<const double *>(x), incX);
282 }
283 
284 #endif // HAVE_CBLAS
285 
286 // namespace flens
287 
288 #endif // CXXBLAS_LEVEL2_TRMV_TCC
289