1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_TBSV_TCC
 34 #define CXXBLAS_LEVEL2_TBSV_TCC 1
 35 
 36 #include <complex>
 37 #include <cxxblas/level1/level1.h>
 38 
 39 namespace cxxblas {
 40 
 41 template <typename IndexType, typename MA, typename VX>
 42 void
 43 tbsv_generic(StorageOrder order, StorageUpLo upLo,
 44              Transpose transA, Diag diag,
 45              IndexType n, IndexType k,
 46              const MA *A, IndexType ldA,
 47              VX *x, IndexType incX)
 48 {
 49     using std::max;
 50     using std::min;
 51 
 52     if (order==ColMajor) {
 53         transA = Transpose(transA^Trans);
 54         upLo = (upLo==Upper) ? Lower : Upper;
 55     }
 56 
 57     if (transA==NoTrans) {
 58         if (upLo==Upper) {
 59             if (diag==NonUnit) {
 60                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 61                     IndexType len = min(k+1, n-i);
 62 
 63                     VX _x;
 64                     dotu_generic(len-1, A+ldA*i+1, IndexType(1),
 65                                         x+iX+incX, IndexType(incX),
 66                                         _x);
 67                     x[iX] -= _x;
 68                     x[iX] /= *(A+ldA*i);
 69                 }
 70             } else { /* diag==Unit */
 71                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 72                     IndexType len = min(k+1, n-i);
 73 
 74                     VX _x;
 75                     dotu_generic(len-1, A+ldA*i+1, IndexType(1),
 76                                         x+iX+incX, IndexType(incX),
 77                                         _x);
 78                     x[iX] -= _x;
 79                 }
 80             }
 81         } else { /* upLo==Lower */
 82             if (diag==NonUnit) {
 83                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 84                     IndexType iA = max(k-i, IndexType(0));
 85                     IndexType len = min(k, i) + 1;
 86                     IndexType _i = max(i-k, IndexType(0));
 87 
 88                     VX _x;
 89                     dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
 90                                         x+_i*incX, IndexType(incX),
 91                                         _x);
 92                     x[iX] -= _x;
 93                     x[iX] /= *(A+ldA*i+iA + len-1);
 94                 }
 95             } else { /* diag==Unit */
 96                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 97                     IndexType iA = max(k-i, IndexType(0));
 98                     IndexType len = min(k, i) + 1;
 99                     IndexType _i = max(i-k, IndexType(0));
100 
101                     VX _x;
102                     dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
103                                         x+_i*incX, IndexType(incX),
104                                         _x);
105                     x[iX] -= _x;
106                 }
107             }
108         }
109     }
110     if (transA==Conj) {
111         if (upLo==Upper) {
112             if (diag==NonUnit) {
113                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
114                     IndexType len = min(k+1, n-i);
115 
116                     VX _x;
117                     dot_generic(len-1, A+ldA*i+1, IndexType(1),
118                                        x+iX+incX, IndexType(incX),
119                                        _x);
120                     x[iX] -= _x;
121                     x[iX] /= conjugate(A[ldA*i]);
122                 }
123             } else { /* diag==Unit */
124                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
125                     IndexType len = min(k+1, n-i);
126 
127                     VX _x;
128                     dot_generic(len-1, A+ldA*i+1, IndexType(1),
129                                        x+iX+incX, IndexType(incX),
130                                        _x);
131                     x[iX] -= _x;
132                 }
133             }
134         } else { /* upLo==Lower */
135             if (diag==NonUnit) {
136                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
137                     IndexType iA = max(k-i, IndexType(0));
138                     IndexType len = min(k, i) + 1;
139                     IndexType _i = max(i-k, IndexType(0));
140 
141                     VX _x;
142                     dot_generic(len-1, A+ldA*i+iA, IndexType(1),
143                                        x+_i*incX, IndexType(incX),
144                                        _x);
145                     x[iX] -= _x;
146                     x[iX] /= conjugate(A[ldA*i+iA + len-1]);
147                 }
148             } else { /* diag==Unit */
149                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
150                     IndexType iA = max(k-i, IndexType(0));
151                     IndexType len = min(k, i) + 1;
152                     IndexType _i = max(i-k, IndexType(0));
153 
154                     VX _x;
155                     dot_generic(len-1, A+ldA*i+iA, IndexType(1),
156                                        x+_i*incX, IndexType(incX),
157                                        _x);
158                     x[iX] -= _x;
159                 }
160             }
161         }
162     }
163     if (transA==Trans) {
164         if (upLo==Upper) {
165             if (diag==NonUnit) {
166                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
167                     IndexType len = min(k+1, n-i);
168 
169                     x[iX] /= *(A+ldA*i);
170                     axpy_generic(len-1, -x[iX],
171                                         A+ldA*i+1, IndexType(1),
172                                         x+iX+incX, incX);
173                 }
174             } else { /* diag==Unit */
175                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
176                     IndexType len = min(k+1, n-i);
177 
178                     axpy_generic(len-1, -x[iX],
179                                         A+ldA*i+1, IndexType(1),
180                                         x+iX+incX, incX);
181                 }
182             }
183         } else { /* upLo==Lower */
184             if (diag==NonUnit) {
185                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
186                     IndexType iA = max(k-i, IndexType(0));
187                     IndexType len = min(k, i) + 1;
188                     IndexType iY = max(i-k, IndexType(0))*incX;
189 
190                     x[iX] /= *(A+ldA*i+iA+len-1);
191                     axpy_generic(len-1, -x[iX],
192                                         A+ldA*i+iA, IndexType(1),
193                                         x+iY, incX);
194                 }
195             } else {
196                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
197                     IndexType iA = max(k-i, IndexType(0));
198                     IndexType len = min(k, i) + 1;
199                     IndexType iY = max(i-k, IndexType(0))*incX;
200 
201                     axpy_generic(len-1, -x[iX],
202                                         A+ldA*i+iA, IndexType(1),
203                                         x+iY, incX);
204                 }
205             }
206         }
207     }
208     if (transA==ConjTrans) {
209         if (upLo==Upper) {
210             if (diag==NonUnit) {
211                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
212                     IndexType len = min(k+1, n-i);
213 
214                     x[iX] /= conjugate(A[ldA*i]);
215                     acxpy_generic(len-1, -x[iX],
216                                          A+ldA*i+1, IndexType(1),
217                                          x+iX+incX, incX);
218                 }
219             } else { /* diag==Unit */
220                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
221                     IndexType len = min(k+1, n-i);
222 
223                     acxpy_generic(len-1, -x[iX],
224                                          A+ldA*i+1, IndexType(1),
225                                          x+iX+incX, incX);
226                 }
227             }
228         } else { /* upLo==Lower */
229             if (diag==NonUnit) {
230                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
231                     IndexType iA = max(k-i, IndexType(0));
232                     IndexType len = min(k, i) + 1;
233                     IndexType iY = max(i-k, IndexType(0))*incX;
234 
235                     x[iX] /= conjugate(A[ldA*i+iA+len-1]);
236                     acxpy_generic(len-1, -x[iX],
237                                          A+ldA*i+iA, IndexType(1),
238                                          x+iY, incX);
239                 }
240             } else {
241                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
242                     IndexType iA = max(k-i, IndexType(0));
243                     IndexType len = min(k, i) + 1;
244                     IndexType iY = max(i-k, IndexType(0))*incX;
245 
246                     acxpy_generic(len-1, -x[iX],
247                                          A+ldA*i+iA, IndexType(1),
248                                          x+iY, incX);
249                 }
250             }
251         }
252     }
253 }
254 
255 //------------------------------------------------------------------------------
256 
257 template <typename IndexType, typename MA, typename VX>
258 void
259 tbsv(StorageOrder order, StorageUpLo upLo,
260      Transpose transA, Diag diag,
261      IndexType n, IndexType k,
262      const MA *A, IndexType ldA,
263      VX *x, IndexType incX)
264 {
265     CXXBLAS_DEBUG_OUT("tbsv_generic");
266 
267     if (n==0) {
268         return;
269     }
270     if (incX<0) {
271         x -= incX*(n-1);
272     }
273     tbsv_generic(order, upLo, transA, diag, n, k, A, ldA, x, incX);
274 }
275 
276 // namespace cxxblas
277 
278 #endif // CXXBLAS_LEVEL2_TBSV_TCC