1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_TBMV_TCC
 34 #define CXXBLAS_LEVEL2_TBMV_TCC 1
 35 
 36 #include <complex>
 37 #include <cxxblas/level1/level1.h>
 38 
 39 namespace cxxblas {
 40 
 41 template <typename IndexType, typename MA, typename VX>
 42 void
 43 tbmv_generic(StorageOrder order, StorageUpLo upLo,
 44              Transpose transA, Diag diag,
 45              IndexType n, IndexType k,
 46              const MA *A, IndexType ldA,
 47              VX *x, IndexType incX)
 48 {
 49     using std::max;
 50     using std::min;
 51 
 52     if (order==ColMajor) {
 53         transA = Transpose(transA^Trans);
 54         upLo = (upLo==Upper) ? Lower : Upper;
 55         tbmv_generic(RowMajor, upLo, transA, diag, n, k, A, ldA, x, incX);
 56         return;
 57     }
 58 
 59     if (transA==NoTrans) {
 60         if (upLo==Upper) {
 61             if (diag==NonUnit) {
 62                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 63                     IndexType len = min(k+1, n-i);
 64 
 65                     VX _x;
 66                     dotu_generic(len, A+ldA*i, IndexType(1),
 67                                       x+iX, IndexType(incX),
 68                                       _x);
 69                     x[iX] = _x;
 70                 }
 71             } else { /* diag==Unit */
 72                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 73                     IndexType len = min(k+1, n-i);
 74 
 75                     VX _x;
 76                     dotu_generic(len-1, A+ldA*i+1, IndexType(1),
 77                                         x+iX+incX, IndexType(incX),
 78                                         _x);
 79                     x[iX] += _x;
 80                 }
 81             }
 82         } else { /* upLo==Lower */
 83             if (diag==NonUnit) {
 84                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 85                     IndexType iA = max(k-i, IndexType(0));
 86                     IndexType len = min(k, i) + 1;
 87                     IndexType _i = max(i-k, IndexType(0));
 88 
 89                     VX _x;
 90                     dotu_generic(len, A+ldA*i+iA, IndexType(1),
 91                                       x+_i*incX, IndexType(incX),
 92                                       _x);
 93                     x[iX] = _x;
 94                 }
 95             } else { /* diag==Unit */
 96                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 97                     IndexType iA = max(k-i, IndexType(0));
 98                     IndexType len = min(k, i) + 1;
 99                     IndexType _i = max(i-k, IndexType(0));
100 
101                     VX _x;
102                     dotu_generic(len-1, A+ldA*i+iA, IndexType(1),
103                                         x+_i*incX, IndexType(incX),
104                                         _x);
105                     x[iX] += _x;
106                 }
107             }
108         }
109     }
110     if (transA==Conj) {
111         if (upLo==Upper) {
112             if (diag==NonUnit) {
113                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
114                     IndexType len = min(k+1, n-i);
115 
116                     VX _x;
117                     dot_generic(len, A+ldA*i, IndexType(1),
118                                      x+iX, IndexType(incX),
119                                      _x);
120                     x[iX] = _x;
121                 }
122             } else { /* diag==Unit */
123                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
124                     IndexType len = min(k+1, n-i);
125 
126                     VX _x;
127                     dot_generic(len-1, A+ldA*i+1, IndexType(1),
128                                        x+iX+incX, IndexType(incX),
129                                        _x);
130                     x[iX] += _x;
131                 }
132             }
133         } else { /* upLo==Lower */
134             if (diag==NonUnit) {
135                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
136                     IndexType iA = max(k-i, IndexType(0));
137                     IndexType len = min(k, i) + 1;
138                     IndexType _i = max(i-k, IndexType(0));
139 
140                     VX _x;
141                     dot_generic(len, A+ldA*i+iA, IndexType(1),
142                                      x+_i*incX, IndexType(incX),
143                                      _x);
144                     x[iX] = _x;
145                 }
146             } else { /* diag==Unit */
147                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
148                     IndexType iA = max(k-i, IndexType(0));
149                     IndexType len = min(k, i) + 1;
150                     IndexType _i = max(i-k, IndexType(0));
151 
152                     VX _x;
153                     dot_generic(len-1, A+ldA*i+iA, IndexType(1),
154                                        x+_i*incX, IndexType(incX),
155                                        _x);
156                     x[iX] += _x;
157                 }
158             }
159         }
160     }
161     if (transA==Trans) {
162         if (upLo==Upper) {
163             if (diag==NonUnit) {
164                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
165                     IndexType len = min(k+1, n-i);
166 
167                     axpy_generic(len-1, x[iX],
168                                         A+ldA*i+1, IndexType(1),
169                                         x+iX+incX, incX);
170                     x[iX] *= *(A+ldA*i);
171                 }
172             } else { /* diag==Unit */
173                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
174                     IndexType len = min(k+1, n-i);
175 
176                     axpy_generic(len-1, x[iX],
177                                         A+ldA*i+1, IndexType(1),
178                                         x+iX+incX, incX);
179                 }
180             }
181         } else { /* upLo==Lower */
182             if (diag==NonUnit) {
183                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
184                     IndexType iA = max(k-i, IndexType(0));
185                     IndexType len = min(k, i) + 1;
186                     IndexType iY = max(i-k, IndexType(0))*incX;
187 
188                     axpy_generic(len-1, x[iX],
189                                         A+ldA*i+iA, IndexType(1),
190                                         x+iY, incX);
191                     x[iX] *= *(A+ldA*i+iA+len-1);
192                 }
193             } else {
194                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
195                     IndexType iA = max(k-i, IndexType(0));
196                     IndexType len = min(k, i) + 1;
197                     IndexType iY = max(i-k, IndexType(0))*incX;
198 
199                     axpy_generic(len-1, x[iX],
200                                         A+ldA*i+iA, IndexType(1),
201                                         x+iY, incX);
202                 }
203             }
204         }
205     }
206     if (transA==ConjTrans) {
207         if (upLo==Upper) {
208             if (diag==NonUnit) {
209                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
210                     IndexType len = min(k+1, n-i);
211 
212                     acxpy_generic(len-1, x[iX],
213                                          A+ldA*i+1, IndexType(1),
214                                          x+iX+incX, incX);
215                     x[iX] *= conjugate(A[ldA*i]);
216                 }
217             } else { /* diag==Unit */
218                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
219                     IndexType len = min(k+1, n-i);
220 
221                     acxpy_generic(len-1, x[iX],
222                                          A+ldA*i+1, IndexType(1),
223                                          x+iX+incX, incX);
224                 }
225             }
226         } else { /* upLo==Lower */
227             if (diag==NonUnit) {
228                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
229                     IndexType iA = max(k-i, IndexType(0));
230                     IndexType len = min(k, i) + 1;
231                     IndexType iY = max(i-k, IndexType(0))*incX;
232 
233                     acxpy_generic(len-1, x[iX],
234                                          A+ldA*i+iA, IndexType(1),
235                                          x+iY, incX);
236                     x[iX] *= conjugate(A[ldA*i+iA+len-1]);
237                 }
238             } else {
239                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
240                     IndexType iA = max(k-i, IndexType(0));
241                     IndexType len = min(k, i) + 1;
242                     IndexType iY = max(i-k, IndexType(0))*incX;
243 
244                     acxpy_generic(len-1, x[iX],
245                                          A+ldA*i+iA, IndexType(1),
246                                          x+iY, incX);
247                 }
248             }
249         }
250     }
251 }
252 
253 //------------------------------------------------------------------------------
254 
255 template <typename IndexType, typename MA, typename VX>
256 void
257 tbmv(StorageOrder order, StorageUpLo upLo,
258      Transpose transA, Diag diag,
259      IndexType n, IndexType k,
260      const MA *A, IndexType ldA,
261      VX *x, IndexType incX)
262 {
263     CXXBLAS_DEBUG_OUT("tbmv_generic");
264 
265     if (n==0) {
266         return;
267     }
268     if (incX<0) {
269         x -= incX*(n-1);
270     }
271     tbmv_generic(order, upLo, transA, diag, n, k, A, ldA, x, incX);
272 }
273 
274 // namespace cxxblas
275 
276 #endif // CXXBLAS_LEVEL2_TBMV_TCC