1 /*
  2  *   Copyright (c) 2010, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL2_TPSV_TCC
 34 #define CXXBLAS_LEVEL2_TPSV_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename MA, typename VX>
 39 void
 40 tpsv_generic(StorageOrder order, StorageUpLo upLo,
 41              Transpose transA, Diag diag,
 42              IndexType n,
 43              const MA *A,
 44              VX *x, IndexType incX)
 45 {
 46     if (order==ColMajor) {
 47         transA = Transpose(transA^Trans);
 48         upLo = (upLo==Upper) ? Lower : Upper;
 49         tpsv_generic(RowMajor, upLo, transA, diag, n, A, x, incX);
 50         return;
 51     }
 52     if (transA==NoTrans) {
 53         if (upLo==Upper) {
 54             if (diag==NonUnit) {
 55                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 56                     VX _x;
 57                     dotu_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
 58                                         x+iX+incX, incX, _x);
 59                     x[iX] -= _x;
 60                     x[iX] /= *(A+i*(2*n-i+1)/2);
 61                 }
 62             } else { /* diag==Unit */
 63                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 64                     VX _x;
 65                     dotu_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
 66                                         x+iX+incX, incX, _x);
 67                     x[iX] -= _x;
 68                 }
 69             }
 70         } else { /* upLo==Lower */
 71             if (diag==NonUnit) {
 72                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
 73                     VX _x;
 74                     dotu_generic(i, A+i*(i+1)/2, IndexType(1),
 75                                     x, incX, _x);
 76                     x[iX] -= _x;
 77                     x[iX] /= *(A+i*(i+3)/2);
 78                 }
 79             } else { /* diag==Unit */
 80                 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
 81                     VX _x;
 82                     dotu_generic(i, A+i*(i+1)/2, IndexType(1),
 83                                     x, incX, _x);
 84                     x[iX] -= _x;
 85                 }
 86             }
 87         }
 88     }
 89     if (transA==Conj) {
 90         if (upLo==Upper) {
 91             if (diag==NonUnit) {
 92                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
 93                     VX _x;
 94                     dot_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
 95                                        x+iX+incX, incX, _x);
 96                     x[iX] -= _x;
 97                     x[iX] /= conjugate(A[i*(2*n-i+1)/2]);
 98                 }
 99             } else { /* diag==Unit */
100                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
101                     VX _x;
102                     dot_generic(n-1-i, A+i*(2*n-i+1)/2+1, IndexType(1),
103                                        x+iX+incX, incX, _x);
104                     x[iX] -= _x;
105                 }
106             }
107         } else { /* upLo==Lower */
108             if (diag==NonUnit) {
109                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
110                     VX _x;
111                     dot_generic(i, A+i*(i+1)/2, IndexType(1),
112                                    x, incX, _x);
113                     x[iX] -= _x;
114                     x[iX] /= conjugate(A[i*(i+3)/2]);
115                 }
116             } else { /* diag==Unit */
117                 for (IndexType i=1, iX=i*incX; i<n; ++i, iX+=incX) {
118                     VX _x;
119                     dot_generic(i, A+i*(i+1)/2, IndexType(1),
120                                    x, incX, _x);
121                     x[iX] -= _x;
122                 }
123             }
124         }
125     }
126     if (transA==Trans) {
127         if (upLo==Upper) {
128             if (diag==NonUnit) {
129                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
130                     x[iX] /= *(A+i*(2*n-i+1)/2);
131                     axpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
132                                                 x+iX+incX, incX);
133                 }
134             } else { /* diag==Unit */
135                 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
136                     axpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
137                                                 x+iX+incX, incX);
138                 }
139             }
140         } else { /* upLo==Lower */
141             if (diag==NonUnit) {
142                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
143                     x[iX] /= *(A+i*(i+3)/2);
144                     axpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
145                                             x, incX);
146                 }
147             } else {
148                 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
149                     axpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
150                                             x, incX);
151                 }
152             }
153         }
154     }
155     if (transA==ConjTrans) {
156         if (upLo==Upper) {
157             if (diag==NonUnit) {
158                 for (IndexType i=0, iX=0; i<n; ++i, iX+=incX) {
159                     x[iX] /= conjugate(A[i*(2*n-i+1)/2]);
160                     acxpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
161                                                  x+iX+incX, incX);
162                 }
163             } else { /* diag==Unit */
164                 for (IndexType i=0, iX=0; i<n-1; ++i, iX+=incX) {
165                     acxpy_generic(n-1-i, -x[iX], A+i*(2*n-i+1)/2+1, IndexType(1),
166                                                  x+iX+incX, incX);
167                 }
168             }
169         } else { /* upLo==Lower */
170             if (diag==NonUnit) {
171                 for (IndexType i=n-1, iX=i*incX; i>=0; --i, iX-=incX) {
172                     x[iX] /= conjugate(A[i*(i+3)/2]);
173                     acxpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
174                                              x, incX);
175                 }
176             } else {
177                 for (IndexType i=n-1, iX=i*incX; i>0; --i, iX-=incX) {
178                     acxpy_generic(i, -x[iX], A+i*(i+1)/2, IndexType(1),
179                                              x, incX);
180                 }
181             }
182         }
183     }
184 }
185 
186 template <typename IndexType, typename MA, typename VX>
187 void
188 tpsv(StorageOrder order, StorageUpLo upLo,
189      Transpose transA, Diag diag,
190      IndexType n,
191      const MA *A,
192      VX *x, IndexType incX)
193 {
194     CXXBLAS_DEBUG_OUT("tpsv_generic");
195 
196     if (incX<0) {
197         x -= incX*(n-1);
198     }
199     tpsv_generic(order, upLo, transA, diag, n, A, x, incX);
200 }
201 
202 // namespace flens
203 
204 #endif // CXXBLAS_LEVEL2_TPSV_TCC
205