1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL3_SYR2K_TCC
 34 #define CXXBLAS_LEVEL3_SYR2K_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename ALPHA, typename MA, typename MB,
 39           typename BETA, typename MC>
 40 void
 41 syr2k_generic(StorageOrder order, StorageUpLo upLoC,
 42               Transpose transAB,
 43               IndexType n, IndexType k,
 44               const ALPHA &alpha,
 45               const MA *A, IndexType ldA,
 46               const MB *B, IndexType ldB,
 47               const BETA &beta,
 48               MC *C, IndexType ldC)
 49 {
 50     if (order==ColMajor) {
 51         upLoC = (upLoC==Upper) ? Lower : Upper;
 52         transAB = Transpose(transAB^Trans);
 53         syr2k_generic(RowMajor, upLoC, transAB, n, k,
 54                       alpha, A, ldA, B, ldB, beta, C, ldC);
 55         return;
 56     }
 57     syscal(order, upLoC, n, beta, C, ldC);    
 58     if (k==0) {
 59         return;
 60     }
 61     if (transAB==NoTrans) {
 62         for (IndexType l=0; l<k; ++l) {
 63             syr2(order,  upLoC, n, alpha,
 64                  A+l, ldA, B+l, ldB,
 65                  C, ldC);
 66         }
 67     }
 68     if (transAB==Conj) {
 69         for (IndexType l=0; l<k; ++l) {
 70             syr2(order,  upLoC, n, alpha,
 71                  A+l, ldA, B+l, ldB,
 72                  C, ldC);
 73         }
 74     }
 75     if (transAB==Trans) {
 76         for (IndexType l=0; l<k; ++l) {
 77             syr2(order,  upLoC, n, alpha,
 78                  A+l*ldA, IndexType(1), B+l*ldB, IndexType(1),
 79                  C, ldC);
 80         }
 81     }
 82     if (transAB==ConjTrans) {
 83         for (IndexType l=0; l<k; ++l) {
 84             syr2(order,  upLoC, n, alpha,
 85                  A+l*ldA, IndexType(1), B+l*ldB, IndexType(1),
 86                  C, ldC);
 87         }
 88     }
 89 }
 90 
 91 template <typename IndexType, typename ALPHA, typename MA, typename MB,
 92           typename BETA, typename MC>
 93 void
 94 syr2k(StorageOrder order, StorageUpLo upLo,
 95       Transpose trans,
 96       IndexType n, IndexType k,
 97       const ALPHA &alpha,
 98       const MA *A, IndexType ldA,
 99       const MB *B, IndexType ldB,
100       const BETA &beta,
101       MC *C, IndexType ldC)
102 {
103     CXXBLAS_DEBUG_OUT("syr2k_generic");
104 
105     syr2k_generic(order, upLo, trans, n, k,
106                   alpha, A, ldA, B, ldB,
107                   beta, C, ldC);
108 }
109 
110 #ifdef HAVE_CBLAS
111 
112 // ssyr2k
113 template <typename IndexType>
114 typename If<IndexType>::isBlasCompatibleInteger
115 syr2k(StorageOrder order, StorageUpLo upLo,
116       Transpose trans,
117       IndexType n, IndexType k,
118       float alpha,
119       const float *A, IndexType ldA,
120       const float *B, IndexType ldB,
121       float beta,
122       float *C, IndexType ldC)
123 {
124     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ssyr2k");
125 
126     cblas_ssyr2k(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
127                  CBLAS::getCblasType(trans),
128                  n, k,
129                  alpha,
130                  A, ldA,
131                  B, ldB,
132                  beta,
133                  C, ldC);
134 }
135 
136 // dsyr2k
137 template <typename IndexType>
138 typename If<IndexType>::isBlasCompatibleInteger
139 syr2k(StorageOrder order, StorageUpLo upLo,
140       Transpose trans,
141       IndexType n, IndexType k,
142       double alpha,
143       const double *A, IndexType ldA,
144       const double *B, IndexType ldB,
145       double beta,
146       double *C, IndexType ldC)
147 {
148     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dsyr2k");
149 
150     cblas_dsyr2k(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
151                  CBLAS::getCblasType(trans),
152                  n, k,
153                  alpha,
154                  A, ldA,
155                  B, ldB,
156                  beta,
157                  C, ldC);
158 }
159 
160 // csyr2k
161 template <typename IndexType>
162 typename If<IndexType>::isBlasCompatibleInteger
163 syr2k(StorageOrder order, StorageUpLo upLo,
164       Transpose trans,
165       IndexType n, IndexType k,
166       const ComplexFloat &alpha,
167       const ComplexFloat *A, IndexType ldA,
168       const ComplexFloat *B, IndexType ldB,
169       const ComplexFloat &beta,
170       ComplexFloat *C, IndexType ldC)
171 {
172     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_csyr2k");
173 
174     cblas_csyr2k(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
175                  CBLAS::getCblasType(trans),
176                  n, k,
177                  reinterpret_cast<const float *>(&alpha),
178                  reinterpret_cast<const float *>(A), ldA,
179                  reinterpret_cast<const float *>(B), ldB,
180                  reinterpret_cast<const float *>(&beta),
181                  reinterpret_cast<const float *>(C), ldC);
182 }
183 
184 // zsyr2k
185 template <typename IndexType>
186 typename If<IndexType>::isBlasCompatibleInteger
187 syr2k(StorageOrder order, StorageUpLo upLo,
188       Transpose trans,
189       IndexType n, IndexType k,
190       const ComplexDouble &alpha,
191       const ComplexDouble *A, IndexType ldA,
192       const ComplexDouble *B, IndexType ldB,
193       const ComplexDouble &beta,
194       ComplexDouble *C, IndexType ldC)
195 {
196     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zsyr2k");
197 
198     cblas_zsyr2k(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
199                  CBLAS::getCblasType(trans),
200                  n, k,
201                  reinterpret_cast<const double *>(&alpha),
202                  reinterpret_cast<const double *>(A), ldA,
203                  reinterpret_cast<const double *>(B), ldB,
204                  reinterpret_cast<const double *>(&beta),
205                  reinterpret_cast<const double *>(C), ldC);
206 }
207 
208 #endif // HAVE_CBLAS
209 
210 // namespace cxxblas
211 
212 #endif // CXXBLAS_LEVEL3_SYR2K_TCC
213