1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL3_SYRK_TCC
 34 #define CXXBLAS_LEVEL3_SYRK_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename ALPHA, typename MA,
 39           typename BETA, typename MC>
 40 void
 41 syrk_generic(StorageOrder order, StorageUpLo upLoC,
 42              Transpose transA,
 43              IndexType n, IndexType k,
 44              const ALPHA &alpha,
 45              const MA *A, IndexType ldA,
 46              const BETA &beta,
 47              MC *C, IndexType ldC)
 48 {
 49     if (order==ColMajor) {
 50         upLoC = (upLoC==Upper) ? Lower : Upper;
 51         transA = Transpose(transA^Trans);
 52         syrk_generic(RowMajor, upLoC, transA, n, k,
 53                      alpha, A, ldA, beta, C, ldC);
 54         return;
 55     }
 56     syscal(order, upLoC, n, beta, C, ldC);    
 57     if (k==0) {
 58         return;
 59     }
 60     if (transA==NoTrans) {
 61         for (IndexType l=0; l<k; ++l) {
 62             syr(order,  upLoC, n, alpha, A+l, ldA, C, ldC);
 63         }
 64     }
 65     if (transA==Conj) {
 66         for (IndexType l=0; l<k; ++l) {
 67             syr(order,  upLoC, n, alpha, A+l, ldA, C, ldC);
 68         }
 69     }
 70     if (transA==Trans) {
 71         for (IndexType l=0; l<k; ++l) {
 72             syr(order,  upLoC, n, alpha, A+l*ldA, IndexType(1), C, ldC);
 73         }
 74     }
 75     if (transA==ConjTrans) {
 76         for (IndexType l=0; l<k; ++l) {
 77             syr(order,  upLoC, n, alpha, A+l*ldA, IndexType(1), C, ldC);
 78         }
 79     }
 80 }
 81 
 82 template <typename IndexType, typename ALPHA, typename MA,
 83           typename BETA, typename MC>
 84 void
 85 syrk(StorageOrder order, StorageUpLo upLo,
 86      Transpose trans,
 87      IndexType n, IndexType k,
 88      const ALPHA &alpha,
 89      const MA *A, IndexType ldA,
 90      const BETA &beta,
 91      MC *C, IndexType ldC)
 92 {
 93     CXXBLAS_DEBUG_OUT("syrk_generic");
 94 
 95     syrk_generic(order, upLo, trans, n, k, alpha, A, ldA, beta, C, ldC);
 96 }
 97 
 98 #ifdef HAVE_CBLAS
 99 
100 // ssyrk
101 template <typename IndexType>
102 typename If<IndexType>::isBlasCompatibleInteger
103 syrk(StorageOrder order, StorageUpLo upLo,
104      Transpose trans,
105      IndexType n, IndexType k,
106      float alpha,
107      const float *A, IndexType ldA,
108      float beta,
109      float *C, IndexType ldC)
110 {
111     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_ssyrk");
112 
113     cblas_ssyrk(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
114                 CBLAS::getCblasType(trans),
115                 n, k,
116                 alpha,
117                 A, ldA,
118                 beta,
119                 C, ldC);
120 }
121 
122 // dsyrk
123 template <typename IndexType>
124 typename If<IndexType>::isBlasCompatibleInteger
125 syrk(StorageOrder order, StorageUpLo upLo,
126      Transpose trans,
127      IndexType n, IndexType k,
128      double alpha,
129      const double *A, IndexType ldA,
130      double beta,
131      double *C, IndexType ldC)
132 {
133     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_dsyrk");
134 
135     cblas_dsyrk(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
136                 CBLAS::getCblasType(trans),
137                 n, k,
138                 alpha,
139                 A, ldA,
140                 beta,
141                 C, ldC);
142 }
143 
144 // csyrk
145 template <typename IndexType>
146 typename If<IndexType>::isBlasCompatibleInteger
147 syrk(StorageOrder order, StorageUpLo upLo,
148      Transpose trans,
149      IndexType n, IndexType k,
150      const ComplexFloat &alpha,
151      const ComplexFloat *A, IndexType ldA,
152      const ComplexFloat &beta,
153      ComplexFloat *C, IndexType ldC)
154 {
155     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_csyrk");
156 
157     cblas_csyrk(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
158                 CBLAS::getCblasType(trans),
159                 n, k,
160                 reinterpret_cast<const float *>(&alpha),
161                 reinterpret_cast<const float *>(A), ldA,
162                 reinterpret_cast<const float *>(&beta),
163                 reinterpret_cast<const float *>(C), ldC);
164 }
165 
166 // zsyrk
167 template <typename IndexType>
168 typename If<IndexType>::isBlasCompatibleInteger
169 syrk(StorageOrder order, StorageUpLo upLo,
170      Transpose trans,
171      IndexType n, IndexType k,
172      const ComplexDouble &alpha,
173      const ComplexDouble *A, IndexType ldA,
174      const ComplexDouble &beta,
175      ComplexDouble *C, IndexType ldC)
176 {
177     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zsyrk");
178 
179     cblas_zsyrk(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
180                 CBLAS::getCblasType(trans),
181                 n, k,
182                 reinterpret_cast<const double *>(&alpha),
183                 reinterpret_cast<const double *>(A), ldA,
184                 reinterpret_cast<const double *>(&beta),
185                 reinterpret_cast<const double *>(C), ldC);
186 }
187 
188 #endif // HAVE_CBLAS
189 
190 // namespace cxxblas
191 
192 #endif // CXXBLAS_LEVEL3_SYRK_TCC