1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL3_HERK_TCC
 34 #define CXXBLAS_LEVEL3_HERK_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename ALPHA, typename MA,
 39           typename BETA, typename MC>
 40 void
 41 herk_generic(StorageOrder order, StorageUpLo upLoC,
 42              Transpose transA, IndexType n, IndexType k,
 43              const ALPHA &alpha, const MA *A, IndexType ldA,
 44              const BETA &beta, MC *C, IndexType ldC)
 45 {
 46     if (n==0) {
 47         return;
 48     }
 49 
 50     if (order==ColMajor) {
 51         upLoC = (upLoC==Upper) ? Lower : Upper;
 52         transA = Transpose(transA^ConjTrans);
 53         herk_generic(RowMajor, upLoC, transA, n, k,
 54                      alpha, A, ldA, beta, C, ldC);
 55         return;
 56     }
 57     hescal(order, upLoC, n, beta, C, ldC); 
 58     if (transA==NoTrans) {
 59         for (IndexType l=0; l<k; ++l) {
 60             her(order,  upLoC, n, alpha, A+l, ldA, C, ldC);
 61         }
 62     }
 63     if (transA==Conj) {
 64         assert(0);
 65     }
 66     if (transA==Trans) {
 67         assert(0);
 68     }
 69     if (transA==ConjTrans) {
 70         for (IndexType l=0; l<k; ++l) {
 71             her(order,  upLoC, Conj, n, alpha, A+l*ldA, IndexType(1), C, ldC);
 72         }
 73     }
 74 }
 75 
 76 template <typename IndexType, typename ALPHA, typename MA,
 77           typename BETA, typename MC>
 78 void
 79 herk(StorageOrder order, StorageUpLo upLo,
 80      Transpose trans, IndexType n, IndexType k,
 81      const ALPHA &alpha,
 82      const MA *A, IndexType ldA,
 83      const BETA &beta,
 84      MC *C, IndexType ldC)
 85 {
 86     CXXBLAS_DEBUG_OUT("herk_generic");
 87 
 88     herk_generic(order, upLo, trans, n, k, alpha, A, ldA, beta, C, ldC);
 89 }
 90 
 91 #ifdef HAVE_CBLAS
 92 
 93 // cherk
 94 template <typename IndexType>
 95 typename If<IndexType>::isBlasCompatibleInteger
 96 herk(StorageOrder order, StorageUpLo upLo,
 97      Transpose trans, IndexType n, IndexType k,
 98      float alpha,
 99      const ComplexFloat *A, IndexType ldA,
100      float beta,
101      ComplexFloat *C, IndexType ldC)
102 {
103     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cherk");
104 
105     cblas_cherk(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
106                 CBLAS::getCblasType(trans), n, k,
107                 alpha,
108                 reinterpret_cast<const float *>(A), ldA,
109                 beta,
110                 reinterpret_cast<const float *>(C), ldC);
111 }
112 
113 // zherk
114 template <typename IndexType>
115 typename If<IndexType>::isBlasCompatibleInteger
116 herk(StorageOrder order, StorageUpLo upLo,
117      Transpose trans, IndexType n, IndexType k,
118      double alpha,
119      const ComplexDouble *A, IndexType ldA,
120      double beta,
121      ComplexDouble *C, IndexType ldC)
122 {
123     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zherk");
124 
125     cblas_zherk(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
126                 CBLAS::getCblasType(trans), n, k,
127                 alpha,
128                 reinterpret_cast<const double *>(A), ldA,
129                 beta,
130                 reinterpret_cast<const double *>(C), ldC);
131 }
132 
133 #endif // HAVE_CBLAS
134 
} // namespace cxxblas
136 
137 #endif // CXXBLAS_LEVEL3_HERK_TCC