1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL3_HER2K_TCC
 34 #define CXXBLAS_LEVEL3_HER2K_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename ALPHA, typename MA, typename MB,
 39           typename BETA, typename MC>
 40 void
 41 her2k_generic(StorageOrder order, StorageUpLo upLoC,
 42               Transpose transAB,
 43               IndexType n, IndexType k,
 44               const ALPHA &alpha,
 45               const MA *A, IndexType ldA,
 46               const MB *B, IndexType ldB,
 47               const BETA &beta,
 48               MC *C, IndexType ldC)
 49 {
 50     if (order==ColMajor) {
 51         upLoC = (upLoC==Upper) ? Lower : Upper;
 52         transAB = Transpose(transAB^ConjTrans);
 53         her2k_generic(RowMajor, upLoC, transAB, n, k,
 54                       conjugate(alpha), A, ldA, B, ldB,
 55                       beta, C, ldC);
 56         return;
 57     }
 58     hescal(order, upLoC, n, beta, C, ldC);    
 59     if (k==0) {
 60         return;
 61     }
 62 
 63     if (transAB==NoTrans) {
 64         for (IndexType l=0; l<k; ++l) {
 65             her2(order,  upLoC, n, alpha,
 66                  A+l, ldA, B+l, ldB,
 67                  C, ldC);
 68         }
 69     }
 70     if (transAB==Conj) {
 71         assert(0);
 72     }
 73     if (transAB==Trans) {
 74         assert(0);
 75     }
 76     if (transAB==ConjTrans) {
 77         for (IndexType l=0; l<k; ++l) {
 78             her2(order,  upLoC, Conj, n, conjugate(alpha),
 79                  A+l*ldA, IndexType(1), B+l*ldB, IndexType(1),
 80                  C, ldC);
 81         }
 82     }
 83 }
 84 
 85 template <typename IndexType, typename ALPHA, typename MA, typename MB,
 86           typename BETA, typename MC>
 87 void
 88 her2k(StorageOrder order, StorageUpLo upLo,
 89       Transpose trans,
 90       IndexType n, IndexType k,
 91       const ALPHA &alpha,
 92       const MA *A, IndexType ldA,
 93       const MB *B, IndexType ldB,
 94       const BETA &beta,
 95       MC *C, IndexType ldC)
 96 {
 97     CXXBLAS_DEBUG_OUT("her2k_generic");
 98 
 99     her2k_generic(order, upLo, trans, n, k,
100                   alpha, A, ldA, B, ldB,
101                   beta,
102                   C, ldC);
103 }
104 
105 #ifdef HAVE_CBLAS
106 
107 // cher2k
108 template <typename IndexType>
109 typename If<IndexType>::isBlasCompatibleInteger
110 her2k(StorageOrder order, StorageUpLo upLo,
111       Transpose trans,
112       IndexType n, IndexType k,
113       const ComplexFloat &alpha,
114       const ComplexFloat *A, IndexType ldA,
115       const ComplexFloat *B, IndexType ldB,
116       float beta,
117       ComplexFloat *C, IndexType ldC)
118 {
119     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_cher2k");
120 
121     cblas_cher2k(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
122                  CBLAS::getCblasType(trans),
123                  n, k,
124                  reinterpret_cast<const float *>(&alpha),
125                  reinterpret_cast<const float *>(A), ldA,
126                  reinterpret_cast<const float *>(B), ldB,
127                  beta,
128                  reinterpret_cast<const float *>(C), ldC);
129 }
130 
131 // zher2k
132 template <typename IndexType>
133 typename If<IndexType>::isBlasCompatibleInteger
134 her2k(StorageOrder order, StorageUpLo upLo,
135       Transpose trans,
136       IndexType n, IndexType k,
137       const ComplexDouble &alpha,
138       const ComplexDouble *A, IndexType ldA,
139       const ComplexDouble *B, IndexType ldB,
140       double beta,
141       ComplexDouble *C, IndexType ldC)
142 {
143     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zher2k");
144 
145     cblas_zher2k(CBLAS::getCblasType(order), CBLAS::getCblasType(upLo),
146                  CBLAS::getCblasType(trans),
147                  n, k,
148                  reinterpret_cast<const double *>(&alpha),
149                  reinterpret_cast<const double *>(A), ldA,
150                  reinterpret_cast<const double *>(B), ldB,
151                  beta,
152                  reinterpret_cast<const double *>(C), ldC);
153 }
154 
155 #endif // HAVE_CBLAS
156 
} // namespace cxxblas
158 
159 #endif // CXXBLAS_LEVEL3_HER2K_TCC
160