1 /*
  2  *   Copyright (c) 2009, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef CXXBLAS_LEVEL3_HEMM_TCC
 34 #define CXXBLAS_LEVEL3_HEMM_TCC 1
 35 
 36 namespace cxxblas {
 37 
 38 template <typename IndexType, typename ALPHA, typename MA, typename MB,
 39           typename BETA, typename MC>
 40 void
 41 hemm_generic(StorageOrder order,
 42              Side sideA, StorageUpLo upLoA,
 43              IndexType m, IndexType n,
 44              const ALPHA &alpha,
 45              const MA *A, IndexType ldA,
 46              const MB *B, IndexType ldB,
 47              const BETA &beta,
 48              MC *C, IndexType ldC)
 49 {
 50     if (order==ColMajor) {
 51         upLoA = (upLoA==Upper) ? Lower : Upper;
 52         sideA = (sideA==Left) ? Right : Left;
 53         hemm_generic(RowMajor, sideA, upLoA, n, m,
 54                      alpha, A, ldA, B, ldB,
 55                      beta,
 56                      C, ldC);
 57         return;
 58     }
 59     gescal(order, m, n, beta, C, ldC);
 60     if (sideA==Right) {
 61         for (IndexType i=0; i<m; ++i) {
 62             hemv(order, upLoA, Conj, n, alpha, A, ldA, B+i*ldB, IndexType(1),
 63                  BETA(1), C+i*ldC, IndexType(1));
 64         }
 65     }
 66     if (sideA==Left) {
 67         for (IndexType j=0; j<n; ++j) {
 68             hemv(order, upLoA, NoTrans, m, alpha, A, ldA, B+j, ldB,
 69                  BETA(1), C+j, ldC);
 70         }
 71     }
 72 }
 73 
 74 template <typename IndexType, typename ALPHA, typename MA, typename MB,
 75           typename BETA, typename MC>
 76 void
 77 hemm(StorageOrder order,
 78      Side side, StorageUpLo upLo,
 79      IndexType m, IndexType n,
 80      const ALPHA &alpha,
 81      const MA *A, IndexType ldA,
 82      const MB *B, IndexType ldB,
 83      const BETA &beta,
 84      MC *C, IndexType ldC)
 85 {
 86     CXXBLAS_DEBUG_OUT("hemm_generic");
 87 
 88     hemm_generic(order, side, upLo, m, n, alpha, A, ldA, B, ldB, beta, C, ldC);
 89 }
 90 
 91 #ifdef HAVE_CBLAS
 92 
 93 template <typename IndexType>
 94 typename If<IndexType>::isBlasCompatibleInteger
 95 hemm(StorageOrder order,
 96      Side side, StorageUpLo upLo,
 97      IndexType m, IndexType n,
 98      const ComplexFloat &alpha,
 99      const ComplexFloat *A, IndexType ldA,
100      const ComplexFloat *B, IndexType ldB,
101      const ComplexFloat &beta,
102      ComplexFloat *C, IndexType ldC)
103 {
104     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_chemm");
105 
106     cblas_chemm(CBLAS::getCblasType(order),
107                 CBLAS::getCblasType(side), CBLAS::getCblasType(upLo),
108                 m, n,
109                 reinterpret_cast<const float *>(&alpha),
110                 reinterpret_cast<const float *>(A), ldA,
111                 reinterpret_cast<const float *>(B), ldB,
112                 reinterpret_cast<const float *>(&beta),
113                 reinterpret_cast<float *>(C), ldC);
114 }
115 
116 template <typename IndexType>
117 typename If<IndexType>::isBlasCompatibleInteger
118 hemm(StorageOrder order,
119      Side side, StorageUpLo upLo,
120      IndexType m, IndexType n,
121      const ComplexDouble &alpha,
122      const ComplexDouble *A, IndexType ldA,
123      const ComplexDouble *B, IndexType ldB,
124      const ComplexDouble &beta,
125      ComplexDouble *C, IndexType ldC)
126 {
127     CXXBLAS_DEBUG_OUT("[" BLAS_IMPL "] cblas_zhemm");
128 
129     cblas_zhemm(CBLAS::getCblasType(order),
130                 CBLAS::getCblasType(side), CBLAS::getCblasType(upLo),
131                 m, n,
132                 reinterpret_cast<const double *>(&alpha),
133                 reinterpret_cast<const double *>(A), ldA,
134                 reinterpret_cast<const double *>(B), ldB,
135                 reinterpret_cast<const double *>(&beta),
136                 reinterpret_cast<double *>(C), ldC);
137 }
138 
139 #endif // HAVE_CBLAS
140 
} // namespace cxxblas
142 
143 #endif // CXXBLAS_LEVEL3_HEMM_TCC