1 
  2 
  3 
  4 
  5 
  6 
  7 
  8 
  9 
 10 
 11 
 12 
 13 
 14 
 15 
 16 
 17 
 18 
 19 
 20 
 21 
 22 
 23 
 24 
 25 
 26 
 27 
 28 
 29 
 30 
 31 
 32 
 33 
 34 
 35 
 36 
 37 
 38 
 39 
 40 
 41 
 42 
 43 
 44 
 45 
 46 
 47 
 48 
 49 
 50 
 51 
 52 
 53 
 54 
 55 
 56 
 57 
 58 
 59 
 60 
 61 
 62 
 63 
 64 
 65 
 66 
 67 
 68 
 69 
 70 
 71 
 72 
 73 
 74 
 75 
 76 
 77 
 78 
 79 
 80 
 81 
 82 
 83 
 84 
 85 
 86 
 87 
 88 
 89 
 90 
 91 
 92 
 93 
 94 
 95 
 96 
 97 
 98 
 99 
100 
101 
102 
103 
104 
105 
106 
107 
108 
109 
110 
111 
112 
113 
114 
115 
116 
117 
118 
119 
120 
121 
122 
123 
124 
125 
126 
127 
128 
 
 | 
 
/* 
 * Copyright (C) 2014, The University of Texas at Austin 
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are 
 * met: 
 *  - Redistributions of source code must retain the above copyright 
 *    notice, this list of conditions and the following disclaimer. 
 *  - Redistributions in binary form must reproduce the above copyright 
 *    notice, this list of conditions and the following disclaimer in the 
 *    documentation and/or other materials provided with the distribution. 
 *  - Neither the name of The University of Texas at Austin nor the names 
 *    of its contributors may be used to endorse or promote products 
 *    derived from this software without specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 * 
 */ 
 
/* 
 * Copyright (C) 2014-2015, Michael Lehn 
 * 
 * ulmBLAS adopted general ideas from BLIS.  Using micro kernels from BLIS
 * only requires minor modifications.
 * 
 */ 
 
#ifndef ULMBLAS_IMPL_LEVEL3_PACK_TRUSPACK_TCC 
#define ULMBLAS_IMPL_LEVEL3_PACK_TRUSPACK_TCC 1 
 
#include <ulmblas/impl/auxiliary/conjugate.h> 
#include <ulmblas/impl/level3/pack/trlpack.h> 
#include <ulmblas/impl/level3/ukernel/ugemm.h> 
 
namespace ulmBLAS { 
 
template <typename IndexType, typename TU, typename Buffer> 
static void 
truspack_MRxk(IndexType   k, 
              bool        conj, 
              bool        unit, 
              const TU    *U, 
              IndexType   incRowU, 
              IndexType   incColU, 
              Buffer      *buffer) 
{ 
    const IndexType MR  = BlockSizeUGemm<Buffer>::MR; 
 
    for (IndexType j=0; j<MR; ++j) { 
        for (IndexType i=0; i<j; ++i) { 
            buffer[i] = conjugate(U[i*incRowU], conj); 
        } 
        buffer[j] = (unit) ? Buffer(1) 
                           : conjugate(Buffer(1)/U[j*incRowU], conj); 
        for (IndexType i=j+1; i<MR; ++i) { 
            buffer[i] = Buffer(0); 
        } 
        buffer += MR; 
        U      += incColU; 
    } 
    if (!conj) { 
        for (IndexType j=0; j<k-MR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                buffer[i] = U[i*incRowU]; 
            } 
            buffer += MR; 
            U      += incColU; 
        } 
    } else { 
        for (IndexType j=0; j<k-MR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                buffer[i] = conjugate(U[i*incRowU]); 
            } 
            buffer += MR; 
            U      += incColU; 
        } 
    } 
} 
 
template <typename IndexType, typename TU, typename Buffer> 
void 
truspack(IndexType   mc, 
         bool        conj, 
         bool        unit, 
         const TU    *U, 
         IndexType   incRowU, 
         IndexType   incColU, 
         Buffer      *buffer) 
{ 
    const IndexType MR  = BlockSizeUGemm<Buffer>::MR; 
    const IndexType mp  = mc / MR; 
    const IndexType mr_ = mc % MR; 
 
    for (IndexType i=0; i<mp; ++i) { 
        truspack_MRxk(mc-i*MR, conj, unit, U, incRowU, incColU, buffer); 
        buffer += (mc-i*MR)*MR; 
        U      += MR*(incRowU+incColU); 
    } 
 
    if (mr_>0) { 
        for (IndexType j=0; j<mr_; ++j) { 
            for (IndexType i=0; i<j; ++i) { 
                buffer[i] = conjugate(U[i*incRowU], conj); 
            } 
            buffer[j] = (unit) ? Buffer(1) 
                               : conjugate(Buffer(1)/U[j*incRowU], conj); 
            for (IndexType i=j+1; i<MR; ++i) { 
                buffer[i] = Buffer(0); 
            } 
            buffer += MR; 
            U      += incColU; 
        } 
    } 
} 
 
} // namespace ulmBLAS 
 
#endif // ULMBLAS_IMPL_LEVEL3_PACK_TRUSPACK_TCC 
 
 |