| 
  12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 
 | 
/** Copyright (C) 2014, The University of Texas at Austin
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  - Neither the name of The University of Texas at Austin nor the names
 *    of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
 
 /*
 * Copyright (C) 2014-2015, Michael Lehn
 *
 * ulmBLAS adopted general ideas from BLIS.  Using micro kernels from BLIS
 * only requires minor modifications.
 *
 */
 
 #ifndef ULMBLAS_IMPL_LEVEL3_PACK_TRUPACK_TCC
 #define ULMBLAS_IMPL_LEVEL3_PACK_TRUPACK_TCC 1
 
 #include <ulmblas/impl/auxiliary/conjugate.h>
 #include <ulmblas/impl/level3/pack/trupack.h>
 #include <ulmblas/impl/level3/ukernel/ugemm.h>
 
 namespace ulmBLAS {
 
 template <typename IndexType, typename TL, typename Buffer>
 static void
 trupack_MRxk(IndexType   k,
 bool        conj,
 bool        unit,
 const TL    *U,
 IndexType   incRowU,
 IndexType   incColU,
 Buffer      *buffer)
 {
 const IndexType MR  = BlockSizeUGemm<Buffer>::MR;
 
 if (!conj) {
 for (IndexType j=0; j<MR; ++j) {
 for (IndexType i=0; i<j; ++i) {
 buffer[i] = U[i*incRowU];
 }
 buffer[j] = (unit) ? Buffer(1) : U[j*incRowU];
 for (IndexType i=j+1; i<MR; ++i) {
 buffer[i] = Buffer(0);
 }
 buffer += MR;
 U      += incColU;
 }
 } else {
 for (IndexType j=0; j<MR; ++j) {
 for (IndexType i=0; i<j; ++i) {
 buffer[i] = conjugate(U[i*incRowU]);
 }
 buffer[j] = (unit) ? Buffer(1) : conjugate(U[j*incRowU]);
 for (IndexType i=j+1; i<MR; ++i) {
 buffer[i] = Buffer(0);
 }
 buffer += MR;
 U      += incColU;
 }
 }
 for (IndexType j=0; j<k-MR; ++j) {
 for (IndexType i=0; i<MR; ++i) {
 buffer[i] = conjugate(U[i*incRowU], conj);
 }
 buffer += MR;
 U      += incColU;
 }
 }
 
 template <typename IndexType, typename TU, typename Buffer>
 void
 trupack(IndexType   mc,
 bool        conj,
 bool        unit,
 const TU    *U,
 IndexType   incRowU,
 IndexType   incColU,
 Buffer      *buffer)
 {
 const IndexType MR  = BlockSizeUGemm<Buffer>::MR;
 const IndexType mp  = mc / MR;
 const IndexType mr_ = mc % MR;
 
 for (IndexType i=0; i<mp; ++i) {
 trupack_MRxk(mc-i*MR, conj, unit, U, incRowU, incColU, buffer);
 buffer += (mc-i*MR)*MR;
 U      += MR*(incRowU+incColU);
 }
 if (mr_>0) {
 for (IndexType j=0; j<mr_; ++j) {
 for (IndexType i=0; i<j; ++i) {
 buffer[i] = conjugate(U[i*incRowU], conj);
 }
 buffer[j] = (unit) ? Buffer(1) : conjugate(U[j*incRowU], conj);
 for (IndexType i=j+1; i<MR; ++i) {
 buffer[i] = Buffer(0);
 }
 buffer += MR;
 U      += incColU;
 }
 }
 }
 
 } // namespace ulmBLAS
 
 #endif // ULMBLAS_IMPL_LEVEL3_PACK_TRUPACK_TCC
 
 |