// Template implementations of the general-matrix update routines geaxpy and geacxpy.
#ifndef ULMBLAS_IMPL_LEVEL1EXTENSIONS_GEAXPY_TCC
#define ULMBLAS_IMPL_LEVEL1EXTENSIONS_GEAXPY_TCC 1
#include <ulmblas/impl/level1extensions/geaxpy.h>
#include <ulmblas/impl/level1/axpy.h>
#include <ulmblas/impl/auxiliary/conjugate.h>
namespace ulmBLAS {
//
//  geaxpy: update an m x n matrix Y in place with alpha times a matrix X.
//
//  Element (i,j) of X is read from X[i*incRowX + j*incColX] and element (i,j)
//  of Y lives at Y[i*incRowY + j*incColY].  The general fallback performs
//
//      Y(i,j) += alpha * X(i,j)     for 0 <= i < m,  0 <= j < n.
//
//  When both matrices have unit row stride (or both have unit column stride)
//  the work is handed to the level-1 routine axpy, one contiguous column (or
//  row) at a time.  axpy is declared outside this file; presumably it carries
//  out the same update on vectors.
//
//  Parameters:
//      m, n                number of rows / columns
//      alpha               scaling factor applied to X
//      X                   source matrix (read only)
//      incRowX, incColX    row / column stride of X
//      Y                   destination matrix (updated in place)
//      incRowY, incColY    row / column stride of Y
//
//  Nothing is done, and Y is left untouched, if m<=0, n<=0 or alpha compares
//  equal to Alpha(0).
//
template <typename IndexType, typename Alpha, typename MX, typename MY>
void
geaxpy(IndexType      m,
       IndexType      n,
       const Alpha    &alpha,
       const MX       *X,
       IndexType      incRowX,
       IndexType      incColX,
       MY             *Y,
       IndexType      incRowY,
       IndexType      incColY)
{
    const IndexType one(1);

    // Quick return: empty matrix ...
    if (m<=0 || n<=0) {
        return;
    }
    // ... or a zero scaling factor.
    if (alpha==Alpha(0)) {
        return;
    }

    // Both matrices column major: each column is a contiguous vector.
    if (incRowX==one && incRowY==one) {
        for (IndexType col=0; col<n; ++col) {
            const MX *xCol = &X[col*incColX];
            MY       *yCol = &Y[col*incColY];

            axpy(m, alpha, xCol, one, yCol, one);
        }
        return;
    }

    // Both matrices row major: each row is a contiguous vector.
    if (incColX==one && incColY==one) {
        for (IndexType row=0; row<m; ++row) {
            const MX *xRow = &X[row*incRowX];
            MY       *yRow = &Y[row*incRowY];

            axpy(n, alpha, xRow, one, yRow, one);
        }
        return;
    }

    // Mixed or non-unit strides: plain element-wise update, column by column.
    for (IndexType col=0; col<n; ++col) {
        const IndexType xOffset = col*incColX;
        const IndexType yOffset = col*incColY;

        for (IndexType row=0; row<m; ++row) {
            Y[row*incRowY+yOffset] += alpha*X[row*incRowX+xOffset];
        }
    }
}
//
//  geacxpy: update an m x n matrix Y in place with alpha times conjugate(X).
//
//  Element (i,j) of X is read from X[i*incRowX + j*incColX] and element (i,j)
//  of Y lives at Y[i*incRowY + j*incColY].  The general fallback performs
//
//      Y(i,j) += alpha * conjugate(X(i,j))     for 0 <= i < m,  0 <= j < n.
//
//  When both matrices have unit row stride (or both have unit column stride)
//  the work is handed to the level-1 routine acxpy, one contiguous column (or
//  row) at a time.  acxpy and conjugate are declared outside this file;
//  presumably acxpy is the conjugating counterpart of axpy and conjugate
//  yields the complex conjugate of its argument.
//
//  Parameters:
//      m, n                number of rows / columns
//      alpha               scaling factor applied to conjugate(X)
//      X                   source matrix (read only)
//      incRowX, incColX    row / column stride of X
//      Y                   destination matrix (updated in place)
//      incRowY, incColY    row / column stride of Y
//
//  Nothing is done, and Y is left untouched, if m<=0, n<=0 or alpha compares
//  equal to Alpha(0).
//
template <typename IndexType, typename Alpha, typename MX, typename MY>
void
geacxpy(IndexType      m,
        IndexType      n,
        const Alpha    &alpha,
        const MX       *X,
        IndexType      incRowX,
        IndexType      incColX,
        MY             *Y,
        IndexType      incRowY,
        IndexType      incColY)
{
    const IndexType one(1);

    // Quick return: empty matrix ...
    if (m<=0 || n<=0) {
        return;
    }
    // ... or a zero scaling factor.
    if (alpha==Alpha(0)) {
        return;
    }

    // Both matrices column major: each column is a contiguous vector.
    if (incRowX==one && incRowY==one) {
        for (IndexType col=0; col<n; ++col) {
            const MX *xCol = &X[col*incColX];
            MY       *yCol = &Y[col*incColY];

            acxpy(m, alpha, xCol, one, yCol, one);
        }
        return;
    }

    // Both matrices row major: each row is a contiguous vector.
    if (incColX==one && incColY==one) {
        for (IndexType row=0; row<m; ++row) {
            const MX *xRow = &X[row*incRowX];
            MY       *yRow = &Y[row*incRowY];

            acxpy(n, alpha, xRow, one, yRow, one);
        }
        return;
    }

    // Mixed or non-unit strides: plain element-wise update, column by column.
    for (IndexType col=0; col<n; ++col) {
        const IndexType xOffset = col*incColX;
        const IndexType yOffset = col*incColY;

        for (IndexType row=0; row<m; ++row) {
            Y[row*incRowY+yOffset] += alpha
                                     *conjugate(X[row*incRowX+xOffset]);
        }
    }
}
} // namespace ulmBLAS
#endif // ULMBLAS_IMPL_LEVEL1EXTENSIONS_GEAXPY_TCC 1