Skip to content

Commit a595b02

Browse files
Merge branch 'develop' of github.com:lattice/quda into feature/multi-rhs
2 parents b607f03 + e2415ef commit a595b02

40 files changed

+2186
-380
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,19 @@ if(BUILD_TYPE_VALID LESS 0)
6464
message(SEND_ERROR "Please specify a valid CMAKE_BUILD_TYPE type! Valid build types are:" "${VALID_BUILD_TYPES}")
6565
endif()
6666

67-
# QUDA may be built to run using HIP or CUDA, which we call the
67+
# QUDA may be built to run using CUDA, HIP or SYCL, which we call the
6868
# Target type. By default, the target is CUDA.
6969
if(DEFINED ENV{QUDA_TARGET})
7070
set(DEFTARGET $ENV{QUDA_TARGET})
7171
else()
7272
set(DEFTARGET "CUDA")
7373
endif()
7474

75-
set(VALID_TARGET_TYPES CUDA HIP)
75+
set(VALID_TARGET_TYPES CUDA HIP SYCL)
7676
set(QUDA_TARGET_TYPE
7777
"${DEFTARGET}"
7878
CACHE STRING "Choose the type of target, options are: ${VALID_TARGET_TYPES}")
79-
set_property(CACHE QUDA_TARGET_TYPE PROPERTY STRINGS CUDA HIP)
79+
set_property(CACHE QUDA_TARGET_TYPE PROPERTY STRINGS CUDA HIP SYCL)
8080

8181
string(TOUPPER ${QUDA_TARGET_TYPE} CHECK_TARGET_TYPE)
8282
list(FIND VALID_TARGET_TYPES ${CHECK_TARGET_TYPE} TARGET_TYPE_VALID)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ Advanced Scientific Computing (PASC21) [arXiv:2104.05615[hep-lat]].
266266
* Steven Gottlieb (Indiana University)
267267
* Kyriakos Hadjiyiannakou (Cyprus)
268268
* Dean Howarth (Lawrence Livermore Lab, Lawrence Berkeley Lab)
269+
* Xiangyu Jiang (Chinese Academy of Sciences)
269270
* Balint Joo (OLCF, Oak Ridge National Laboratory, formerly Jefferson Lab)
270271
* Hyung-Jin Kim (Samsung Advanced Institute of Technology)
271272
* Bartosz Kostrzewa (HPC/A-Lab, University of Bonn)

include/gauge_force_quda.h

Lines changed: 0 additions & 36 deletions
This file was deleted.

include/gauge_path_helper.cuh

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
#pragma once
2+
3+
#include <gauge_field_order.h>
4+
#include <quda_matrix.h>
5+
#include <index_helper.cuh>
6+
#include <kernel.h>
7+
#include <thread_array.h>
8+
9+
namespace quda {
10+
11+
template <int dim_>
12+
struct paths {
13+
static constexpr int dim = dim_;
14+
const int num_paths;
15+
const int max_length;
16+
int *input_path[dim];
17+
const int *length;
18+
const double *path_coeff;
19+
int *buffer;
20+
int count;
21+
22+
paths(std::vector<int**>& input_path, std::vector<int>& length_h, std::vector<double>& path_coeff_h, int num_paths, int max_length) :
23+
num_paths(num_paths),
24+
max_length(max_length),
25+
count(0)
26+
{
27+
if (static_cast<int>(input_path.size()) != dim)
28+
errorQuda("Input path vector is of size %lu, expected %d", input_path.size(), dim);
29+
if (static_cast<int>(length_h.size()) != num_paths)
30+
errorQuda("Path length vector is of size %lu, expected %d", length_h.size(), num_paths);
31+
if (static_cast<int>(path_coeff_h.size()) != num_paths)
32+
errorQuda("Path coefficient vector is of size %lu, expected %d", path_coeff_h.size(), num_paths);
33+
34+
// create path struct in a single allocation
35+
size_t bytes = dim * num_paths * max_length * sizeof(int) + num_paths * sizeof(int);
36+
int pad = ((sizeof(double) - bytes % sizeof(double)) % sizeof(double))/sizeof(int);
37+
bytes += pad*sizeof(int) + num_paths*sizeof(double);
38+
39+
buffer = static_cast<int*>(pool_device_malloc(bytes));
40+
int *path_h = static_cast<int*>(safe_malloc(bytes));
41+
memset(path_h, 0, bytes);
42+
43+
for (int dir=0; dir<dim; dir++) {
44+
// flatten the input_path array for copying to the device
45+
for (int i = 0; i < num_paths; i++) {
46+
for (int j = 0; j < length_h[i]; j++) {
47+
path_h[dir * num_paths * max_length + i * max_length + j] = input_path[dir][i][j];
48+
if (dir==0) count++;
49+
}
50+
}
51+
}
52+
53+
// length array
54+
memcpy(path_h + dim * num_paths * max_length, length_h.data(), num_paths*sizeof(int));
55+
56+
// path_coeff array
57+
memcpy(path_h + dim * num_paths * max_length + num_paths + pad, path_coeff_h.data(), num_paths*sizeof(double));
58+
59+
qudaMemcpy(buffer, path_h, bytes, qudaMemcpyHostToDevice);
60+
host_free(path_h);
61+
62+
// finally set the pointers to the correct offsets in the buffer
63+
for (int d=0; d < dim; d++) this->input_path[d] = buffer + d*num_paths*max_length;
64+
length = buffer + dim*num_paths*max_length;
65+
path_coeff = reinterpret_cast<double*>(buffer + dim * num_paths * max_length + num_paths + pad);
66+
}
67+
68+
void free() {
69+
pool_device_free(buffer);
70+
}
71+
};
72+
73+
/**
   @brief Mirror a direction index about the midpoint of the range 0..7
   @param[in] dir Direction index
   @return 7 - dir, so 0 <-> 7, 1 <-> 6, 2 <-> 5 and 3 <-> 4
*/
constexpr int flipDir(int dir)
{
  return 7 - dir;
}
74+
/**
   @brief Test whether a direction index denotes a forwards step
   @param[in] dir Direction index
   @return true when dir is 3 or smaller, false otherwise
*/
constexpr bool isForwards(int dir)
{
  return dir < 4;
}
75+
76+
/**
77+
@brief Calculates an arbitary gauge path, returning the product matrix
78+
79+
@return The product of the gauge path
80+
@param[in] arg Kernel argumnt
81+
@param[in] x Full index array
82+
@param[in] parity Parity index (note: assumes that an offset from a non-zero dx is baked in)
83+
@param[in] path Gauge link path
84+
@param[in] length Length of gauge path
85+
@param[in] dx Temporary shared memory storage for relative coordinate shift
86+
*/
87+
template <typename Arg, typename I>
88+
__device__ __host__ inline typename Arg::Link
89+
computeGaugePath(const Arg &arg, int x[4], int parity, const int* path, int length, I& dx)
90+
{
91+
using Link = typename Arg::Link;
92+
93+
// linkA: current matrix
94+
// linkB: the loaded matrix in this round
95+
Link linkA, linkB;
96+
setIdentity(&linkA);
97+
98+
int nbr_oddbit = parity;
99+
100+
for (int j = 0; j < length; j++) {
101+
102+
int pathj = path[j];
103+
int lnkdir = isForwards(pathj) ? pathj : flipDir(pathj);
104+
105+
if (isForwards(pathj)) {
106+
linkB = arg.u(lnkdir, linkIndexShift(x,dx,arg.E), nbr_oddbit);
107+
linkA = linkA * linkB;
108+
dx[lnkdir]++; // now have to update to new location
109+
nbr_oddbit = nbr_oddbit^1;
110+
} else {
111+
dx[lnkdir]--; // if we are going backwards the link is on the adjacent site
112+
nbr_oddbit = nbr_oddbit^1;
113+
linkB = arg.u(lnkdir, linkIndexShift(x,dx,arg.E), nbr_oddbit);
114+
linkA = linkA * conj(linkB);
115+
}
116+
} //j
117+
118+
return linkA;
119+
}
120+
121+
}
122+

include/gauge_path_quda.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#pragma once
2+
3+
namespace quda
4+
{
5+
6+
/**
7+
@brief Compute the gauge-force contribution to the momentum
8+
@param[out] mom Momentum field
9+
@param[in] u Gauge field (extended when running on multiple GPUs)
10+
@param[in] coeff Step-size coefficient
11+
@param[in] input_path Host-array holding all path contributions for the gauge action
12+
@param[in] length Host array holding the length of all paths
13+
@param[in] path_coeff Coefficient of each path
14+
@param[in] num_paths Number of paths
15+
@param[in] max_length Maximum length of each path
16+
*/
17+
void gaugeForce(GaugeField &mom, const GaugeField &u, double coeff, std::vector<int **> &input_path,
18+
std::vector<int> &length, std::vector<double> &path_coeff, int num_paths, int max_length);
19+
20+
/**
21+
@brief Compute the product of gauge-links along the given path
22+
@param[out] out Gauge field which the result is added to
23+
@param[in] u Gauge field (extended when running on multiple GPUs)
24+
@param[in] coeff Global coefficient for the result
25+
@param[in] input_path Host-array holding all path contributions
26+
@param[in] length Host array holding the length of all paths
27+
@param[in] path_coeff Coefficient of each path
28+
@param[in] num_paths Number of paths
29+
@param[in] max_length Maximum length of each path
30+
*/
31+
void gaugePath(GaugeField &out, const GaugeField &u, double coeff, std::vector<int **> &input_path,
32+
std::vector<int> &length, std::vector<double> &path_coeff, int num_paths, int max_length);
33+
34+
/**
35+
@brief Compute the trace of an arbitrary set of gauge loops
36+
@param[in] u Gauge field (extended when running on multiple GPUs)
37+
@param[in, out] loop_traces Output traces of loops
38+
@param[in] input_path Host-array holding all path contributions for the gauge action
39+
@param[in] factor Multiplicative factor for each loop (i.e., volume normalization, etc)
40+
@param[in] length Host array holding the length of all paths
41+
@param[in] path_coeff_h Coefficient of each path
42+
@param[in] num_paths Number of paths
43+
@param[in] path_max_length Maximum length of each path
44+
*/
45+
void gaugeLoopTrace(const GaugeField &u, std::vector<Complex> &loop_traces, double factor,
46+
std::vector<int **> &input_path, std::vector<int> &length, std::vector<double> &path_coeff_h,
47+
int num_paths, int path_max_length);
48+
49+
} // namespace quda

include/gauge_tools.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,13 @@ namespace quda
169169
*/
170170
void computeQChargeDensity(double energy[3], double &qcharge, void *qdensity, const GaugeField &Fmunu);
171171

172+
/**
173+
* @brief Compute the trace of the Polyakov loop in a given dimension
174+
* @param[out] ploop The real and imaginary parts of the Polyakov loop
175+
* @param[in] u The gauge field upon which to compute the Polyakov loop
176+
* @param[in] dir The direction to compute the Polyakov loop in
177+
* @param[in] profile TimeProfile instance used for profiling.
178+
*/
179+
void gaugePolyakovLoop(double ploop[2], const GaugeField &u, int dir, TimeProfile &profile);
180+
172181
} // namespace quda

0 commit comments

Comments
 (0)