lattice
diff --git a/‎CMakeLists.txt‎
Lines changed: 3 additions & 3 deletions b/‎CMakeLists.txt‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎include/gauge_force_quda.h‎
Lines changed: 0 additions & 36 deletions b/‎include/gauge_force_quda.h‎
Lines changed: 0 additions & 36 deletions
diff --git a/‎include/gauge_path_helper.cuh‎
Lines changed: 122 additions & 0 deletions b/‎include/gauge_path_helper.cuh‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎include/gauge_path_quda.h‎
Lines changed: 49 additions & 0 deletions b/‎include/gauge_path_quda.h‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎include/gauge_tools.h‎
Lines changed: 9 additions & 0 deletions b/‎include/gauge_tools.h‎
Lines changed: 9 additions & 0 deletions
@@ -64,19 +64,19 @@ if(BUILD_TYPE_VALID LESS 0)
   message(SEND_ERROR "Please specify a valid CMAKE_BUILD_TYPE type! Valid build types are:" "${VALID_BUILD_TYPES}")
 endif()
 
-# QUDA may be built to run using HIP or CUDA, which we call the
+# QUDA may be built to run using CUDA, HIP or SYCL, which we call the
 # Target type. By default, the target is CUDA.
 if(DEFINED ENV{QUDA_TARGET})
   set(DEFTARGET $ENV{QUDA_TARGET})
 else()
   set(DEFTARGET "CUDA")
 endif()
 
-set(VALID_TARGET_TYPES CUDA HIP)
+set(VALID_TARGET_TYPES CUDA HIP SYCL)
 set(QUDA_TARGET_TYPE
   "${DEFTARGET}"
   CACHE STRING "Choose the type of target, options are: ${VALID_TARGET_TYPES}")
-set_property(CACHE QUDA_TARGET_TYPE PROPERTY STRINGS CUDA HIP)
+set_property(CACHE QUDA_TARGET_TYPE PROPERTY STRINGS CUDA HIP SYCL)
 
 string(TOUPPER ${QUDA_TARGET_TYPE} CHECK_TARGET_TYPE)
 list(FIND VALID_TARGET_TYPES ${CHECK_TARGET_TYPE} TARGET_TYPE_VALID)
 
@@ -266,6 +266,7 @@ Advanced Scientific Computing (PASC21) [arXiv:2104.05615[hep-lat]].
 *  Steven Gottlieb (Indiana University) 
 *  Kyriakos Hadjiyiannakou (Cyprus)
 *  Dean Howarth (Lawrence Livermore Lab, Lawrence Berkeley Lab)
+*  Xiangyu Jiang (Chinese Academy of Sciences)
 *  Balint Joo (OLCF, Oak Ridge National Laboratory, formerly Jefferson Lab)
 *  Hyung-Jin Kim (Samsung Advanced Institute of Technology)
 *  Bartosz Kostrzewa (HPC/A-Lab, University of Bonn)
 
@@ -0,0 +1,122 @@
+#pragma once
+
+#include <gauge_field_order.h>
+#include <quda_matrix.h>
+#include <index_helper.cuh>
+#include <kernel.h>
+#include <thread_array.h>
+
+namespace quda {
+
+  /**
+     @brief Flattened copy of a set of gauge paths, packed into a single
+     buffer obtained from pool_device_malloc.
+
+     Layout of buffer, in units of int counted from its start:
+       - dim * num_paths * max_length path entries, indexed [dir][path][step]
+         (entries beyond a path's own length are zero)
+       - num_paths path lengths
+       - pad ints, so that what follows starts at a multiple of sizeof(double)
+         bytes from the start of the buffer
+       - num_paths path coefficients, stored as double
+
+     No destructor is defined here: free() must be called explicitly to
+     release the buffer.
+
+     @tparam dim_ Number of directions for which paths are supplied
+  */
+  template <int dim_>
+  struct paths {
+    static constexpr int dim = dim_;
+    const int num_paths;      // number of paths supplied for each direction
+    const int max_length;     // number of entries reserved for each path in the flattened array
+    int *input_path[dim];     // per-direction pointers into buffer
+    const int *length;        // points into buffer at the num_paths path lengths
+    const double *path_coeff; // points into buffer at the num_paths path coefficients
+    int *buffer;              // the single allocation that backs all of the pointers above
+    int count;                // number of path entries copied for one direction (sum of the path lengths)
+
+    /**
+       @brief Validate the inputs, flatten them into a host staging buffer and copy that to buffer
+       @param[in] input_path One entry per direction; input_path[dir][i][j] is step j of path i
+       @param[in] length_h Length of each path; every length must be <= max_length
+       @param[in] path_coeff_h Coefficient of each path
+       @param[in] num_paths Number of paths
+       @param[in] max_length Number of entries reserved per path
+    */
+    paths(std::vector<int**>& input_path, std::vector<int>& length_h, std::vector<double>& path_coeff_h, int num_paths, int max_length) :
+      num_paths(num_paths),
+      max_length(max_length),
+      count(0)
+    {
+      if (static_cast<int>(input_path.size()) != dim)
+        errorQuda("Input path vector is of size %lu, expected %d", input_path.size(), dim);
+      if (static_cast<int>(length_h.size()) != num_paths)
+        errorQuda("Path length vector is of size %lu, expected %d", length_h.size(), num_paths);
+      if (static_cast<int>(path_coeff_h.size()) != num_paths)
+        errorQuda("Path coefficient vector is of size %lu, expected %d", path_coeff_h.size(), num_paths);
+
+      // every path has to fit into its max_length-sized slot of the flattened
+      // array, otherwise the copy loop below would write outside that slot
+      for (int i = 0; i < num_paths; i++) {
+        if (length_h[i] > max_length)
+          errorQuda("Path %d is of length %d, which exceeds the maximum length %d", i, length_h[i], max_length);
+      }
+
+      // form the element counts in size_t so that the products cannot overflow int
+      const size_t dir_stride = static_cast<size_t>(num_paths) * max_length;
+      const size_t n_path_ints = dim * dir_stride;
+
+      // create path struct in a single allocation
+      size_t bytes = n_path_ints * sizeof(int) + num_paths * sizeof(int);
+      int pad = ((sizeof(double) - bytes % sizeof(double)) % sizeof(double))/sizeof(int);
+      bytes += pad*sizeof(int) + num_paths*sizeof(double);
+
+      buffer = static_cast<int*>(pool_device_malloc(bytes));
+      int *path_h = static_cast<int*>(safe_malloc(bytes));
+      memset(path_h, 0, bytes);
+
+      for (int dir=0; dir<dim; dir++) {
+        // flatten the input_path array for copying to the device
+        for (int i = 0; i < num_paths; i++) {
+          int *slot = path_h + dir * dir_stride + static_cast<size_t>(i) * max_length;
+          for (int j = 0; j < length_h[i]; j++) {
+            slot[j] = input_path[dir][i][j];
+            if (dir==0) count++; // count each path entry once, not once per direction
+          }
+        }
+      }
+
+      // length array
+      memcpy(path_h + n_path_ints, length_h.data(), num_paths*sizeof(int));
+
+      // path_coeff array
+      memcpy(path_h + n_path_ints + num_paths + pad, path_coeff_h.data(), num_paths*sizeof(double));
+
+      qudaMemcpy(buffer, path_h, bytes, qudaMemcpyHostToDevice);
+      host_free(path_h);
+
+      // finally set the pointers to the correct offsets in the buffer
+      for (int d=0; d < dim; d++) this->input_path[d] = buffer + d*dir_stride;
+      length = buffer + n_path_ints;
+      path_coeff = reinterpret_cast<double*>(buffer + n_path_ints + num_paths + pad);
+    }
+
+    /**
+       @brief Release buffer with pool_device_free; the pointers into it are not reset
+    */
+    void free() {
+      pool_device_free(buffer);
+    }
+  };
+
+  /**
+     @brief Map a direction index onto its partner, computed as 7 - dir
+     @param[in] dir Direction index
+     @return 7 - dir
+  */
+  constexpr int flipDir(int dir)
+  {
+    return 7 - dir;
+  }
+  /**
+     @brief Test whether a direction index denotes a forwards step
+     @param[in] dir Direction index
+     @return true when dir is 3 or less, false otherwise
+  */
+  constexpr bool isForwards(int dir)
+  {
+    return dir < 4;
+  }
+
+  /**
+     @brief Calculates an arbitrary gauge path, returning the product matrix
+
+     Starting from the identity, the entries of path are processed in order.
+     For a forwards entry (see isForwards) the link at the current position
+     is right-multiplied onto the product, after which dx is advanced by one
+     in that direction.  For a backwards entry, dx is first moved back by one
+     in the flipped direction (see flipDir) and conj() of the link loaded
+     there is right-multiplied onto the product.  The parity bit is toggled
+     on every step.
+
+     @return The product of the gauge path
+     @param[in] arg Kernel argument; supplies the Link type, the accessor u and E (passed to linkIndexShift)
+     @param[in] x Full index array
+     @param[in] parity Parity index (note: assumes that an offset from a non-zero dx is baked in)
+     @param[in] path Gauge link path
+     @param[in] length Length of gauge path
+     @param[in,out] dx Temporary shared memory storage for relative coordinate shift; updated in place as the path is walked
+  */
+  template <typename Arg, typename I>
+  __device__ __host__ inline typename Arg::Link
+  computeGaugePath(const Arg &arg, int x[4], int parity, const int* path, int length, I& dx)
+  {
+    using Link = typename Arg::Link;
+
+    Link product; // running product of the links visited so far
+    Link hop;     // link loaded for the current step
+    setIdentity(&product);
+
+    int site_parity = parity;
+
+    for (int step = 0; step < length; ++step) {
+      const int dir = path[step];
+      const bool forwards = isForwards(dir);
+      const int mu = forwards ? dir : flipDir(dir);
+
+      if (!forwards) {
+        // a backwards link is stored on the adjacent site: move there before loading
+        dx[mu]--;
+        site_parity ^= 1;
+      }
+
+      hop = arg.u(mu, linkIndexShift(x, dx, arg.E), site_parity);
+
+      if (forwards) {
+        product = product * hop;
+        // now move on to the site the link points to
+        dx[mu]++;
+        site_parity ^= 1;
+      } else {
+        product = product * conj(hop);
+      }
+    }
+
+    return product;
+  }
+
+}
+
@@ -0,0 +1,49 @@
+#pragma once
+
+namespace quda
+{
+
+  /**
+     @brief Compute the gauge-force contribution to the momentum
+     @param[out] mom Momentum field
+     @param[in] u Gauge field (extended when running on multiple GPUs)
+     @param[in] coeff Step-size coefficient
+     @param[in] input_path Host-array holding all path contributions for the gauge action
+     @param[in] length Host array holding the length of all paths
+     @param[in] path_coeff Coefficient of each path
+     @param[in] num_paths Number of paths
+     @param[in] max_length Maximum length of each path
+   */
+  void gaugeForce(GaugeField &mom, const GaugeField &u, double coeff, std::vector<int **> &input_path,
+                  std::vector<int> &length, std::vector<double> &path_coeff, int num_paths, int max_length);
+
+  /**
+     @brief Compute the product of gauge-links along the given path
+     @param[out] out Gauge field which the result is added to
+     @param[in] u Gauge field (extended when running on multiple GPUs)
+     @param[in] coeff Global coefficient for the result
+     @param[in] input_path Host-array holding all path contributions
+     @param[in] length Host array holding the length of all paths
+     @param[in] path_coeff Coefficient of each path
+     @param[in] num_paths Number of paths
+     @param[in] max_length Maximum length of each path
+   */
+  void gaugePath(GaugeField &out, const GaugeField &u, double coeff, std::vector<int **> &input_path,
+                 std::vector<int> &length, std::vector<double> &path_coeff, int num_paths, int max_length);
+
+  /**
+     @brief Compute the trace of an arbitrary set of gauge loops
+     @param[in] u Gauge field (extended when running on multiple GPUs)
+     @param[in, out] loop_traces Output traces of loops
+     @param[in] factor Multiplicative factor for each loop (i.e., volume normalization, etc)
+     @param[in] input_path Host-array holding all path contributions for the gauge action
+     @param[in] length Host array holding the length of all paths
+     @param[in] path_coeff_h Coefficient of each path
+     @param[in] num_paths Number of paths
+     @param[in] path_max_length Maximum length of each path
+   */
+  void gaugeLoopTrace(const GaugeField &u, std::vector<Complex> &loop_traces, double factor,
+                      std::vector<int **> &input_path, std::vector<int> &length, std::vector<double> &path_coeff_h,
+                      int num_paths, int path_max_length);
+
+} // namespace quda
@@ -169,4 +169,13 @@ namespace quda
   */
   void computeQChargeDensity(double energy[3], double &qcharge, void *qdensity, const GaugeField &Fmunu);
 
+  /**
+   * @brief Compute the trace of the Polyakov loop in a given dimension
+   * @param[out] ploop The real and imaginary parts of the Polyakov loop
+   * @param[in] u The gauge field upon which to compute the Polyakov loop
+   * @param[in] dir The direction to compute the Polyakov loop in
+   * @param[in] profile TimeProfile instance used for profiling.
+   */
+  void gaugePolyakovLoop(double ploop[2], const GaugeField &u, int dir, TimeProfile &profile);
+
 } // namespace quda