lattice · weinbe2 · Oct 30, 2025 · Sep 26, 2022 · Sep 27, 2022 · Sep 27, 2022
@@ -166,9 +166,12 @@ option(QUDA_ARPACK_LOGGING "enable ARPACK logging (not available for NG)" OFF)
 # OpenBLAS
 option(QUDA_OPENBLAS "enable OpenBLAS" OFF)
 
+option(QUDA_QCD_PLUS_QED "Enable QCD+QED features (builds QUDA_RECONSTRUCT_9/13 instead of 8/12 for Wilson-type operators)" OFF)
+
 # Interface options
 option(QUDA_INTERFACE_QDP "build qdp interface" ON)
 option(QUDA_INTERFACE_MILC "build milc interface" ON)
+option(QUDA_INTERFACE_OPENQCD "build OpenQCD interface (for QCD+QED support, see QUDA_QCD_PLUS_QED)" OFF)
 option(QUDA_INTERFACE_CPS "build cps interface" OFF)
 option(QUDA_INTERFACE_QDPJIT "build qdpjit interface" OFF)
 option(QUDA_INTERFACE_BQCD "build bqcd interface" OFF)
@@ -318,6 +321,9 @@ if(QUDA_MPI AND QUDA_QMP)
     "Specifying QUDA_QMP and QUDA_MPI might result in undefined behavior. If you intend to use QMP set QUDA_MPI=OFF.")
 endif()
 
+if(QUDA_INTERFACE_OPENQCD AND QUDA_QMP)
+  message(SEND_ERROR "OpenQCD does not support QMP comms.")
+endif()
 
 if(QUDA_NVSHMEM AND NOT (QUDA_QMP OR QUDA_MPI))
   message(SEND_ERROR "Specifying QUDA_NVSHMEM requires either QUDA_QMP or QUDA_MPI.")

@@ -265,6 +265,7 @@ Advanced Scientific Computing (PASC21) [arXiv:2104.05615[hep-lat]].
 *  Joel Giedt (Rensselaer Polytechnic Institute) 
 *  Steven Gottlieb (Indiana University) 
 *  Anthony Grebe (Fermilab)
+*  Roman Gruber (ETH)
 *  Kyriakos Hadjiyiannakou (Cyprus)
 *  Ben Hoerz (Intel)
 *  Leon Hostetler (Indiana University)

@@ -159,6 +159,8 @@ namespace quda {
       inverse(param.inverse),
       clover(param.clover),
       cloverInv(param.cloverInv),
+      csw(param.csw),
+      coeff(param.coeff),
       twist_flavor(param.twist_flavor),
       mu2(param.mu2),
       epsilon2(param.epsilon2),

@@ -11,6 +11,7 @@
 #include <convert.h>
 #include <clover_field.h>
 #include <complex_quda.h>
+#include <index_helper.cuh>
 #include <quda_matrix.h>
 #include <color_spinor.h>
 #include <load_store.h>
@@ -637,7 +638,11 @@ namespace quda {
             errorQuda("Accessor reconstruct = %d does not match field reconstruct %d", enable_reconstruct,
                       clover.Reconstruct());
           if (clover.max_element(is_inverse) == 0.0 && isFixed<Float>::value)
+#ifdef BUILD_OPENQCD_INTERFACE
+            warningQuda("%p max_element(%d) appears unset", &clover, is_inverse); /* ignore if the SW-field is zero */
+#else
             errorQuda("%p max_element(%d) appears unset", &clover, is_inverse);
+#endif
           if (clover.Diagonal() == 0.0 && clover.Reconstruct()) errorQuda("%p diagonal appears unset", &clover);
           this->clover = clover_ ? clover_ : clover.data<Float *>(is_inverse);
         }
@@ -1015,6 +1020,97 @@ namespace quda {
         size_t Bytes() const { return length*sizeof(Float); }
       };
 
+      /**
+       * @brief Accessor for clover fields stored in OpenQCD ordering.
+       *
+       * The constructor requires clover.Order() == QUDA_OPENQCD_CLOVER_ORDER and
+       * non-zero Coeff() and Csw().  load() converts the data of one lattice site
+       * into the output array; save() is not implemented and does nothing.
+       *
+       * @tparam Float   Storage type of the field data
+       * @tparam length  Number of real numbers stored per lattice site.  The
+       *                 conversion tables in load() are written for the default
+       *                 value of 72 (two blocks of 36 numbers).
+       */
+      template <typename Float, int length = 72> struct OpenQCDOrder {
+        static constexpr bool enable_reconstruct = false;
+        typedef typename mapper<Float>::type RegType;
+        Float *clover;                          // base pointer of the OpenQCD-ordered data
+        const int volumeCB;                     // taken from clover.Stride()
+        const QudaTwistFlavorType twist_flavor; // taken from clover.TwistFlavor()
+        const Float mu2;                        // taken from clover.Mu2()
+        const Float epsilon2;                   // taken from clover.Epsilon2()
+        const double coeff;                     // taken from clover.Coeff()
+        const double csw;                       // taken from clover.Csw()
+        const double kappa;                     // ratio coeff / csw
+        const int dim[4];                       // xyzt convention
+        const int L[4];                         // txyz convention
+
+        /**
+         * @brief      Construct the accessor from a clover field.
+         *
+         * @param[in]  clover   Field that supplies the metadata and, unless
+         *                      clover_ is given, the data pointer
+         * @param[in]  inverse  Forwarded to clover.data<Float *>() to select
+         *                      which data pointer is used
+         * @param[in]  clover_  Optional pointer that overrides the field's own
+         *                      data pointer
+         *
+         * Raises errorQuda if the field order is not QUDA_OPENQCD_CLOVER_ORDER
+         * or if either Coeff() or Csw() is zero.
+         */
+        OpenQCDOrder(const CloverField &clover, bool inverse, Float *clover_ = nullptr, void * = nullptr) :
+          volumeCB(clover.Stride()),
+          twist_flavor(clover.TwistFlavor()),
+          mu2(clover.Mu2()),
+          epsilon2(clover.Epsilon2()),
+          coeff(clover.Coeff()),
+          csw(clover.Csw()),
+          kappa(clover.Coeff() / clover.Csw()), // a zero csw is rejected in the constructor body below
+          dim {clover.X()[0], clover.X()[1], clover.X()[2], clover.X()[3]}, // *local* lattice dimensions, xyzt
+          L {clover.X()[3], clover.X()[0], clover.X()[1], clover.X()[2]}    // *local* lattice dimensions, txyz
+        {
+          if (clover.Order() != QUDA_OPENQCD_CLOVER_ORDER) {
+            errorQuda("Invalid clover order %d for this accessor", clover.Order());
+          }
+          this->clover = clover_ ? clover_ : clover.data<Float *>(inverse);
+          if (clover.Coeff() == 0.0 || clover.Csw() == 0.0) { errorQuda("Neither coeff nor csw may be zero!"); }
+        }
+
+        QudaTwistFlavorType TwistFlavor() const { return twist_flavor; }
+        Float Mu2() const { return mu2; }
+        Float Epsilon2() const { return epsilon2; }
+
+        /**
+         * @brief      Gets the offset in Floats from the openQCD base pointer to
+         *             the clover data of a lattice site.
+         *
+         * @param[in]  x_cb    Checkerboard index coming from quda
+         * @param[in]  parity  The parity coming from quda
+         *
+         * @return     The offset (site index from openqcd::ipt times length,
+         *             evaluated in int arithmetic).
+         */
+        __device__ __host__ inline int getCloverOffset(int x_cb, int parity) const
+        {
+          int x_quda[4], x[4];
+          getCoords(x_quda, x_cb, dim, parity); // x_quda contains xyzt local Cartesian coordinates
+          openqcd::rotate_coords(x_quda, x);    // xyzt -> txyz, x = openQCD local Cartesian lattice coordinate
+          return openqcd::ipt(x, L) * length;
+        }
+
+        /**
+         * @brief      Load a clover field at lattice point x_cb
+         *
+         * @param      v       The output clover matrix in QUDA order
+         * @param      x_cb    The checkerboarded lattice site
+         * @param      parity  The parity of the lattice site
+         */
+        __device__ __host__ inline void load(RegType v[length], int x_cb, int parity) const
+        {
+          // The sign and index tables below have exactly 36 entries, i.e. they
+          // only describe sites that hold 2 * 36 = 72 real numbers.
+          static_assert(length == 72, "OpenQCDOrder::load is only implemented for 72 reals per site");
+          constexpr int M = length / 2;
+
+          // Per-element sign applied while converting.
+          constexpr int sign[36] = {-1, -1, -1, -1, -1, -1,                 // diagonals (idx 0-5)
+                                    -1, +1, -1, +1, -1, -1, -1, -1, -1, -1, // column 0  (idx 6-15)
+                                    -1, +1, -1, -1, -1, -1, -1, -1,         // column 1  (idx 16-23)
+                                    -1, -1, -1, -1, -1, -1,                 // column 2  (idx 24-29)
+                                    -1, +1, -1, +1,                         // column 3  (idx 30-33)
+                                    -1, +1};                                // column 4  (idx 34-35)
+          // map[i] is the index inside the OpenQCD block that is read for output element i.
+          constexpr int map[36] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 18, 19, 24, 25, 16, 17,
+                                   12, 13, 20, 21, 26, 27, 14, 15, 22, 23, 28, 29, 30, 31, 32, 33, 34, 35};
+
+          int offset = getCloverOffset(x_cb, parity);
+          auto Ap = &clover[offset];     // A_+
+          auto Am = &clover[offset + M]; // A_-
+
+          // The first half of v is filled from A_-, the second half from A_+.
+          // Each entry is scaled by kappa (a double, so the product is formed in
+          // double precision); 1 is subtracted from the six diagonal entries
+          // (i < 6) before the sign is applied.
+#pragma unroll
+          for (int i = 0; i < M; i++) {
+            v[i] = sign[i] * (kappa * Am[map[i]] - (i < 6));
+            v[M + i] = sign[i] * (kappa * Ap[map[i]] - (i < 6));
+          }
+        }
+
+        // FIXME implement the save routine for OpenQCD ordered fields
+        // Until then this is a no-op: nothing is written back to the field.
+        __device__ __host__ inline void save(RegType[length], int, int) const { }
+
+        /** @return Size in bytes of the data of a single lattice site */
+        size_t Bytes() const { return length * sizeof(Float); }
+      };
+
   } // namespace clover
 
   // Use traits to reduce the template explosion

@@ -232,6 +232,9 @@ namespace quda
       } else if (inv_param.dirac_order == QUDA_TIFR_PADDED_DIRAC_ORDER) {
         fieldOrder = QUDA_PADDED_SPACE_SPIN_COLOR_FIELD_ORDER;
         siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
+      } else if (inv_param.dirac_order == QUDA_OPENQCD_DIRAC_ORDER) {
+        fieldOrder = QUDA_OPENQCD_FIELD_ORDER;
+        siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
       } else {
         errorQuda("Dirac order %d not supported", inv_param.dirac_order);
       }

@@ -1836,6 +1836,85 @@ namespace quda
       size_t Bytes() const { return nParity * volumeCB * Nc * Ns * 2 * sizeof(Float); }
     };
 
+    /**
+     * struct to define order of spinor fields in OpenQCD
+     *
+     * load() and save() move the length / 2 complex numbers of one lattice
+     * site between the field and a register array; the position of a site is
+     * computed by getSpinorOffset().
+     *
+     * @tparam     Float  Underlying type of data (precision)
+     * @tparam     Ns     Number of spin degrees of freedom
+     * @tparam     Nc     Number of color degrees of freedom
+     */
+    template <typename Float, int Ns, int Nc> struct OpenQCDDiracOrder {
+      using Accessor = OpenQCDDiracOrder<Float, Ns, Nc>;
+      using real = typename mapper<Float>::type;
+      using complex = complex<real>;
+
+      static const int length = 2 * Ns * Nc; // number of reals per site, i.e. Ns * Nc complex numbers
+      Float *field;
+      // Half of a.Bytes() expressed in Floats.  Not read by load()/save() in this
+      // accessor; presumably kept for parity with the other accessors -- TODO confirm.
+      size_t offset;
+      Float *ghost[8] = {};      // never set by this accessor; zero-initialised so it holds no indeterminate pointers
+      int volumeCB;
+      int faceVolumeCB[4] = {};  // never set by this accessor; zero-initialised
+      int nParity;
+      const int dim[4]; // xyzt convention
+      const int L[4];   // txyz convention
+
+      /**
+       * @brief      Construct the accessor from a spinor field.
+       *
+       * @param[in]  a       Field that supplies the metadata and, unless
+       *                     field_ is given, the data pointer
+       * @param[in]  field_  Optional pointer that overrides the field's own
+       *                     data pointer
+       *
+       * The remaining two parameters are unused.  Raises errorQuda when
+       * length (= 2 * Ns * Nc) differs from 24.
+       */
+      OpenQCDDiracOrder(const ColorSpinorField &a, int = 1, Float *field_ = 0, float * = 0) :
+        field(field_ ? field_ : a.data<Float *>()),
+        offset(a.Bytes() / (2 * sizeof(Float))),
+        volumeCB(a.VolumeCB()),
+        nParity(a.SiteSubset()),
+        dim {a.X(0), a.X(1), a.X(2), a.X(3)}, // *local* lattice dimensions, xyzt
+        L {a.X(3), a.X(0), a.X(1), a.X(2)}    // *local* lattice dimensions, txyz
+      {
+        if constexpr (length != 24) { errorQuda("Spinor field length %d not supported", length); }
+      }
+
+      /**
+       * @brief      Gets the offset in Floats from the openQCD base pointer to
+       *             the spinor field.
+       *
+       * @param[in]  x_cb    Checkerboard index coming from quda
+       * @param[in]  parity  The parity coming from quda
+       *
+       * @return     The offset (site index from openqcd::ipt times length,
+       *             evaluated in int arithmetic).
+       */
+      __device__ __host__ inline int getSpinorOffset(int x_cb, int parity) const
+      {
+        int x_quda[4], x[4];
+        getCoords(x_quda, x_cb, dim, parity); // x_quda contains xyzt local Cartesian coordinates
+        openqcd::rotate_coords(x_quda, x);    // xyzt -> txyz, x = openQCD local Cartesian lattice coordinate
+        return openqcd::ipt(x, L) * length;
+      }
+
+      /**
+       * @brief      Load the spinor of one lattice site.
+       *
+       * @param[out] v       Receives the length / 2 complex numbers of the site
+       * @param[in]  x_cb    Checkerboard index coming from quda
+       * @param[in]  parity  The parity coming from quda
+       */
+      __device__ __host__ inline void load(complex v[length / 2], int x_cb, int parity = 0) const
+      {
+        auto in = &field[getSpinorOffset(x_cb, parity)];
+        block_load<complex, length / 2>(v, reinterpret_cast<const complex *>(in));
+      }
+
+      /**
+       * @brief      Store the spinor of one lattice site.
+       *
+       * @param[in]  v       The length / 2 complex numbers to write
+       * @param[in]  x_cb    Checkerboard index coming from quda
+       * @param[in]  parity  The parity coming from quda
+       */
+      __device__ __host__ inline void save(const complex v[length / 2], int x_cb, int parity = 0) const
+      {
+        auto out = &field[getSpinorOffset(x_cb, parity)];
+        block_store<complex, length / 2>(reinterpret_cast<complex *>(out), v);
+      }
+
+      /**
+         @brief This accessor routine returns a colorspinor_wrapper to this object,
+         allowing us to overload various operators for manipulating at
+         the site level in terms of matrix operations.
+         @param[in] x_cb Checkerboarded space-time index we are requesting
+         @param[in] parity Parity we are requesting
+         @return Instance of a colorspinor_wrapper that curries in access to
+         this field at the above coordinates.
+      */
+      __device__ __host__ inline auto operator()(int x_cb, int parity) const
+      {
+        return colorspinor_wrapper<real, Accessor>(*this, x_cb, parity);
+      }
+
+      /** @return nParity * volumeCB * Nc * Ns * 2 * sizeof(Float) */
+      size_t Bytes() const { return nParity * volumeCB * Nc * Ns * 2 * sizeof(Float); }
+    }; // openQCDDiracOrder
+
   } // namespace colorspinor
 
   // Use traits to reduce the template explosion

@@ -49,6 +49,13 @@ namespace quda
   */
   int comm_dim(int dim);
 
+  /**
+     Return whether the dimension dim is a C* dimension or not
+     @param dim Dimension which we are querying
+     @return true if dim is a C* dimension, false otherwise
+  */
+  bool comm_dim_cstar(int dim);
+
   /**
      Return the global number of processes in the dimension dim
      @param dim Dimension which we are querying

@@ -41,6 +41,7 @@ namespace quda
     int (*coords)[QUDA_MAX_DIM];
     int my_rank;
     int my_coords[QUDA_MAX_DIM];
+    int cstar; // number of C* direction as per openQxD convention
     // It might be worth adding communicators to allow for efficient reductions:
     //   #if defined(MPI_COMMS)
     //     MPI_Comm comm;
@@ -126,9 +127,26 @@ namespace quda
   inline int comm_rank_displaced(const Topology *topo, const int displacement[])
   {
     int coords[QUDA_MAX_DIM];
-
-    for (int i = 0; i < QUDA_MAX_DIM; i++) {
-      coords[i] = (i < topo->ndim) ? mod(comm_coords(topo)[i] + displacement[i], comm_dims(topo)[i]) : 0;
+    int shift_integer;
+
+    int Nx_displacement = 0;
+    for (int i = QUDA_MAX_DIM - 1; i >= 0; i--) {
+      // cstar shift[x] shift[y] shift[z] shift[t]
+      // 0     0        0        0        0
+      // 1     0        0        0        0
+      // 2     0        1        0        0
+      // 3     0        1        1        0
+      if (i < topo->ndim && ((i == 1 && topo->cstar >= 2) || (i == 2 && topo->cstar >= 3))) {
+        // if we go over the boundary and have a shifted boundary condition,
+        // we shift Nx/2 ranks in x-direction:
+        // shift_integer       in { 0, 1, 2}
+        // (shift_integer - 1) in {-1, 0, 1}
+        shift_integer = (comm_coords(topo)[i] + displacement[i] + comm_dims(topo)[i]) / comm_dims(topo)[i];
+        Nx_displacement += (shift_integer - 1) * (comm_dims(topo)[0] / 2);
+      }
+      coords[i] = (i < topo->ndim) ?
+        mod(comm_coords(topo)[i] + displacement[i] + (i == 0 ? Nx_displacement : 0), comm_dims(topo)[i]) :
+        0;
     }
 
     return comm_rank_from_coords(topo, coords);
@@ -390,6 +408,12 @@ namespace quda
       return comm_dims(topo)[dim];
     }
 
+    /**
+       @brief Report whether dimension dim is a C* dimension of the default topology.
+       Dimension 1 qualifies when the topology's cstar value is at least 2,
+       dimension 2 when it is at least 3; no other dimension qualifies.
+       @param dim Dimension which we are querying
+       @return true if dim is a C* dimension, false otherwise
+    */
+    bool comm_dim_cstar(int dim)
+    {
+      const int n_cstar = comm_default_topology()->cstar;
+      switch (dim) {
+      case 1: return n_cstar >= 2;
+      case 2: return n_cstar >= 3;
+      default: return false;
+      }
+    }
+
     int comm_coord(int dim)
     {
       Topology *topo = comm_default_topology();

@@ -1133,6 +1133,15 @@ namespace quda {
     virtual QudaDiracType getDiracType() const override { return QUDA_MOBIUS_DOMAIN_WALLPC_EOFA_DIRAC; }
   };
 
+  /**
+   * @brief      Applies gamma matrices to spinor fields
+   *
+   * @param[out] out   Output field
+   * @param[in]  in    Input field
+   * @param[in]  dir   Direction index of gamma matrix
+   */
+  void ApplyGamma(cvector_ref<ColorSpinorField> &out, cvector_ref<const ColorSpinorField> &in, QudaGammaDirection_s dir);
+
   void gamma5(cvector_ref<ColorSpinorField> &out, cvector_ref<const ColorSpinorField> &in);
 
   /**

@@ -255,10 +255,11 @@ namespace quda
 
     /**
        @brief Computes Left/Right SVD from pre computed Right/Left
-       @param[in] evecs Computed eigenvectors of NormOp
-       @param[in] evals Computed eigenvalues of NormOp
+       @param[in,out] evecs Computed eigenvectors of NormOp
+       @param[in,out] evals Computed eigenvalues of NormOp
+       @param[in] dagger Whether NormOp was MdagM (false) or MMdag (true)
     */
-    void computeSVD(std::vector<ColorSpinorField> &evecs, std::vector<Complex> &evals);
+    void computeSVD(std::vector<ColorSpinorField> &evecs, std::vector<Complex> &evals, bool dagger = false);
 
     /**
        @brief Compute eigenvalues and their residiua