trixi-framework
diff --git a/‎.buildkite/pipeline.yml
+58 b/‎.buildkite/pipeline.yml
+58
diff --git a/‎README.md
+1 b/‎README.md
+1
diff --git a/‎examples/fluid/dam_break_2d_gpu.jl
+4-1 b/‎examples/fluid/dam_break_2d_gpu.jl
+4-1
diff --git a/‎examples/fluid/dam_break_3d.jl
+1-1 b/‎examples/fluid/dam_break_3d.jl
+1-1
diff --git a/‎examples/fluid/hydrostatic_water_column_2d.jl
+1-1 b/‎examples/fluid/hydrostatic_water_column_2d.jl
+1-1
diff --git a/‎examples/fluid/periodic_channel_2d.jl
+1-1 b/‎examples/fluid/periodic_channel_2d.jl
+1-1
diff --git a/‎examples/fluid/pipe_flow_2d.jl
+1-1 b/‎examples/fluid/pipe_flow_2d.jl
+1-1
diff --git a/‎examples/fsi/dam_break_gate_2d.jl
+15-23 b/‎examples/fsi/dam_break_gate_2d.jl
+15-23
diff --git a/‎examples/solid/oscillating_beam_2d.jl
+1-1 b/‎examples/solid/oscillating_beam_2d.jl
+1-1
diff --git a/‎src/general/gpu.jl
+2 b/‎src/general/gpu.jl
+2
diff --git a/‎src/general/neighborhood_search.jl
+2 b/‎src/general/neighborhood_search.jl
+2
diff --git a/‎src/general/smoothing_kernels.jl
+1-1 b/‎src/general/smoothing_kernels.jl
+1-1
diff --git a/‎src/general/system.jl
+2-2 b/‎src/general/system.jl
+2-2
diff --git a/‎src/preprocessing/particle_packing/system.jl
+4 b/‎src/preprocessing/particle_packing/system.jl
+4
diff --git a/‎src/schemes/boundary/dummy_particles/dummy_particles.jl
+25-14 b/‎src/schemes/boundary/dummy_particles/dummy_particles.jl
+25-14
diff --git a/‎src/schemes/boundary/monaghan_kajtar/monaghan_kajtar.jl
+3-3 b/‎src/schemes/boundary/monaghan_kajtar/monaghan_kajtar.jl
+3-3
diff --git a/‎src/schemes/boundary/open_boundary/system.jl
+4 b/‎src/schemes/boundary/open_boundary/system.jl
+4
@@ -0,0 +1,58 @@
+steps:
+  - label: "CUDA"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: "1"
+    agents:
+      queue: "juliagpu"
+      cuda: "*"
+    command: |
+      julia --color=yes --project=test -e 'using Pkg; Pkg.add("CUDA"); Pkg.develop(path="."); Pkg.instantiate()'
+      julia --color=yes --project=test -e 'include("test/runtests.jl")'
+    env:
+      TRIXIPARTICLES_TEST: cuda
+    timeout_in_minutes: 60
+
+  - label: "AMDGPU"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: "1"
+    agents:
+      queue: "juliagpu"
+      rocm: "*"
+    command: |
+      julia --color=yes --project=test -e 'using Pkg; Pkg.add("AMDGPU"); Pkg.develop(path="."); Pkg.instantiate()'
+      julia --color=yes --project=test -e 'include("test/runtests.jl")'
+    env:
+      TRIXIPARTICLES_TEST: amdgpu
+    timeout_in_minutes: 60
+
+  - label: "Metal"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: "1"
+    agents:
+      queue: "juliaecosystem"
+      os: "macos"
+      arch: "aarch64"
+    command: |
+      julia --color=yes --project=test -e 'using Pkg; Pkg.add("Metal"); Pkg.develop(path="."); Pkg.instantiate()'
+      julia --color=yes --project=test -e 'include("test/runtests.jl")'
+    env:
+      TRIXIPARTICLES_TEST: metal
+    timeout_in_minutes: 60
+
+  # Disabled: the oneAPI job fails with a segfault. See https://github.com/trixi-framework/TrixiParticles.jl/issues/484.
+  # - label: "oneAPI"
+  #   plugins:
+  #     - JuliaCI/julia#v1:
+  #         version: "1"
+  #   agents:
+  #     queue: "juliagpu"
+  #     intel: "*"
+  #   command: |
+  #     julia --color=yes --project=test -e 'using Pkg; Pkg.add("oneAPI"); Pkg.develop(path="."); Pkg.instantiate()'
+  #     julia --color=yes --project=test -e 'include("test/runtests.jl")'
+  #   env:
+  #     TRIXIPARTICLES_TEST: oneapi
+  #   timeout_in_minutes: 60
@@ -36,6 +36,7 @@ It offers intuitive configuration, robust pre- and post-processing, and vendor-a
 - Particle sampling of complex geometries from `.stl` and `.asc` files.
 - Output formats:
   - VTK
+- Support for GPUs by Nvidia, AMD and Apple (experimental)
 
 ## Examples
 We provide several example simulation setups in the `examples` folder (which can be accessed from Julia via `examples_dir()`).
 
@@ -9,7 +9,7 @@ using TrixiParticles
 # Load setup from dam break example
 trixi_include(@__MODULE__,
               joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
-              sol=nothing)
+              sol=nothing, ode=nothing)
 
 # Define a GPU-compatible neighborhood search
 min_corner = minimum(tank.boundary.coordinates, dims=2)
@@ -23,4 +23,7 @@ trixi_include(@__MODULE__,
               neighborhood_search=neighborhood_search,
               fluid_particle_spacing=fluid_particle_spacing,
               tspan=tspan,
+              density_diffusion=density_diffusion,
+              boundary_layers=boundary_layers, spacing_ratio=spacing_ratio,
+              boundary_model=boundary_model,
               data_type=nothing)
@@ -57,7 +57,7 @@ boundary_system = BoundarySPHSystem(tank.boundary, boundary_model)
 # ==========================================================================================
 # ==== Simulation
 semi = Semidiscretization(fluid_system, boundary_system)
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan, data_type=nothing)
 
 info_callback = InfoCallback(interval=10)
 saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
@@ -62,7 +62,7 @@ boundary_system = BoundarySPHSystem(tank.boundary, boundary_model, movement=noth
 # ==========================================================================================
 # ==== Simulation
 semi = Semidiscretization(fluid_system, boundary_system)
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan, data_type=nothing)
 
 info_callback = InfoCallback(interval=50)
 saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
@@ -60,7 +60,7 @@ periodic_box = PeriodicBox(min_corner=[0.0, -0.25], max_corner=[1.0, 0.75])
 neighborhood_search = GridNeighborhoodSearch{2}(; periodic_box)
 
 semi = Semidiscretization(fluid_system, boundary_system; neighborhood_search)
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan, data_type=nothing)
 
 info_callback = InfoCallback(interval=100)
 saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
@@ -125,7 +125,7 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)
 semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
                           boundary_system)
 
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan, data_type=nothing)
 
 info_callback = InfoCallback(interval=100)
 saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
@@ -4,6 +4,9 @@
 # "Study of a complex fluid-structure dam-breaking benchmark problem using a multi-phase SPH method with APR".
 # In: Engineering Analysis with Boundary Elements 104 (2019), pages 240-258.
 # https://doi.org/10.1016/j.enganabound.2019.03.033
+#
+# Use a higher resolution and see the comments below regarding plate thickness
+# to reproduce the results from the paper.
 
 using TrixiParticles
 using OrdinaryDiffEq
@@ -14,7 +17,7 @@ using OrdinaryDiffEq
 # since "larger" particles don't fit through the slightly opened gate. Lower fluid
 # resolutions therefore cause a later and more violent fluid impact against the gate.
 fluid_particle_spacing = 0.02
-n_particles_x = 5
+n_particles_x = 4
 
 # Change spacing ratio to 3 and boundary layers to 1 when using Monaghan-Kajtar boundary model
 boundary_layers = 3
@@ -54,13 +57,15 @@ is_moving(t) = t < 0.1
 
 gate_movement = BoundaryMovement(movement_function, is_moving)
 
-# Elastic plate/beam
+# Elastic plate/beam.
+# The paper uses a thickness of 0.004, which only works properly with a similarly fine
+# fluid resolution. Increase the resolution and use 0.004 to reproduce the paper's results.
 length_beam = 0.09
-thickness = 0.004
+thickness = 0.004 * 10
 solid_density = 1161.54
 
 # Young's modulus and Poisson ratio
-E = 3.5e6
+E = 3.5e6 / 10
 nu = 0.45
 
 # The structure starts at the position of the first particle and ends
@@ -123,24 +128,11 @@ solid_smoothing_kernel = WendlandC2Kernel{2}()
 hydrodynamic_densites = fluid_density * ones(size(solid.density))
 hydrodynamic_masses = hydrodynamic_densites * solid_particle_spacing^2
 
-k_solid = gravity * initial_fluid_size[2]
-beta_solid = fluid_particle_spacing / solid_particle_spacing
-boundary_model_solid = BoundaryModelMonaghanKajtar(k_solid, beta_solid,
-                                                   solid_particle_spacing,
-                                                   hydrodynamic_masses)
-
-# `BoundaryModelDummyParticles` usually produces better results, since Monaghan-Kajtar BCs
-# tend to introduce a non-physical gap between fluid and boundary.
-# However, `BoundaryModelDummyParticles` can only be used when the plate thickness is
-# at least two fluid particle spacings, so that the compact support is fully sampled,
-# or fluid particles can penetrate the solid.
-# For higher fluid resolutions, uncomment the code below for better results.
-#
-# boundary_model_solid = BoundaryModelDummyParticles(hydrodynamic_densites,
-#                                                    hydrodynamic_masses,
-#                                                    state_equation=state_equation,
-#                                                    AdamiPressureExtrapolation(),
-#                                                    smoothing_kernel, smoothing_length)
+boundary_model_solid = BoundaryModelDummyParticles(hydrodynamic_densites,
+                                                   hydrodynamic_masses,
+                                                   state_equation=state_equation,
+                                                   AdamiPressureExtrapolation(),
+                                                   smoothing_kernel, smoothing_length)
 
 solid_system = TotalLagrangianSPHSystem(solid,
                                         solid_smoothing_kernel, solid_smoothing_length,
@@ -152,7 +144,7 @@ solid_system = TotalLagrangianSPHSystem(solid,
 # ==== Simulation
 semi = Semidiscretization(fluid_system, boundary_system_tank,
                           boundary_system_gate, solid_system)
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan, data_type=nothing)
 
 info_callback = InfoCallback(interval=100)
 saving_callback = SolutionSavingCallback(dt=0.02, prefix="")
 
@@ -56,7 +56,7 @@ solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_lengt
 # ==== Simulation
 semi = Semidiscretization(solid_system,
                           neighborhood_search=PrecomputedNeighborhoodSearch{2}())
-ode = semidiscretize(semi, tspan)
+ode = semidiscretize(semi, tspan, data_type=nothing)
 
 info_callback = InfoCallback(interval=100)
 
 
@@ -10,9 +10,11 @@
 Adapt.@adapt_structure Semidiscretization
 Adapt.@adapt_structure WeaklyCompressibleSPHSystem
 Adapt.@adapt_structure DensityDiffusionAntuono
+Adapt.@adapt_structure EntropicallyDampedSPHSystem
 Adapt.@adapt_structure BoundarySPHSystem
 Adapt.@adapt_structure BoundaryModelDummyParticles
 Adapt.@adapt_structure BoundaryModelMonaghanKajtar
+Adapt.@adapt_structure BoundaryMovement
 Adapt.@adapt_structure TotalLagrangianSPHSystem
 
 # The initial conditions are only used for initialization, which happens before `adapt`ing
 
@@ -15,6 +15,8 @@ function PointNeighbors.foreach_point_neighbor(f, system::GPUSystem, neighbor_sy
                                                neighborhood_search;
                                                points=eachparticle(system),
                                                parallel=true)
+    @assert parallel != false
+
     # For `GPUSystem`s, explicitly pass the backend, so a `GPUSystem` with a CPU
     # backend will actually launch the KernelAbstractions.jl kernels on the CPU.
     foreach_point_neighbor(f, system_coords, neighbor_coords, neighborhood_search;
 
@@ -257,7 +257,7 @@ end
     return result
 end
 
-@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 2.5 * h
+@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h
 
 @inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h
 # `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.
 
@@ -3,7 +3,7 @@
 abstract type System{NDIMS, IC} end
 
 # When using KernelAbstractions.jl, the initial condition has been replaced by `nothing`
-GPUSystem = System{NDIMS, Nothing} where {NDIMS}
+const GPUSystem = System{<:Any, Nothing}
 
 abstract type FluidSystem{NDIMS, IC} <: System{NDIMS, IC} end
 timer_name(::FluidSystem) = "fluid"
@@ -26,7 +26,7 @@ end
 initialize!(system, neighborhood_search) = system
 
 @inline Base.ndims(::System{NDIMS}) where {NDIMS} = NDIMS
-@inline Base.eltype(system::System) = eltype(system.initial_condition)
+@inline Base.eltype(system::System) = error("eltype not implemented for system $system")
 
 # Number of integrated variables in the first component of the ODE system (coordinates)
 @inline u_nvariables(system) = ndims(system)
 
@@ -126,6 +126,10 @@ function Base.show(io::IO, ::MIME"text/plain", system::ParticlePackingSystem)
     end
 end
 
+@inline function Base.eltype(::ParticlePackingSystem{<:Any, ELTYPE}) where {ELTYPE}
+    return ELTYPE
+end
+
 @inline function v_nvariables(system::ParticlePackingSystem)
     return ndims(system) * 2
 end
 
@@ -369,27 +369,38 @@ function compute_pressure!(boundary_model,
         neighbor_coords = current_coordinates(u_neighbor_system, neighbor_system)
 
         # This is an optimization for simulations with large and complex boundaries.
-        # Especially, in 3D simulations with large and/or complex structures outside
+        # Especially in 3D simulations with large and/or complex structures outside
         # of areas with permanent flow.
-        # Note: The version iterating neighbors first is not thread parallelizable.
+        # Note: The version iterating neighbors first is not thread-parallelizable
+        #       and thus not GPU-compatible.
         # The factor is based on the achievable speed-up of the thread-parallelizable version.
-        if nparticles(system) >
-           ceil(Int, Threads.nthreads() / 2) * nparticles(neighbor_system)
-            nhs = get_neighborhood_search(neighbor_system, system, semi)
-
-            # Loop over fluid particles and then the neighboring boundary particles to extrapolate fluid pressure to the boundaries
-            boundary_pressure_extrapolation_neighbor!(boundary_model, system,
-                                                      neighbor_system,
-                                                      system_coords, neighbor_coords, v,
-                                                      v_neighbor_system, nhs)
-        else
+        # Use the parallel version if the number of boundary particles is not much larger
+        # than the number of fluid particles.
+        n_boundary_particles = nparticles(system)
+        n_fluid_particles = nparticles(neighbor_system)
+        speedup = ceil(Int, Threads.nthreads() / 2)
+        parallelize = system isa GPUSystem ||
+                      n_boundary_particles < speedup * n_fluid_particles
+        if parallelize
             nhs = get_neighborhood_search(system, neighbor_system, semi)
 
-            # Loop over boundary particles and then the neighboring fluid particles to extrapolate fluid pressure to the boundaries
+            # Loop over boundary particles and then the neighboring fluid particles
+            # to extrapolate fluid pressure to the boundaries.
             boundary_pressure_extrapolation!(boundary_model, system,
                                              neighbor_system,
                                              system_coords, neighbor_coords, v,
                                              v_neighbor_system, nhs)
+        else
+            nhs = get_neighborhood_search(neighbor_system, system, semi)
+
+            # Loop over fluid particles and then the neighboring boundary particles
+            # to extrapolate fluid pressure to the boundaries.
+            # Note that this needs to be serial, as we are writing into the same
+            # pressure entry from different loop iterations.
+            boundary_pressure_extrapolation_neighbor!(boundary_model, system,
+                                                      neighbor_system,
+                                                      system_coords, neighbor_coords, v,
+                                                      v_neighbor_system, nhs)
         end
 
         @threaded system for particle in eachparticle(system)
@@ -472,7 +483,7 @@ end
     (; pressure, cache, viscosity, density_calculator) = boundary_model
     (; pressure_offset) = density_calculator
 
-    # Loop over all pairs of particles and neighbors within the kernel cutoff.
+    # Loop over all pairs of particles and neighbors within the kernel cutoff
     foreach_point_neighbor(system, neighbor_system, system_coords, neighbor_coords,
                            neighborhood_search;
                            points=eachparticle(system)) do particle, neighbor,
 
@@ -59,7 +59,7 @@ end
     # In order to avoid this, we clip the force at a "large" value, large enough to prevent
     # penetration when a reasonable `K` is used, but small enough to not cause instabilities
     # or super small time steps.
-    distance_from_singularity = max(0.01 * boundary_particle_spacing,
+    distance_from_singularity = max(boundary_particle_spacing / 100,
                                     distance - boundary_particle_spacing)
 
     return K / beta^(ndims(particle_system) - 1) * pos_diff /
@@ -72,11 +72,11 @@ end
 
     # TODO The neighborhood search fluid->boundary should use this search distance
     if q >= 2
-        return 0.0
+        return zero(eltype(r))
     end
 
     # (Monaghan, Kajtar, 2009, Section 4): The kernel should be normalized to 1.77 for q=0
-    return 1.77 / 32 * (1 + 5 / 2 * q + 2 * q^2) * (2 - q)^5
+    return (177 // 100) // 32 * (1 + 5 // 2 * q + 2 * q^2) * (2 - q)^5
 end
 
 @inline function particle_density(v, model::BoundaryModelMonaghanKajtar, system, particle)
 
@@ -170,6 +170,10 @@ function Base.show(io::IO, ::MIME"text/plain", system::OpenBoundarySPHSystem)
     end
 end
 
+@inline function Base.eltype(::OpenBoundarySPHSystem{<:Any, <:Any, <:Any, ELTYPE}) where {ELTYPE}
+    return ELTYPE
+end
+
 function reset_callback_flag!(system::OpenBoundarySPHSystem)
     system.update_callback_used[] = false