Skip to content

Commit 8ab9e4d

Browse files
authored
Use JuliaGPU Buildkite CI for GPU tests (#723)
* Use JuliaGPU Buildkite CI for GPU tests * Remove Codecov token * Add GPU tests * Print ENV * Fix merge error in runtests.jl * Add CI for AMDGPU and oneAPI * Fix GPU tests * Fix formatting of pipeline.yml * Disable `count_allocations` for GPU tests * Add single and double precision test * Add proper GPU tests (and fix problems) * Reformat * Disable double precision tests for oneAPI * Reformat * Reformat * Improve readability of boundary extrapolation condition * Fix EDAC and dam break plate gate on GPUs * Reformat * Fix open boundary system * Fix DEMSystem * Fix EDAC test * Undo Float32 literals in smoothing kernels * Fix allocations * First step to make packing work on GPUs * Second step to make packing system GPU compatible * Revert "Second step to make packing system GPU compatible" This reverts commit e1b86e3. * Revert "First step to make packing work on GPUs" This reverts commit c350289. * Fix particle packing system * Reformat * Fix doctests * Use manual trigger for buildkite * Revert "Use manual trigger for buildkite" This reverts commit 3ac3fc1. * Add trigger buildkite action * Fix action * Try action without if condition * Try again * Remove action again * Try more * Fix GPU tests * Reformat * Fix tests * Disable oneAPI tests * Fix * Add GPU support to README
1 parent aa6475b commit 8ab9e4d

File tree

30 files changed

+626
-157
lines changed

30 files changed

+626
-157
lines changed

.buildkite/pipeline.yml

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
steps:
2+
- label: "CUDA"
3+
plugins:
4+
- JuliaCI/julia#v1:
5+
version: "1"
6+
agents:
7+
queue: "juliagpu"
8+
cuda: "*"
9+
command: |
10+
julia --color=yes --project=test -e 'using Pkg; Pkg.add("CUDA"); Pkg.develop(path="."); Pkg.instantiate()'
11+
julia --color=yes --project=test -e 'include("test/runtests.jl")'
12+
env:
13+
TRIXIPARTICLES_TEST: cuda
14+
timeout_in_minutes: 60
15+
16+
- label: "AMDGPU"
17+
plugins:
18+
- JuliaCI/julia#v1:
19+
version: "1"
20+
agents:
21+
queue: "juliagpu"
22+
rocm: "*"
23+
command: |
24+
julia --color=yes --project=test -e 'using Pkg; Pkg.add("AMDGPU"); Pkg.develop(path="."); Pkg.instantiate()'
25+
julia --color=yes --project=test -e 'include("test/runtests.jl")'
26+
env:
27+
TRIXIPARTICLES_TEST: amdgpu
28+
timeout_in_minutes: 60
29+
30+
- label: "Metal"
31+
plugins:
32+
- JuliaCI/julia#v1:
33+
version: "1"
34+
agents:
35+
queue: "juliaecosystem"
36+
os: "macos"
37+
arch: "aarch64"
38+
command: |
39+
julia --color=yes --project=test -e 'using Pkg; Pkg.add("Metal"); Pkg.develop(path="."); Pkg.instantiate()'
40+
julia --color=yes --project=test -e 'include("test/runtests.jl")'
41+
env:
42+
TRIXIPARTICLES_TEST: metal
43+
timeout_in_minutes: 60
44+
45+
# Doesn't work. Fails with segfault. See https://github.com/trixi-framework/TrixiParticles.jl/issues/484.
46+
# - label: "oneAPI"
47+
# plugins:
48+
# - JuliaCI/julia#v1:
49+
# version: "1"
50+
# agents:
51+
# queue: "juliagpu"
52+
# intel: "*"
53+
# command: |
54+
# julia --color=yes --project=test -e 'using Pkg; Pkg.add("oneAPI"); Pkg.develop(path="."); Pkg.instantiate()'
55+
# julia --color=yes --project=test -e 'include("test/runtests.jl")'
56+
# env:
57+
# TRIXIPARTICLES_TEST: oneapi
58+
# timeout_in_minutes: 60

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ It offers intuitive configuration, robust pre- and post-processing, and vendor-a
3636
- Particle sampling of complex geometries from `.stl` and `.asc` files.
3737
- Output formats:
3838
- VTK
39+
- Support for GPUs by Nvidia, AMD and Apple (experimental)
3940

4041
## Examples
4142
We provide several example simulation setups in the `examples` folder (which can be accessed from Julia via `examples_dir()`).

examples/fluid/dam_break_2d_gpu.jl

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ using TrixiParticles
99
# Load setup from dam break example
1010
trixi_include(@__MODULE__,
1111
joinpath(examples_dir(), "fluid", "dam_break_2d.jl"),
12-
sol=nothing)
12+
sol=nothing, ode=nothing)
1313

1414
# Define a GPU-compatible neighborhood search
1515
min_corner = minimum(tank.boundary.coordinates, dims=2)
@@ -23,4 +23,7 @@ trixi_include(@__MODULE__,
2323
neighborhood_search=neighborhood_search,
2424
fluid_particle_spacing=fluid_particle_spacing,
2525
tspan=tspan,
26+
density_diffusion=density_diffusion,
27+
boundary_layers=boundary_layers, spacing_ratio=spacing_ratio,
28+
boundary_model=boundary_model,
2629
data_type=nothing)

examples/fluid/dam_break_3d.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ boundary_system = BoundarySPHSystem(tank.boundary, boundary_model)
5757
# ==========================================================================================
5858
# ==== Simulation
5959
semi = Semidiscretization(fluid_system, boundary_system)
60-
ode = semidiscretize(semi, tspan)
60+
ode = semidiscretize(semi, tspan, data_type=nothing)
6161

6262
info_callback = InfoCallback(interval=10)
6363
saving_callback = SolutionSavingCallback(dt=0.02, prefix="")

examples/fluid/hydrostatic_water_column_2d.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ boundary_system = BoundarySPHSystem(tank.boundary, boundary_model, movement=noth
6262
# ==========================================================================================
6363
# ==== Simulation
6464
semi = Semidiscretization(fluid_system, boundary_system)
65-
ode = semidiscretize(semi, tspan)
65+
ode = semidiscretize(semi, tspan, data_type=nothing)
6666

6767
info_callback = InfoCallback(interval=50)
6868
saving_callback = SolutionSavingCallback(dt=0.02, prefix="")

examples/fluid/periodic_channel_2d.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ periodic_box = PeriodicBox(min_corner=[0.0, -0.25], max_corner=[1.0, 0.75])
6060
neighborhood_search = GridNeighborhoodSearch{2}(; periodic_box)
6161

6262
semi = Semidiscretization(fluid_system, boundary_system; neighborhood_search)
63-
ode = semidiscretize(semi, tspan)
63+
ode = semidiscretize(semi, tspan, data_type=nothing)
6464

6565
info_callback = InfoCallback(interval=100)
6666
saving_callback = SolutionSavingCallback(dt=0.02, prefix="")

examples/fluid/pipe_flow_2d.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ boundary_system = BoundarySPHSystem(pipe.boundary, boundary_model)
125125
semi = Semidiscretization(fluid_system, open_boundary_in, open_boundary_out,
126126
boundary_system)
127127

128-
ode = semidiscretize(semi, tspan)
128+
ode = semidiscretize(semi, tspan, data_type=nothing)
129129

130130
info_callback = InfoCallback(interval=100)
131131
saving_callback = SolutionSavingCallback(dt=0.02, prefix="")

examples/fsi/dam_break_gate_2d.jl

+15-23
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
# "Study of a complex fluid-structure dam-breaking benchmark problem using a multi-phase SPH method with APR".
55
# In: Engineering Analysis with Boundary Elements 104 (2019), pages 240-258.
66
# https://doi.org/10.1016/j.enganabound.2019.03.033
7+
#
8+
# Use a higher resolution and see the comments below regarding plate thickness
9+
# to reproduce the results from the paper.
710

811
using TrixiParticles
912
using OrdinaryDiffEq
@@ -14,7 +17,7 @@ using OrdinaryDiffEq
1417
# since "larger" particles don't fit through the slightly opened gate. Lower fluid
1518
# resolutions therefore cause a later and more violent fluid impact against the gate.
1619
fluid_particle_spacing = 0.02
17-
n_particles_x = 5
20+
n_particles_x = 4
1821

1922
# Change spacing ratio to 3 and boundary layers to 1 when using Monaghan-Kajtar boundary model
2023
boundary_layers = 3
@@ -54,13 +57,15 @@ is_moving(t) = t < 0.1
5457

5558
gate_movement = BoundaryMovement(movement_function, is_moving)
5659

57-
# Elastic plate/beam
60+
# Elastic plate/beam.
61+
# The paper uses a thickness of 0.004, which only works properly when a similar fluid
62+
# resolution is used. Increase resolution and change to 0.004 to reproduce the results.
5863
length_beam = 0.09
59-
thickness = 0.004
64+
thickness = 0.004 * 10
6065
solid_density = 1161.54
6166

6267
# Young's modulus and Poisson ratio
63-
E = 3.5e6
68+
E = 3.5e6 / 10
6469
nu = 0.45
6570

6671
# The structure starts at the position of the first particle and ends
@@ -123,24 +128,11 @@ solid_smoothing_kernel = WendlandC2Kernel{2}()
123128
hydrodynamic_densites = fluid_density * ones(size(solid.density))
124129
hydrodynamic_masses = hydrodynamic_densites * solid_particle_spacing^2
125130

126-
k_solid = gravity * initial_fluid_size[2]
127-
beta_solid = fluid_particle_spacing / solid_particle_spacing
128-
boundary_model_solid = BoundaryModelMonaghanKajtar(k_solid, beta_solid,
129-
solid_particle_spacing,
130-
hydrodynamic_masses)
131-
132-
# `BoundaryModelDummyParticles` usually produces better results, since Monaghan-Kajtar BCs
133-
# tend to introduce a non-physical gap between fluid and boundary.
134-
# However, `BoundaryModelDummyParticles` can only be used when the plate thickness is
135-
# at least two fluid particle spacings, so that the compact support is fully sampled,
136-
# or fluid particles can penetrate the solid.
137-
# For higher fluid resolutions, uncomment the code below for better results.
138-
#
139-
# boundary_model_solid = BoundaryModelDummyParticles(hydrodynamic_densites,
140-
# hydrodynamic_masses,
141-
# state_equation=state_equation,
142-
# AdamiPressureExtrapolation(),
143-
# smoothing_kernel, smoothing_length)
131+
boundary_model_solid = BoundaryModelDummyParticles(hydrodynamic_densites,
132+
hydrodynamic_masses,
133+
state_equation=state_equation,
134+
AdamiPressureExtrapolation(),
135+
smoothing_kernel, smoothing_length)
144136

145137
solid_system = TotalLagrangianSPHSystem(solid,
146138
solid_smoothing_kernel, solid_smoothing_length,
@@ -152,7 +144,7 @@ solid_system = TotalLagrangianSPHSystem(solid,
152144
# ==== Simulation
153145
semi = Semidiscretization(fluid_system, boundary_system_tank,
154146
boundary_system_gate, solid_system)
155-
ode = semidiscretize(semi, tspan)
147+
ode = semidiscretize(semi, tspan, data_type=nothing)
156148

157149
info_callback = InfoCallback(interval=100)
158150
saving_callback = SolutionSavingCallback(dt=0.02, prefix="")

examples/solid/oscillating_beam_2d.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ solid_system = TotalLagrangianSPHSystem(solid, smoothing_kernel, smoothing_lengt
5656
# ==== Simulation
5757
semi = Semidiscretization(solid_system,
5858
neighborhood_search=PrecomputedNeighborhoodSearch{2}())
59-
ode = semidiscretize(semi, tspan)
59+
ode = semidiscretize(semi, tspan, data_type=nothing)
6060

6161
info_callback = InfoCallback(interval=100)
6262

src/general/gpu.jl

+2
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
Adapt.@adapt_structure Semidiscretization
1111
Adapt.@adapt_structure WeaklyCompressibleSPHSystem
1212
Adapt.@adapt_structure DensityDiffusionAntuono
13+
Adapt.@adapt_structure EntropicallyDampedSPHSystem
1314
Adapt.@adapt_structure BoundarySPHSystem
1415
Adapt.@adapt_structure BoundaryModelDummyParticles
1516
Adapt.@adapt_structure BoundaryModelMonaghanKajtar
17+
Adapt.@adapt_structure BoundaryMovement
1618
Adapt.@adapt_structure TotalLagrangianSPHSystem
1719

1820
# The initial conditions are only used for initialization, which happens before `adapt`ing

src/general/neighborhood_search.jl

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ function PointNeighbors.foreach_point_neighbor(f, system::GPUSystem, neighbor_sy
1515
neighborhood_search;
1616
points=eachparticle(system),
1717
parallel=true)
18+
@assert parallel != false
19+
1820
# For `GPUSystem`s, explicitly pass the backend, so a `GPUSystem` with a CPU
1921
# backend will actually launch the KernelAbstractions.jl kernels on the CPU.
2022
foreach_point_neighbor(f, system_coords, neighbor_coords, neighborhood_search;

src/general/smoothing_kernels.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ end
257257
return result
258258
end
259259

260-
@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 2.5 * h
260+
@inline compact_support(::SchoenbergQuarticSplineKernel, h) = 5 // 2 * h
261261

262262
@inline normalization_factor(::SchoenbergQuarticSplineKernel{1}, h) = 1 / 24h
263263
# `1199 * pi` is always `Float64`. `pi * h^2 * 1199` preserves the type of `h`.

src/general/system.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
abstract type System{NDIMS, IC} end
44

55
# When using KernelAbstractions.jl, the initial condition has been replaced by `nothing`
6-
GPUSystem = System{NDIMS, Nothing} where {NDIMS}
6+
const GPUSystem = System{<:Any, Nothing}
77

88
abstract type FluidSystem{NDIMS, IC} <: System{NDIMS, IC} end
99
timer_name(::FluidSystem) = "fluid"
@@ -26,7 +26,7 @@ end
2626
initialize!(system, neighborhood_search) = system
2727

2828
@inline Base.ndims(::System{NDIMS}) where {NDIMS} = NDIMS
29-
@inline Base.eltype(system::System) = eltype(system.initial_condition)
29+
@inline Base.eltype(system::System) = error("eltype not implemented for system $system")
3030

3131
# Number of integrated variables in the first component of the ODE system (coordinates)
3232
@inline u_nvariables(system) = ndims(system)

src/preprocessing/particle_packing/system.jl

+4
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ function Base.show(io::IO, ::MIME"text/plain", system::ParticlePackingSystem)
126126
end
127127
end
128128

129+
@inline function Base.eltype(::ParticlePackingSystem{<:Any, ELTYPE}) where {ELTYPE}
130+
return ELTYPE
131+
end
132+
129133
@inline function v_nvariables(system::ParticlePackingSystem)
130134
return ndims(system) * 2
131135
end

src/schemes/boundary/dummy_particles/dummy_particles.jl

+25-14
Original file line numberDiff line numberDiff line change
@@ -369,27 +369,38 @@ function compute_pressure!(boundary_model,
369369
neighbor_coords = current_coordinates(u_neighbor_system, neighbor_system)
370370

371371
# This is an optimization for simulations with large and complex boundaries.
372-
# Especially, in 3D simulations with large and/or complex structures outside
372+
# Especially in 3D simulations with large and/or complex structures outside
373373
# of areas with permanent flow.
374-
# Note: The version iterating neighbors first is not thread parallelizable.
374+
# Note: The version iterating neighbors first is not thread-parallelizable
375+
# and thus not GPU-compatible.
375376
# The factor is based on the achievable speed-up of the thread-parallelizable version.
376-
if nparticles(system) >
377-
ceil(Int, Threads.nthreads() / 2) * nparticles(neighbor_system)
378-
nhs = get_neighborhood_search(neighbor_system, system, semi)
379-
380-
# Loop over fluid particles and then the neighboring boundary particles to extrapolate fluid pressure to the boundaries
381-
boundary_pressure_extrapolation_neighbor!(boundary_model, system,
382-
neighbor_system,
383-
system_coords, neighbor_coords, v,
384-
v_neighbor_system, nhs)
385-
else
377+
# Use the parallel version if the number of boundary particles is not much larger
378+
# than the number of fluid particles.
379+
n_boundary_particles = nparticles(system)
380+
n_fluid_particles = nparticles(neighbor_system)
381+
speedup = ceil(Int, Threads.nthreads() / 2)
382+
parallelize = system isa GPUSystem ||
383+
n_boundary_particles < speedup * n_fluid_particles
384+
if parallelize
386385
nhs = get_neighborhood_search(system, neighbor_system, semi)
387386

388-
# Loop over boundary particles and then the neighboring fluid particles to extrapolate fluid pressure to the boundaries
387+
# Loop over boundary particles and then the neighboring fluid particles
388+
# to extrapolate fluid pressure to the boundaries.
389389
boundary_pressure_extrapolation!(boundary_model, system,
390390
neighbor_system,
391391
system_coords, neighbor_coords, v,
392392
v_neighbor_system, nhs)
393+
else
394+
nhs = get_neighborhood_search(neighbor_system, system, semi)
395+
396+
# Loop over fluid particles and then the neighboring boundary particles
397+
# to extrapolate fluid pressure to the boundaries.
398+
# Note that this needs to be serial, as we are writing into the same
399+
# pressure entry from different loop iterations.
400+
boundary_pressure_extrapolation_neighbor!(boundary_model, system,
401+
neighbor_system,
402+
system_coords, neighbor_coords, v,
403+
v_neighbor_system, nhs)
393404
end
394405

395406
@threaded system for particle in eachparticle(system)
@@ -472,7 +483,7 @@ end
472483
(; pressure, cache, viscosity, density_calculator) = boundary_model
473484
(; pressure_offset) = density_calculator
474485

475-
# Loop over all pairs of particles and neighbors within the kernel cutoff.
486+
# Loop over all pairs of particles and neighbors within the kernel cutoff
476487
foreach_point_neighbor(system, neighbor_system, system_coords, neighbor_coords,
477488
neighborhood_search;
478489
points=eachparticle(system)) do particle, neighbor,

src/schemes/boundary/monaghan_kajtar/monaghan_kajtar.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ end
5959
# In order to avoid this, we clip the force at a "large" value, large enough to prevent
6060
# penetration when a reasonable `K` is used, but small enough to not cause instabilities
6161
# or super small time steps.
62-
distance_from_singularity = max(0.01 * boundary_particle_spacing,
62+
distance_from_singularity = max(boundary_particle_spacing / 100,
6363
distance - boundary_particle_spacing)
6464

6565
return K / beta^(ndims(particle_system) - 1) * pos_diff /
@@ -72,11 +72,11 @@ end
7272

7373
# TODO The neighborhood search fluid->boundary should use this search distance
7474
if q >= 2
75-
return 0.0
75+
return zero(eltype(r))
7676
end
7777

7878
# (Monaghan, Kajtar, 2009, Section 4): The kernel should be normalized to 1.77 for q=0
79-
return 1.77 / 32 * (1 + 5 / 2 * q + 2 * q^2) * (2 - q)^5
79+
return (177 // 100) // 32 * (1 + 5 // 2 * q + 2 * q^2) * (2 - q)^5
8080
end
8181

8282
@inline function particle_density(v, model::BoundaryModelMonaghanKajtar, system, particle)

src/schemes/boundary/open_boundary/system.jl

+4
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ function Base.show(io::IO, ::MIME"text/plain", system::OpenBoundarySPHSystem)
170170
end
171171
end
172172

173+
@inline function Base.eltype(::OpenBoundarySPHSystem{<:Any, <:Any, <:Any, ELTYPE}) where {ELTYPE}
174+
return ELTYPE
175+
end
176+
173177
function reset_callback_flag!(system::OpenBoundarySPHSystem)
174178
system.update_callback_used[] = false
175179

0 commit comments

Comments
 (0)