Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/cpp/llama-cpp/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ else
if [ -d "$CURDIR/lib/rocblas/library" ]; then
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
fi
# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
# the bundled data or it falls back to slow generic kernels (issue #10660).
if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
fi
fi

# If there is a lib/ld.so, use it
Expand Down
6 changes: 6 additions & 0 deletions backend/cpp/turboquant/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ else
if [ -d "$CURDIR/lib/rocblas/library" ]; then
export ROCBLAS_TENSILE_LIBPATH="$CURDIR"/lib/rocblas/library
fi
# Same for hipBLASLt (rocblaslt): the bundled libhipblaslt.so resolves its
# TensileLibrary_lazy_gfx*.dat kernel data relative to itself, so point it at
# the bundled data or it falls back to slow generic kernels (issue #10660).
if [ -d "$CURDIR/lib/hipblaslt/library" ]; then
export HIPBLASLT_TENSILE_LIBPATH="$CURDIR"/lib/hipblaslt/library
fi
fi

# If there is a lib/ld.so, use it
Expand Down
57 changes: 57 additions & 0 deletions scripts/build/package-gpu-libs-rocm-data_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash
# Regression test for scripts/build/package-gpu-libs.sh ROCm data bundling.
#
# Guards issue #10660: hipBLASLt (rocblaslt) resolves its TensileLibrary_lazy_gfx*.dat
# kernel data relative to the bundled libhipblaslt.so. The packager copied the
# rocblas/ data dir but not the hipblaslt/ data dir, so the bundled backend
# fell back to slow generic kernels and logged
# rocblaslt error: Cannot read "TensileLibrary_lazy_gfx1201.dat": No such file or directory
#
# This test fabricates a fake ROCm tree containing both rocblas/ and hipblaslt/
# tensile data, points the packager at it via ROCM_BASE_DIRS, and asserts BOTH
# data directories are bundled into the target lib dir.
set -euo pipefail

CURDIR=$(dirname "$(realpath "$0")")
SCRIPT="$CURDIR/package-gpu-libs.sh"

WORK=$(mktemp -d)
trap 'rm -rf "$WORK"' EXIT

# Fabricate a fake ROCm install with both rocblas and hipblaslt tensile data.
FAKE_ROCM="$WORK/opt/rocm"
mkdir -p "$FAKE_ROCM/lib/rocblas/library"
mkdir -p "$FAKE_ROCM/lib/hipblaslt/library"
echo "fake rocblas tensile" > "$FAKE_ROCM/lib/rocblas/library/TensileLibrary_lazy_gfx1201.dat"
echo "fake hipblaslt tensile" > "$FAKE_ROCM/lib/hipblaslt/library/TensileLibrary_lazy_gfx1201.dat"

TARGET="$WORK/target"
mkdir -p "$TARGET"

# shellcheck source=/dev/null
source "$SCRIPT" "$TARGET"

# Point the data-dir copy at the fabricated tree instead of the real /opt/rocm,
# then run the actual ROCm packager. This asserts package_rocm_libs itself
# bundles BOTH data dirs, not just that the helper works in isolation.
export BUILD_TYPE=hipblas
export ROCM_BASE_DIRS="$FAKE_ROCM"
package_rocm_libs

fail=false
if [ ! -e "$TARGET/rocblas/library/TensileLibrary_lazy_gfx1201.dat" ]; then
echo "FAIL: rocblas tensile data was NOT bundled"
fail=true
fi
if [ ! -e "$TARGET/hipblaslt/library/TensileLibrary_lazy_gfx1201.dat" ]; then
echo "FAIL: hipblaslt tensile data was NOT bundled (regression of #10660)"
fail=true
fi

if [ "$fail" = true ]; then
ls -R "$TARGET" || true
exit 1
fi

echo "PASS: rocblas and hipblaslt tensile data were both bundled"
exit 0
72 changes: 53 additions & 19 deletions scripts/build/package-gpu-libs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,50 @@ package_cuda_libs() {
echo "CUDA libraries packaged successfully"
}

# Copy a ROCm library data subdirectory (e.g. rocblas, hipblaslt) into the
# bundled lib/ dir. These directories hold the TensileLibrary_*.dat GPU kernel
# tuning files, which rocBLAS/hipBLASLt load at runtime *relative to their own
# .so*. Since backends ship their own copies of libhipblaslt.so/librocblas.so
# under lib/, the matching data dir must travel with them or the libs fall back
# to slow generic kernels (rocblaslt error: Cannot read TensileLibrary_lazy_gfx*.dat;
# see issue #10660).
#
# The ROCm search roots default to /opt/rocm{,-*} but can be overridden via the
# ROCM_BASE_DIRS env var (space-separated), which keeps the copy unit-testable
# without a real ROCm install.
# Args: $1 = data subdir name found under <rocm-root>/lib{,64}/
copy_rocm_data_dir() {
local data_name="$1"
# Single-line `local x=$(...)` on purpose: `local` masks the command
# substitution's exit status, which is 1 when nullglob is unset and would
# otherwise trip the script's `set -e`.
local old_nullglob=$(shopt -p nullglob)
shopt -s nullglob
local rocm_dirs
if [ -n "${ROCM_BASE_DIRS:-}" ]; then
# shellcheck disable=SC2206 # intentional word-split of the override
rocm_dirs=(${ROCM_BASE_DIRS})
else
rocm_dirs=(/opt/rocm /opt/rocm-*)
fi
eval "$old_nullglob"
local found=false
local rocm_base lib_subdir
for rocm_base in "${rocm_dirs[@]}"; do
for lib_subdir in lib lib64; do
if [ -d "$rocm_base/$lib_subdir/$data_name" ]; then
echo "Found $data_name data at $rocm_base/$lib_subdir/$data_name"
mkdir -p "$TARGET_LIB_DIR/$data_name"
cp -arfL "$rocm_base/$lib_subdir/$data_name/"* "$TARGET_LIB_DIR/$data_name/" || echo "WARNING: Failed to copy $data_name data from $rocm_base/$lib_subdir/$data_name"
found=true
fi
done
done
if [ "$found" = false ]; then
echo "WARNING: No $data_name library data found in ${ROCM_BASE_DIRS:-/opt/rocm*}/lib{,64}/$data_name"
fi
}

# Package AMD ROCm/HIPBlas libraries
package_rocm_libs() {
echo "Packaging ROCm/HIPBlas libraries for BUILD_TYPE=${BUILD_TYPE}..."
Expand Down Expand Up @@ -267,27 +311,16 @@ package_rocm_libs() {
fi
done

# Copy rocblas library data (tuning files, TensileLibrary, etc.)
local old_nullglob=$(shopt -p nullglob)
shopt -s nullglob
local rocm_dirs=(/opt/rocm /opt/rocm-*)
eval "$old_nullglob"
local rocblas_found=false
for rocm_base in "${rocm_dirs[@]}"; do
for lib_subdir in lib lib64; do
if [ -d "$rocm_base/$lib_subdir/rocblas" ]; then
echo "Found rocblas data at $rocm_base/$lib_subdir/rocblas"
mkdir -p "$TARGET_LIB_DIR/rocblas"
cp -arfL "$rocm_base/$lib_subdir/rocblas/"* "$TARGET_LIB_DIR/rocblas/" || echo "WARNING: Failed to copy rocblas data from $rocm_base/$lib_subdir/rocblas"
rocblas_found=true
fi
done
done
if [ "$rocblas_found" = false ]; then
echo "WARNING: No rocblas library data found in /opt/rocm*/lib{,64}/rocblas"
fi
# Copy rocBLAS and hipBLASLt kernel data (TensileLibrary_*.dat tuning files)
# so the bundled libs find their per-arch kernels at runtime instead of
# falling back to slow generic code (see copy_rocm_data_dir / issue #10660).
copy_rocm_data_dir rocblas
copy_rocm_data_dir hipblaslt

# Copy libomp from LLVM (required for ROCm)
# Single-line `local x=$(...)` on purpose: masks shopt -p's nonzero exit
# (nullglob unset) so it doesn't trip `set -e`.
local old_nullglob=$(shopt -p nullglob)
shopt -s nullglob
local omp_libs=(/opt/rocm*/lib/llvm/lib/libomp.so*)
eval "$old_nullglob"
Expand Down Expand Up @@ -477,6 +510,7 @@ export -f copy_libs_glob
export -f is_core_lib
export -f copy_elf_deps
export -f sweep_transitive_deps
export -f copy_rocm_data_dir
export -f package_cuda_libs
export -f package_rocm_libs
export -f package_intel_libs
Expand Down
Loading