commit: 6c45083d1894f23b77d576436d6058b4429dc529
Author: Patrick Lauer <patrick <AT> gentoo <DOT> org>
AuthorDate: Sun Nov 9 12:23:22 2025 +0000
Commit: Patrick Lauer <patrick <AT> gentoo <DOT> org>
CommitDate: Sun Nov 9 12:25:14 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=6c45083d
sci-libs/hipBLASLt: add 7.1.0
Signed-off-by: Patrick Lauer <patrick <AT> gentoo.org>
sci-libs/hipBLASLt/Manifest | 1 +
sci-libs/hipBLASLt/hipBLASLt-7.1.0.ebuild | 178 ++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
diff --git a/sci-libs/hipBLASLt/Manifest b/sci-libs/hipBLASLt/Manifest
index 6f0d257b8d66..37ab22126671 100644
--- a/sci-libs/hipBLASLt/Manifest
+++ b/sci-libs/hipBLASLt/Manifest
@@ -1,3 +1,4 @@
DIST hipBLASLt-6.3.3.tar.gz 89385318 BLAKE2B
b307e4c418bda7583efdf5dc646f05368c195f6df0077823ae179efb52a56ba4f13b5fce6a10ff38e0ad593bf5b415fc6e5b4132488b2dbf9da58198af15d230
SHA512
9fb523e614dd790aa3c01337f3d93f9df0a135d25e9efda2375e88818f6097d661e5159336258631cc0a25d923efcddb4b39378bf54c33d0e5a01bba387f1368
DIST hipBLASLt-6.4.3.tar.gz 188053924 BLAKE2B
5a249c2ff856fe858422a2a99e3e072afb29c5ff5ca08f4e57c3574baf85ce40611822a7b72aa7a40329e1c328bdb06cbc47f7278506a57e8e241f695b0cc940
SHA512
0edf1ef227e7c8de767a2fd3da639351d55d70921f2cc1fb4c09163cd32f3fc0b7d5ee7ba50bb62c619b7d9fdb5dc23c65a5e29cfc78937868153ad234c3966f
DIST hipBLASLt-7.0.2.tar.gz 223584109 BLAKE2B
3fb1abacf0d592a18f384042824d7b3a11c2084a01329fc8c98a6391c050ac90751791dfbccf049075d9f4bf084db38fa6fcbf3408ef93bea5d5c9e867606c55
SHA512
a999ec6a582592d9d2d7904d04df67d1e19e11eb9df1d01fb98057d990ae552c1f9cd040ac6c6decc9199ede6147de90393f0a74e735f16fceea54831d470dc7
+DIST rocm-libraries-7.1.0.tar.gz 685280964 BLAKE2B ff19e082dc4452372b6a1c27bef15585946ae27d7ec03b0bbd598de66870a889fd4807b7baac4d74d37cfad066f41b5a3585f603fa42931d6e2563c64f245186 SHA512 2be789ada4fcba179dec1b23cdde86dcc8539e360b343f46ff8cd28cd817502244445e3b792cea48d707cc5384f605a00cb34796e569def9e7064f7b02c0e717
diff --git a/sci-libs/hipBLASLt/hipBLASLt-7.1.0.ebuild b/sci-libs/hipBLASLt/hipBLASLt-7.1.0.ebuild
new file mode 100644
index 000000000000..19d56edf14af
--- /dev/null
+++ b/sci-libs/hipBLASLt/hipBLASLt-7.1.0.ebuild
@@ -0,0 +1,178 @@
+# Copyright 1999-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+ROCM_SKIP_GLOBALS=1
+PYTHON_COMPAT=( python3_{10..14} )
+
+LLVM_COMPAT=( 20 )
+
+inherit cmake flag-o-matic multiprocessing llvm-r1 python-any-r1 rocm
+DESCRIPTION="General matrix-matrix operations library for AMD Instinct
accelerators"
+HOMEPAGE="https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblaslt"
+SRC_URI="https://github.com/ROCm/rocm-libraries/archive/refs/tags/rocm-${PV}.tar.gz
-> rocm-libraries-${PV}.tar.gz"
+
+S="${WORKDIR}/rocm-libraries-rocm-${PV}/projects/hipblaslt/"
+
+LICENSE="MIT"
+SLOT="0/$(ver_cut 1-2)"
+KEYWORDS="~amd64"
+
+SUPPORTED_GPUS=( gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1100 gfx1101
gfx1103 gfx1150 gfx1151 gfx1200 gfx1201 )
+IUSE_TARGETS=( "${SUPPORTED_GPUS[@]/#/amdgpu_targets_}" )
+IUSE="${IUSE_TARGETS[*]/#/+} benchmark roctracer test"
+RESTRICT="!test? ( test )"
+
+# Runtime dependencies. ROCm components are requested in this package's own SLOT value
+# so that they match the ROCm release being built.
+RDEPEND="
+	dev-util/hip:${SLOT}
+	sci-libs/blis
+	virtual/blas
+	roctracer? ( dev-util/roctracer:${SLOT} )
+	benchmark? (
+		dev-util/rocm-smi:${SLOT}
+		sci-libs/flexiblas
+	)
+"
+
+# Build-time libraries and headers, in addition to everything needed at runtime.
+DEPEND="
+	${RDEPEND}
+	dev-cpp/msgpack-cxx
+	sci-libs/hipBLAS-common:${SLOT}
+	llvm-runtimes/openmp
+"
+
+# Build tools. The Python module list inside python_gen_any_dep must stay in sync with
+# python_check_deps() below.
+BDEPEND="
+	${PYTHON_DEPS}
+	dev-build/rocm-cmake:${SLOT}
+	dev-util/hipcc:${SLOT}
+	$(python_gen_any_dep "
+		dev-python/msgpack[\${PYTHON_USEDEP}]
+		dev-python/pyyaml[\${PYTHON_USEDEP}]
+		dev-python/joblib[\${PYTHON_USEDEP}]
+		dev-python/nanobind[\${PYTHON_USEDEP}]
+		dev-python/setuptools[\${PYTHON_USEDEP}]
+	")
+	$(llvm_gen_dep "llvm-core/clang:\${LLVM_SLOT}")
+	test? (
+		dev-cpp/gtest
+		sci-libs/flexiblas
+		dev-util/rocm-smi:${SLOT}
+	)
+"
+
+# NOTE(review): this commit's diffstat lists only Manifest and the ebuild, so both patch
+# files must already exist under files/ - verify.
+PATCHES=(
+	"${FILESDIR}"/${PN}-7.1.0-no-git.patch
+	"${FILESDIR}"/${PN}-7.1.0-rocisa-nanobind.patch
+)
+
+python_check_deps() {
+ python_has_version "dev-python/msgpack[${PYTHON_USEDEP}]" &&
+ python_has_version "dev-python/pyyaml[${PYTHON_USEDEP}]" &&
+ python_has_version "dev-python/joblib[${PYTHON_USEDEP}]" &&
+ python_has_version "dev-python/nanobind[${PYTHON_USEDEP}]" &&
+ python_has_version "dev-python/setuptools[${PYTHON_USEDEP}]"
+}
+
+# Register the QA exemption for the installed kernel library directory, then run the
+# python-any-r1 setup.
+pkg_setup() {
+	# Directory (relative to the install root) under which the *.hsaco files mentioned in
+	# src_install are installed.
+	local kernel_dir="usr/$(get_libdir)/hipblaslt/library"
+
+	# Regular expression matching everything below that directory.
+	QA_FLAGS_IGNORED="${kernel_dir}/.*"
+	python-any-r1_pkg_setup
+}
+
+# Warn early when no supported GPU target is selected: the package still builds, but only
+# as a non-functional placeholder.
+pkg_pretend() {
+	if [[ -z ${AMDGPU_TARGETS[*]} ]]; then
+		ewarn "hipBLASLt supports only some GPUs: ${SUPPORTED_GPUS[*]},"
+		ewarn "but none of them were defined in AMDGPU_TARGETS USE_EXPAND variable."
+		ewarn
+		ewarn "Library will continue to be built in \"dummy\" mode,"
+		ewarn "serving as a non-functional placeholder for end-user applications."
+	fi
+}
+
+# Adjust the unpacked sources before configuring: fix Python shebangs, replace references
+# to the amdclang toolchain with the selected clang, apply an upstream CMake fix, and then
+# run the standard cmake preparation (which applies PATCHES).
+src_prepare() {
+	# Collect every Tensile file that starts through "env python3" and rewrite its shebang.
+	# grep failing (including "no match") aborts the build.
+	local shebangs=($(grep -rl "#!/usr/bin/env python3" tensilelite/Tensile || die))
+	python_fix_shebang -q "${shebangs[@]}"
+
+	# NOTE(review): presumably sets up the clang-based CC that the second sed below reads -
+	# confirm in rocm.eclass.
+	rocm_use_clang
+
+	sed -e "s:\$(ROCM_PATH)/bin/amdclang++:$(get_llvm_prefix)/bin/clang++:g" \
+		-i tensilelite/Makefile || die
+
+	# Fix compiler validation (just a validation)
+	sed "s/amdclang/$(basename "$CC")/g" \
+		-i tensilelite/Tensile/Toolchain/Validators.py \
+		-i tensilelite/Tensile/Tests/unit/test_MatrixInstructionConversion.py || die
+
+	# https://github.com/ROCm/rocm-libraries/commit/48c5e89fd90caff65e62e6a9bcf082d10d8877eb
+	sed -e 's:if(NOT ROCM_FOUND):if(NOT ROCmCMakeBuildTools_FOUND):' \
+		-i cmake/dependencies.cmake || die
+
+	cmake_src_prepare
+}
+
+# Configure the CMake build for the selected GPU targets. When no AMDGPU_TARGETS are
+# selected the Tensile kernel build is skipped and device support is disabled (the
+# "dummy" mode announced in pkg_pretend).
+src_configure() {
+	rocm_use_clang
+
+	# too many warnings
+	# NOTE(review): -DCMAKE_CXX_FLAGS below may supersede CXXFLAGS taken from the
+	# environment, in which case this flag never reaches the compiler - confirm.
+	append-cxxflags -Wno-explicit-specialization-storage-class
+
+	local targets="$(get_amdgpu_flags)"
+
+	local Tensile_SKIP_BUILD=OFF
+	local HIPBLASLT_ENABLE_DEVICE=ON
+	if [[ -z ${AMDGPU_TARGETS[*]} ]]; then
+		Tensile_SKIP_BUILD=ON
+		HIPBLASLT_ENABLE_DEVICE=OFF
+	fi
+
+	# targets has a trailing semicolon, this trips up Tensile's input parser, so carefully prune.
+	# ${targets%;} is used instead of ${targets::-1}: the latter is a bash error
+	# ("substring expression < 0") when no target is selected and the string is empty.
+	# Tensile guesses weirdly how to compile things, ld.bfd won't work, so force lld
+
+	local mycmakeargs=(
+		-DGPU_TARGETS="${targets%;}"
+		-DCMAKE_CXX_FLAGS="-fuse-ld=lld"
+		-DBLA_PKGCONFIG_BLAS=ON
+		-DBLA_VENDOR=FlexiBLAS
+		-DBUILD_CLIENTS_BENCHMARKS="$(usex benchmark ON OFF)"
+		-DBUILD_CLIENTS_TESTS=$(usex test ON OFF)
+		-DHIPBLASLT_ENABLE_DEVICE=${HIPBLASLT_ENABLE_DEVICE}
+		-DHIPBLASLT_ENABLE_MARKER="$(usex roctracer ON OFF)"
+		-DHIPBLASLT_ENABLE_ROCROLLER=OFF
+		-DHIPBLASLT_ENABLE_FETCH=OFF
+		-DHIPBLASLT_BUNDLE_PYTHON_DEPS=ON
+		-Dnanobind_DIR="$(python_get_sitedir)/nanobind/cmake"
+		-DPython_EXECUTABLE="${PYTHON}"
+		-DROCM_SYMLINK_LIBS=OFF
+		-DTensile_COMPILER=${CXX}
+		-DTensile_CPU_THREADS=$(makeopts_jobs)
+		-DTensile_SKIP_BUILD=${Tensile_SKIP_BUILD}
+		-DBLIS_LIB="/usr/$(get_libdir)/libblis.so"
+		-DBLIS_INCLUDE_DIR="/usr/include/blis"
+		-DBLAS_LIBRARIES="/usr/$(get_libdir)"
+		-DHIPBLASLT_BUILD_TESTING="$(usex test ON OFF)"
+		-Wno-dev
+	)
+
+	cmake_src_configure
+}
+
+# Run the CMake build with the environment variables that the Tensile tooling reads.
+# All of them are exported for the duration of this function only.
+src_compile() {
+	# Both Tensile-related compiler variables point at the same clang++ binary.
+	local clangxx="$(get_llvm_prefix)/bin/clang++"
+
+	local -x ROCM_PATH="${EPREFIX}/usr"
+	# set PYTHONPATH to load Tensile from virtualenv, not the system-wide one
+	local -x PYTHONPATH="${S}_build/virtualenv/lib/${EPYTHON}/site-packages"
+	local -x TENSILE_ROCM_ASSEMBLER_PATH="${clangxx}"
+	# TensileCreateLibrary reads CMAKE_CXX_COMPILER again
+	local -x CMAKE_CXX_COMPILER="${clangxx}"
+	cmake_src_compile
+}
+
+src_install() {
+ cmake_src_install
+
+ # Stop llvm-strip from removing .strtab section from *.hsaco files,
+ # otherwise rocclr/elf/elf.cpp complains with "failed: null
sections(STRTAB)" and crashes
+ dostrip -x /usr/$(get_libdir)/hipblaslt/library/
+}
+
+# Run the CMake test suite with only the first HIP device visible.
+src_test() {
+	check_amdgpu
+
+	# Expected time for 7900 XTX: 340s (full) or 5s with GTEST_FILTER='*quick*'
+	# Fails in `MatrixTransformTest.MultipleDevices` in dGPU+iGPU combination,
+	# hence only device 0 is exposed to the tests.
+	HIP_VISIBLE_DEVICES=0 cmake_src_test
+}