commit:     72b1628287806cf7505d20e969de66a401e328b5
Author:     Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Sun Oct 12 07:27:49 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue Oct 21 18:47:32 2025 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=72b16282

sci-libs/hipBLASLt: add 7.0.2

Bug: https://bugs.gentoo.org/964799

Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Part-of: https://github.com/gentoo/gentoo/pull/44165
Signed-off-by: Sam James <sam <AT> gentoo.org>

 sci-libs/hipBLASLt/Manifest                        |   1 +
 .../hipBLASLt/files/hipBLASLt-7.0.1-no-arch.patch  |  26 ++++
 .../files/hipBLASLt-7.0.1-system-nanobind.patch    |  17 ++
 sci-libs/hipBLASLt/hipBLASLt-7.0.2.ebuild          | 171 +++++++++++++++++++++
 4 files changed, 215 insertions(+)

diff --git a/sci-libs/hipBLASLt/Manifest b/sci-libs/hipBLASLt/Manifest
index 201f510eab5d..6f0d257b8d66 100644
--- a/sci-libs/hipBLASLt/Manifest
+++ b/sci-libs/hipBLASLt/Manifest
@@ -1,2 +1,3 @@
 DIST hipBLASLt-6.3.3.tar.gz 89385318 BLAKE2B 
b307e4c418bda7583efdf5dc646f05368c195f6df0077823ae179efb52a56ba4f13b5fce6a10ff38e0ad593bf5b415fc6e5b4132488b2dbf9da58198af15d230
 SHA512 
9fb523e614dd790aa3c01337f3d93f9df0a135d25e9efda2375e88818f6097d661e5159336258631cc0a25d923efcddb4b39378bf54c33d0e5a01bba387f1368
 DIST hipBLASLt-6.4.3.tar.gz 188053924 BLAKE2B 
5a249c2ff856fe858422a2a99e3e072afb29c5ff5ca08f4e57c3574baf85ce40611822a7b72aa7a40329e1c328bdb06cbc47f7278506a57e8e241f695b0cc940
 SHA512 
0edf1ef227e7c8de767a2fd3da639351d55d70921f2cc1fb4c09163cd32f3fc0b7d5ee7ba50bb62c619b7d9fdb5dc23c65a5e29cfc78937868153ad234c3966f
+DIST hipBLASLt-7.0.2.tar.gz 223584109 BLAKE2B 
3fb1abacf0d592a18f384042824d7b3a11c2084a01329fc8c98a6391c050ac90751791dfbccf049075d9f4bf084db38fa6fcbf3408ef93bea5d5c9e867606c55
 SHA512 
a999ec6a582592d9d2d7904d04df67d1e19e11eb9df1d01fb98057d990ae552c1f9cd040ac6c6decc9199ede6147de90393f0a74e735f16fceea54831d470dc7

diff --git a/sci-libs/hipBLASLt/files/hipBLASLt-7.0.1-no-arch.patch 
b/sci-libs/hipBLASLt/files/hipBLASLt-7.0.1-no-arch.patch
new file mode 100644
index 000000000000..9f95988db934
--- /dev/null
+++ b/sci-libs/hipBLASLt/files/hipBLASLt-7.0.1-no-arch.patch
@@ -0,0 +1,26 @@
+When Tensile_SKIP_BUILD is enabled, fixes error:
+Cannot add target-level dependencies to non-existent target 
"TENSILE_LIBRARY_TARGET".
+--- a/library/src/amd_detail/rocblaslt/src/CMakeLists.txt
++++ b/library/src/amd_detail/rocblaslt/src/CMakeLists.txt
+@@ -93,9 +93,8 @@
+     )
+   endif()
+ 
+-  add_dependencies(TENSILE_LIBRARY_TARGET rocisa)
+-
+   if(NOT Tensile_SKIP_BUILD)
++    add_dependencies(TENSILE_LIBRARY_TARGET rocisa)
+     add_subdirectory(extops)
+   endif()
+ 
+--- a/library/CMakeLists.txt
++++ b/library/CMakeLists.txt
+@@ -237,7 +237,7 @@ rocm_install(
+     PATTERN "*.hpp"
+ )
+ 
+-if ( NOT BUILD_CUDA )
++if ( NOT BUILD_CUDA AND HIPBLASLT_ENABLE_DEVICE )
+     if (WIN32)
+       set( HIPBLASLT_TENSILE_LIBRARY_DIR 
"\${CPACK_PACKAGING_INSTALL_PREFIX}hipblaslt/bin" CACHE PATH "path to tensile 
library" )
+     else()

diff --git a/sci-libs/hipBLASLt/files/hipBLASLt-7.0.1-system-nanobind.patch 
b/sci-libs/hipBLASLt/files/hipBLASLt-7.0.1-system-nanobind.patch
new file mode 100644
index 000000000000..e7cd6b564898
--- /dev/null
+++ b/sci-libs/hipBLASLt/files/hipBLASLt-7.0.1-system-nanobind.patch
@@ -0,0 +1,17 @@
+--- a/tensilelite/rocisa/CMakeLists.txt
++++ b/tensilelite/rocisa/CMakeLists.txt
+@@ -38,13 +38,7 @@ else()
+     message(FATAL_ERROR "Python version 3.8 or higher is required, but found 
version ${Python_VERSION}")
+ endif()
+ 
+-include(FetchContent)
+-FetchContent_Declare(
+-  nanobind
+-  GIT_REPOSITORY https://github.com/wjakob/nanobind.git
+-  GIT_TAG        9b3afa9dbdc23641daf26fadef7743e7127ff92f # v2.6.1
+-)
+-FetchContent_MakeAvailable(nanobind)
++find_package(nanobind CONFIG REQUIRED)
+ 
+ set(ROCISAINST_SOURCE 
${CMAKE_CURRENT_SOURCE_DIR}/rocisa/src/instruction/instruction.cpp
+                       
${CMAKE_CURRENT_SOURCE_DIR}/rocisa/src/instruction/common.cpp

diff --git a/sci-libs/hipBLASLt/hipBLASLt-7.0.2.ebuild 
b/sci-libs/hipBLASLt/hipBLASLt-7.0.2.ebuild
new file mode 100644
index 000000000000..dc4f49547e09
--- /dev/null
+++ b/sci-libs/hipBLASLt/hipBLASLt-7.0.2.ebuild
@@ -0,0 +1,171 @@
+# Copyright 1999-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+ROCM_SKIP_GLOBALS=1
+PYTHON_COMPAT=( python3_{10..14} )
+
+LLVM_COMPAT=( 20 )
+
+inherit cmake flag-o-matic multiprocessing llvm-r1 python-any-r1 rocm
+DESCRIPTION="General matrix-matrix operations library for AMD Instinct 
accelerators"
+HOMEPAGE="https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblaslt";
+SRC_URI="https://github.com/ROCm/hipBLASLt/archive/rocm-${PV}.tar.gz -> 
${P}.tar.gz"
+S="${WORKDIR}/hipBLASLt-rocm-${PV}"
+
+LICENSE="MIT"
+SLOT="0/$(ver_cut 1-2)"
+KEYWORDS="~amd64"
+
+SUPPORTED_GPUS=( gfx908 gfx90a gfx940 gfx941 gfx942 gfx950 gfx1100 gfx1101 
gfx1103 gfx1150 gfx1151 gfx1200 gfx1201 )
+IUSE_TARGETS=( "${SUPPORTED_GPUS[@]/#/amdgpu_targets_}" )
+IUSE="${IUSE_TARGETS[*]/#/+} benchmark roctracer test"
+RESTRICT="!test? ( test )"
+
+RDEPEND="
+       dev-util/hip:${SLOT}
+       roctracer? ( dev-util/roctracer:${SLOT} )
+       benchmark? (
+               dev-util/rocm-smi:${SLOT}
+               sci-libs/lapack
+               sci-libs/openblas
+       )
+"
+
+DEPEND="
+       ${RDEPEND}
+       dev-cpp/msgpack-cxx
+       sci-libs/hipBLAS-common:${SLOT}
+"
+BDEPEND="
+       ${PYTHON_DEPS}
+       dev-build/rocm-cmake:${SLOT}
+       dev-util/hipcc:${SLOT}
+       $(python_gen_any_dep "
+               dev-python/msgpack[\${PYTHON_USEDEP}]
+               dev-python/pyyaml[\${PYTHON_USEDEP}]
+               dev-python/joblib[\${PYTHON_USEDEP}]
+               dev-python/nanobind[\${PYTHON_USEDEP}]
+       ")
+       $(llvm_gen_dep "llvm-core/clang:\${LLVM_SLOT}")
+       test? (
+               dev-cpp/gtest
+               virtual/blas
+               dev-util/rocm-smi:${SLOT}
+       )
+       benchmark? (
+               virtual/blas
+               llvm-runtimes/openmp
+       )
+"
+
+PATCHES=(
+       "${FILESDIR}"/${PN}-6.1.1-fix-msgpack-dependency.patch
+       "${FILESDIR}"/${PN}-6.1.1-no-git.patch
+       "${FILESDIR}"/${PN}-6.1.1-clang-19.patch
+       "${FILESDIR}"/${PN}-6.4.1-min-pip-install.patch
+       "${FILESDIR}"/${PN}-6.4.1-headers.patch
+       "${FILESDIR}"/${PN}-6.4.1-gentoopath.patch
+       "${FILESDIR}"/${PN}-7.0.1-system-nanobind.patch
+       "${FILESDIR}"/${PN}-7.0.1-no-arch.patch
+)
+
+python_check_deps() {
+       python_has_version "dev-python/msgpack[${PYTHON_USEDEP}]" &&
+       python_has_version "dev-python/pyyaml[${PYTHON_USEDEP}]" &&
+       python_has_version "dev-python/joblib[${PYTHON_USEDEP}]" &&
+       python_has_version "dev-python/nanobind[${PYTHON_USEDEP}]"
+}
+
+pkg_setup() {
+       python-any-r1_pkg_setup
+}
+
+pkg_pretend() {
+       if [[ "${AMDGPU_TARGETS[*]}" = "" ]]; then
+               ewarn "hipBLASLt supports only some GPUs: ${SUPPORTED_GPUS[*]},"
+               ewarn "but none of them were defined in AMDGPU_TARGETS 
USE_EXPAND variable."
+               ewarn
+               ewarn "Library will continue to be built in \"dummy\" mode,"
+               ewarn "serving as a non-functional placeholder for end-user 
applications."
+       fi
+}
+
+src_prepare() {
+       # Collect the Tensile files that carry a "#!/usr/bin/env python3" shebang.
+       # The list is read with mapfile (one path per line, no word splitting) and
+       # validated in the main shell: die is not guaranteed to work when called
+       # from inside a command substitution.
+       local shebangs=()
+       mapfile -t shebangs < <(grep -rl "#!/usr/bin/env python3" tensilelite/Tensile)
+       [[ ${#shebangs[@]} -gt 0 ]] || die "no python3 shebangs found in tensilelite/Tensile"
+       python_fix_shebang -q "${shebangs[@]}"
+
+       # Called before ${CC} is read below (presumably selects the clang toolchain).
+       rocm_use_clang
+
+       # Replace the hard-coded amdclang++ path in the Makefile with the LLVM slot's clang++.
+       sed -e "s:\$(ROCM_PATH)/bin/amdclang++:$(get_llvm_prefix)/bin/clang++:g" \
+               -i tensilelite/Makefile || die
+
+       # Fix compiler validation (just a validation)
+       sed "s/amdclang/$(basename "$CC")/g" \
+               -i tensilelite/Tensile/Toolchain/Validators.py \
+               -i tensilelite/Tensile/Tests/unit/test_MatrixInstructionConversion.py || die
+
+       # sed -e "s:rocm_path + \"/bin/amdclang++\":$(get_llvm_prefix)/bin/clang++:" \
+       #       -i tensilelite/rocisa/test/test_base.py \
+       #       -i tensilelite/rocisa/test/test_container.py || die
+
+       # https://github.com/ROCm/rocm-libraries/commit/48c5e89fd90caff65e62e6a9bcf082d10d8877eb
+       sed -e 's:if(NOT ROCM_FOUND):if(NOT ROCmCMakeBuildTools_FOUND):' \
+               -i cmake/Dependencies.cmake || die
+
+       cmake_src_prepare
+}
+
+src_configure() {
+       rocm_use_clang
+
+       # too many warnings
+       append-cxxflags -Wno-explicit-specialization-storage-class
+
+       local targets="$(get_amdgpu_flags)"
+       local Tensile_SKIP_BUILD=$([ "${AMDGPU_TARGETS[*]}" = "" ] && echo ON 
|| echo OFF )
+
+       local mycmakeargs=(
+               -DROCM_SYMLINK_LIBS=OFF
+               -DTensile_SKIP_BUILD=${Tensile_SKIP_BUILD}
+               -DTensile_COMPILER=${CXX}
+               -DAMDGPU_TARGETS="${targets}"
+               -DBUILD_CLIENTS_TESTS=$(usex test ON OFF)
+               -DBUILD_CLIENTS_BENCHMARKS="$(usex benchmark ON OFF)"
+               -DPython_EXECUTABLE="${PYTHON}"
+               -DHIPBLASLT_ENABLE_MARKER="$(usex roctracer ON OFF)"\
+               -DHIPBLASLT_USE_ROCROLLER=OFF
+               -DTensile_CPU_THREADS=$(makeopts_jobs)
+               -Dnanobind_DIR="$(python_get_sitedir)/nanobind/cmake"
+               -Wno-dev
+       )
+
+       cmake_src_configure
+}
+
+src_compile() {
+       local -x ROCM_PATH="${EPREFIX}/usr"
+       # Tensile has to be imported from the build's virtualenv rather than
+       # from a system-wide installation, hence the explicit PYTHONPATH.
+       local -x PYTHONPATH="${S}_build/virtualenv/lib/${EPYTHON}/site-packages"
+
+       # The same clang++ serves as the Tensile assembler and as the compiler
+       # that TensileCreateLibrary looks up again through CMAKE_CXX_COMPILER.
+       local clangxx
+       clangxx="$(get_llvm_prefix)/bin/clang++"
+       local -x TENSILE_ROCM_ASSEMBLER_PATH="${clangxx}"
+       local -x CMAKE_CXX_COMPILER="${clangxx}"
+
+       cmake_src_compile
+}
+
+src_install() {
+       cmake_src_install
+
+       # Stop llvm-strip from removing .strtab section from *.hsaco files,
+       # otherwise rocclr/elf/elf.cpp complains with "failed: null 
sections(STRTAB)" and crashes
+       dostrip -x /usr/$(get_libdir)/hipblaslt/library/
+}
+
+src_test() {
+       check_amdgpu
+
+       # Expected time for 7900 XTX: 340s (full) or 5s with GTEST_FILTER='*quick*'.
+       # Only device 0 is made visible, because `MatrixTransformTest.MultipleDevices`
+       # fails in a dGPU+iGPU combination.
+       local -x HIP_VISIBLE_DEVICES=0
+       cmake_src_test
+}

Reply via email to