commit: a31f223c0219f3ad25b2e6d5320e04925821338a
Author: Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Sun Dec 8 14:23:49 2024 +0000
Commit: Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Jan 5 21:46:31 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=a31f223c
sci-libs/hipBLASLt: add 6.3.0
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Signed-off-by: Sam James <sam <AT> gentoo.org>
sci-libs/hipBLASLt/Manifest | 1 +
.../files/hipBLASLt-6.3.0-min-pip-install.patch | 23 ++++++++++++++++
.../files/hipBLASLt-6.3.0-no-arch-extra.patch | 16 +++++++++++
sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild | 2 +-
...ASLt-6.1.1-r1.ebuild => hipBLASLt-6.3.0.ebuild} | 32 +++++++++++++++-------
5 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/sci-libs/hipBLASLt/Manifest b/sci-libs/hipBLASLt/Manifest
index 83717bb68f79..3287f1b48d5e 100644
--- a/sci-libs/hipBLASLt/Manifest
+++ b/sci-libs/hipBLASLt/Manifest
@@ -1 +1,2 @@
DIST hipBLASLt-6.1.1.tar.gz 78448557 BLAKE2B 08ac9ce45d7d1e4384a36939af22f65589ed87e25b4db77f5f7ff5fbe65b8070e9c55fbb09db3b7bd49be98981dc8bf939e646ea27571bf1d1ccfa52a50b0f92 SHA512 1aa3a1cb8e9c7f653db66909b38be065b7386b97d4bd6b52ecc6c2bf72c6cdfabde6f12ebc6016b5fa195f979359af875eda8d54ea6772329312cb357bc2ebc5
+DIST hipBLASLt-6.3.0.tar.gz 89379233 BLAKE2B 0f63b1f0fb4686b58b102ad2d2b9baa272b07d203736c3972216cbd75eda38ffaf63939d8c25aadb416348a7b1c9b8bac27a3711121d5e584baa7a404babdd2b SHA512 b86613f818a1819668b3fce69422add97d36b09506d1109e3b739a6f86b8463aef9c8ef2434dd603a956f34e76e3f8fb4ddb2b98d5d2e4028f6f44b62aa6a48b
diff --git a/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-min-pip-install.patch b/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-min-pip-install.patch
new file mode 100644
index 000000000000..d2d9d32020b6
--- /dev/null
+++ b/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-min-pip-install.patch
@@ -0,0 +1,23 @@
+Remove unused libraries and updates from pypi.org
+--- a/cmake/virtualenv.cmake
++++ b/cmake/virtualenv.cmake
+@@ -24,10 +24,6 @@ endfunction()
+
+ function(virtualenv_install)
+ virtualenv_create()
+- execute_process(
+- COMMAND ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} -m pip install --upgrade pip
+- COMMAND ${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} -m pip install --upgrade setuptools
+- )
+ message("${VIRTUALENV_BIN_DIR}/${VIRTUALENV_PYTHON_EXENAME} -m pip install ${ARGN}")
+ execute_process(
+ RESULT_VARIABLE rc
+--- a/tensilelite/requirements.txt
++++ b/tensilelite/requirements.txt
+@@ -3,6 +3,3 @@ pyyaml
+ msgpack
+ joblib>=1.4.0; python_version >= '3.8'
+ joblib>=1.1.1; python_version < '3.8'
+-simplejson
+-ujson
+-orjson
diff --git a/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch b/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch
new file mode 100644
index 000000000000..785ca9008007
--- /dev/null
+++ b/sci-libs/hipBLASLt/files/hipBLASLt-6.3.0-no-arch-extra.patch
@@ -0,0 +1,16 @@
+--- a/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake
++++ b/library/src/amd_detail/rocblaslt/src/kernels/CompileSourceKernel.cmake
+@@ -25,6 +25,13 @@ function(CompileSourceKernel source archs buildIdKind outputFolder)
+ message("Setup source kernel targets")
+ string(REGEX MATCHALL "gfx[a-z0-9]+" archs "${archs}")
+ list(REMOVE_DUPLICATES archs)
++
++ list(LENGTH archs archs_length)
++ if(archs_length EQUAL 0)
++ message("No architectures specified.")
++ return()
++ endif()
++
+ list(JOIN archs "," archs)
+ message("archs for source kernel compilation: ${archs}")
+ add_custom_target(MatrixTransformKernels ALL
diff --git a/sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild b/sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild
index 021ed71fd7cd..b0aed5de521f 100644
--- a/sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild
+++ b/sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild
@@ -25,7 +25,7 @@ IUSE="${IUSE_TARGETS[@]/#/+} test"
RESTRICT="!test? ( test )"
RDEPEND="
- dev-util/hip
+ dev-util/hip:${SLOT}
dev-cpp/msgpack-cxx
sci-libs/hipBLAS:${SLOT}
"
diff --git a/sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild b/sci-libs/hipBLASLt/hipBLASLt-6.3.0.ebuild
similarity index 78%
copy from sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild
copy to sci-libs/hipBLASLt/hipBLASLt-6.3.0.ebuild
index 021ed71fd7cd..3a2f44a97a64 100644
--- a/sci-libs/hipBLASLt/hipBLASLt-6.1.1-r1.ebuild
+++ b/sci-libs/hipBLASLt/hipBLASLt-6.3.0.ebuild
@@ -1,4 +1,4 @@
-# Copyright 1999-2024 Gentoo Authors
+# Copyright 1999-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=8
@@ -6,10 +6,9 @@ EAPI=8
ROCM_SKIP_GLOBALS=1
PYTHON_COMPAT=( python3_{10..13} )
-# gfx941 and gfx942 assembly uses directives of LLVM >= 18.1.0
-LLVM_COMPAT=( 18 )
+LLVM_COMPAT=( 19 )
-inherit cmake python-any-r1 llvm-r1 prefix rocm
+inherit cmake flag-o-matic llvm-r1 prefix python-any-r1 rocm
DESCRIPTION="General matrix-matrix operations library for AMD Instinct accelerators"
HOMEPAGE="https://github.com/ROCm/hipBLASLt"
SRC_URI="https://github.com/ROCm/hipBLASLt/archive/rocm-${PV}.tar.gz -> ${P}.tar.gz"
@@ -19,20 +18,20 @@ LICENSE="MIT"
SLOT="0/$(ver_cut 1-2)"
KEYWORDS="~amd64"
-SUPPORTED_GPUS=( gfx90a gfx940 gfx941 gfx942 )
+SUPPORTED_GPUS=( gfx908 gfx90a gfx940 gfx941 gfx942 gfx1100 gfx1101 )
IUSE_TARGETS=( "${SUPPORTED_GPUS[@]/#/amdgpu_targets_}" )
IUSE="${IUSE_TARGETS[@]/#/+} test"
RESTRICT="!test? ( test )"
RDEPEND="
- dev-util/hip
+ dev-util/hip:${SLOT}
dev-cpp/msgpack-cxx
- sci-libs/hipBLAS:${SLOT}
"
DEPEND="${RDEPEND}"
BDEPEND="
dev-build/rocm-cmake
+ sci-libs/hipBLAS-common:${SLOT}
$(python_gen_any_dep '
dev-python/msgpack[${PYTHON_USEDEP}]
dev-python/pyyaml[${PYTHON_USEDEP}]
@@ -41,6 +40,8 @@ BDEPEND="
$(llvm_gen_dep 'llvm-core/clang:${LLVM_SLOT}')
test? (
dev-cpp/gtest
+ virtual/blas
+ dev-util/rocm-smi:${SLOT}
)
"
@@ -50,6 +51,8 @@ PATCHES=(
"${FILESDIR}"/${PN}-6.1.1-no-git.patch
"${FILESDIR}"/${PN}-6.1.1-clang-19.patch
"${FILESDIR}"/${PN}-6.1.1-fix-libcxx.patch
+ "${FILESDIR}"/${PN}-6.3.0-no-arch-extra.patch
+ "${FILESDIR}"/${PN}-6.3.0-min-pip-install.patch
)
python_check_deps() {
@@ -80,12 +83,18 @@ src_prepare() {
local shebangs=($(grep -rl "#!/usr/bin/env python3" tensilelite/Tensile || die))
python_fix_shebang -q ${shebangs[*]}
+ sed -e "s:\${rocm_path}/bin/amdclang++:$(get_llvm_prefix)/bin/clang++:" \
+ -i library/src/amd_detail/rocblaslt/src/kernels/compile_code_object.sh || die
+
cmake_src_prepare
}
src_configure() {
rocm_use_hipcc
+ # too many warnings
+ append-cxxflags -Wno-explicit-specialization-storage-class
+
local targets="$(get_amdgpu_flags)"
local build_with_tensile=$([ "${AMDGPU_TARGETS[@]}" = "" ] && echo OFF || echo ON )
@@ -94,10 +103,9 @@ src_configure() {
-DBUILD_WITH_TENSILE="${build_with_tensile}"
-DAMDGPU_TARGETS="${targets}"
-DBUILD_CLIENTS_TESTS=$(usex test ON OFF)
+ -Wno-dev
)
- use test && mycmakeargs+=( -DBUILD_FORTRAN_CLIENTS=ON )
-
cmake_src_configure
}
@@ -106,6 +114,8 @@ src_compile() {
# set PYTHONPATH to load Tensile from virtualenv, not the system-wide one
local -x PYTHONPATH="${S}_build/virtualenv/lib/${EPYTHON}/site-packages"
local -x TENSILE_ROCM_ASSEMBLER_PATH="$(get_llvm_prefix)/bin/clang++"
+ # TensileCreateLibrary reads CMAKE_CXX_COMPILER again
+ local -x CMAKE_CXX_COMPILER="$(get_llvm_prefix)/bin/clang++"
cmake_src_compile
}
@@ -119,5 +129,7 @@ src_install() {
src_test() {
check_amdgpu
- cmake_src_test -j1
+
+ # Expected time for 7900 XTX: 340s (full) or 5s with GTEST_FILTER='*quick*'
+ cmake_src_test
}