commit:     2a8d386c87c9df5554e2a54d2da18c1b66c66af8
Author:     Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Wed Oct 18 18:57:57 2023 +0000
Commit:     Benda XU <heroxbd <AT> gentoo <DOT> org>
CommitDate: Mon Nov 27 14:34:52 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=2a8d386c

sci-libs/rocBLAS: add 5.7.1

Closes: https://bugs.gentoo.org/911816
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Signed-off-by: Benda Xu <heroxbd <AT> gentoo.org>

 sci-libs/rocBLAS/Manifest                          |   1 +
 .../rocBLAS-5.7.1-expand-isa-compatibility.patch   | 132 +++++++++++++++++++++
 sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild           | 109 +++++++++++++++++
 3 files changed, 242 insertions(+)

diff --git a/sci-libs/rocBLAS/Manifest b/sci-libs/rocBLAS/Manifest
index e067ceef0a6c..f17d3a47c686 100644
--- a/sci-libs/rocBLAS/Manifest
+++ b/sci-libs/rocBLAS/Manifest
@@ -2,3 +2,4 @@ DIST rocBLAS-5.0.2-Tensile-asm_full-navi22.tar.gz 1110323 BLAKE2B 80fab97c6f89fc
 DIST rocBLAS-5.4.2-Tensile-asm_full-navi22.tar.gz 1057686 BLAKE2B aa755595bbcd48775713f1d94c18b9bfa6464cae97653dd5beda9e4ed600802b62c4159e6edef7fbd4c92e629bbda1c284e6c48e096c7eaf22bbaad86bdcdace SHA512 cdc928528e7ce5bcc3ae32ec18e3aee48533f24e77c1e42f6e0398858eefebd52b8812caabe01a0ef7fa552e00118b862e025f1675a9642b009f1e38efbcdd53
 DIST rocm-rocBLAS-5.1.3.tar.gz 13741546 BLAKE2B af10d8cb69bee44ef5aab2dc350e1d9f3b6fddb3a840c17230c4b55b2649a4724c9f2e58c1eafdb3e2716eea016a72a35c3ddb498881b1ce682d780baa8d8b07 SHA512 5ab71838fdd0e9c5848cbf28a19d113353b619a878d8c7d05f64feb32faae2054169c95ed6e9dd6b05a2e807b57229dd2c361c4d289b6e6f17c196558640890f
 DIST rocm-rocBLAS-5.4.2.tar.gz 15938434 BLAKE2B 14ebafa944fdac443800bb7f9b16f8ecf0f420b168d3c6534f68ad7d14bf058a4cc1673fce8f4b9be53e4a6c1cf05011e01853cd901bce0b59827d2aca4d029f SHA512 e62bb80457c1e89454885499bdce9d60beecd706806724418983c78c65c2ae303550f9670b5a6e71dae6a61c0e42b223ab01b36b8406430731ebcbff54c4a8f4
+DIST rocm-rocBLAS-5.7.1.tar.gz 55389700 BLAKE2B 9e58b1d29c8a04aa58ce17fdd9b0c2504934261851619dc5b696a04e8c6a79e7c44d86a6db7a9c76a434d76593de8c191cba6f163cf398d01cfc001e5fa32b5a SHA512 cfc73f6c490595a3c153c2ac2a3cb5926c48c2c19baa07ff3dcdf3a7ccaed82cfdff64ed5a3be64c349cb43d654d4b104d9090e2ec9f2061049d6a4921c07722

diff --git a/sci-libs/rocBLAS/files/rocBLAS-5.7.1-expand-isa-compatibility.patch b/sci-libs/rocBLAS/files/rocBLAS-5.7.1-expand-isa-compatibility.patch
new file mode 100644
index 000000000000..c5c5d4750393
--- /dev/null
+++ b/sci-libs/rocBLAS/files/rocBLAS-5.7.1-expand-isa-compatibility.patch
@@ -0,0 +1,132 @@
+Allow rocBLAS to load the compatible kernels when running on
+architectures compatible with those ISAs.
+
+Based on patch from Cordell Bloor <[email protected]>
+https://salsa.debian.org/rocm-team/rocblas/-/blob/master/debian/patches/0012-expand-isa-compatibility.patch
+
+--- a/library/src/handle.cpp
++++ b/library/src/handle.cpp
+@@ -21,6 +21,7 @@
+  * ************************************************************************ */
+ #include "handle.hpp"
+ #include <cstdarg>
++#include <cstring>
+ #include <limits>
+ #ifdef WIN32
+ #include <windows.h>
+@@ -77,6 +78,31 @@ static inline int getActiveArch(int deviceId)
+ {
+     hipDeviceProp_t deviceProperties;
+     hipGetDeviceProperties(&deviceProperties, deviceId);
++    // coerce to compatible arch
++    switch(deviceProperties.gcnArch)
++    {
++    case 902:
++    case 909:
++    case 912:
++        deviceProperties.gcnArch = 900;
++        std::strcpy(deviceProperties.gcnArchName, "gfx900");
++        break;
++    case 1011:
++    case 1012:
++    case 1013:
++        deviceProperties.gcnArch = 1010;
++        std::strcpy(deviceProperties.gcnArchName, "gfx1010");
++        break;
++    case 1031:
++    case 1032:
++    case 1033:
++    case 1034:
++    case 1035:
++    case 1036:
++        deviceProperties.gcnArch = 1030;
++        std::strcpy(deviceProperties.gcnArchName, "gfx1030");
++        break;
++    }
+     return deviceProperties.gcnArch;
+ }
+ 
+--- a/library/src/rocblas_auxiliary.cpp
++++ b/library/src/rocblas_auxiliary.cpp
+@@ -24,6 +24,7 @@
+ #include "logging.hpp"
+ #include "rocblas-auxiliary.h"
+ #include <cctype>
++#include <cstring>
+ #include <cstdlib>
+ #include <memory>
+ #include <string>
+@@ -1285,6 +1286,31 @@ std::string rocblas_internal_get_arch_name()
+     hipGetDevice(&deviceId);
+     hipDeviceProp_t deviceProperties;
+     hipGetDeviceProperties(&deviceProperties, deviceId);
++    // coerce to compatible arch
++    switch(deviceProperties.gcnArch)
++    {
++    case 902:
++    case 909:
++    case 912:
++        deviceProperties.gcnArch = 900;
++        std::strcpy(deviceProperties.gcnArchName, "gfx900");
++        break;
++    case 1011:
++    case 1012:
++    case 1013:
++        deviceProperties.gcnArch = 1010;
++        std::strcpy(deviceProperties.gcnArchName, "gfx1010");
++        break;
++    case 1031:
++    case 1032:
++    case 1033:
++    case 1034:
++    case 1035:
++    case 1036:
++        deviceProperties.gcnArch = 1030;
++        std::strcpy(deviceProperties.gcnArchName, "gfx1030");
++        break;
++    }
+     return ArchName<hipDeviceProp_t>{}(deviceProperties);
+ }
+ 
+--- a/library/src/tensile_host.cpp
++++ b/library/src/tensile_host.cpp
+@@ -45,6 +45,7 @@ extern "C" void rocblas_shutdown();
+ #include <Tensile/hip/HipUtils.hpp>
+ #include <atomic>
+ #include <complex>
++#include <cstring>
+ #include <exception>
+ #include <future>
+ #include <iomanip>
+@@ -837,6 +838,31 @@ namespace
+ 
+             hipDeviceProp_t prop;
+             HIP_CHECK_EXC(hipGetDeviceProperties(&prop, deviceId));
++            // coerce to compatible arch
++            switch(prop.gcnArch)
++            {
++            case 902:
++            case 909:
++            case 912:
++                prop.gcnArch = 900;
++                std::strcpy(prop.gcnArchName, "gfx900");
++                break;
++            case 1011:
++            case 1012:
++            case 1013:
++                prop.gcnArch = 1010;
++                std::strcpy(prop.gcnArchName, "gfx1010");
++                break;
++            case 1031:
++            case 1032:
++            case 1033:
++            case 1034:
++            case 1035:
++            case 1036:
++                prop.gcnArch = 1030;
++                std::strcpy(prop.gcnArchName, "gfx1030");
++                break;
++            }
+ 
+             m_deviceProp = std::make_shared<hipDeviceProp_t>(prop);
+ 

diff --git a/sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild b/sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild
new file mode 100644
index 000000000000..c3f12fff175e
--- /dev/null
+++ b/sci-libs/rocBLAS/rocBLAS-5.7.1-r1.ebuild
@@ -0,0 +1,109 @@
+# Copyright 1999-2023 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DOCS_BUILDER="doxygen"
+DOCS_DIR="docs"
+DOCS_DEPEND="media-gfx/graphviz"
+ROCM_VERSION=${PV}
+inherit cmake docs edo multiprocessing rocm
+
+DESCRIPTION="AMD's library for BLAS on ROCm"
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocBLAS"
+SRC_URI="https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-${PV}.tar.gz -> rocm-${P}.tar.gz"
+S="${WORKDIR}/${PN}-rocm-${PV}"
+
+LICENSE="BSD"
+KEYWORDS="~amd64"
+SLOT="0/$(ver_cut 1-2)"
+IUSE="benchmark test"
+REQUIRED_USE="${ROCM_REQUIRED_USE}"
+
+RESTRICT="test" # Tests fail
+
+BDEPEND="
+       >=dev-util/rocm-cmake-5.3
+       dev-util/Tensile:${SLOT}
+       dev-python/joblib
+       test? ( dev-cpp/gtest )
+"
+
+DEPEND="
+       >=dev-cpp/msgpack-cxx-6.0.0
+       dev-util/hip
+       test? (
+               virtual/blas
+               dev-cpp/gtest
+               sys-libs/libomp
+       )
+       benchmark? (
+               virtual/blas
+               sys-libs/libomp
+       )
+"
+
+QA_FLAGS_IGNORED="/usr/lib64/rocblas/library/.*"
+
+PATCHES=(
+       "${FILESDIR}"/${PN}-5.4.2-cpp_lib_filesystem.patch
+       "${FILESDIR}"/${PN}-5.4.2-add-missing-header.patch
+       "${FILESDIR}"/${PN}-5.4.2-link-cblas.patch
+       "${FILESDIR}"/${PN}-5.7.1-expand-isa-compatibility.patch
+       )
+
+src_prepare() {
+       cmake_src_prepare
+       sed -e "s:,-rpath=.*\":\":" -i clients/CMakeLists.txt || die
+}
+
+src_configure() {
+       addpredict /dev/random
+       addpredict /dev/kfd
+       addpredict /dev/dri/
+
+       local mycmakeargs=(
+               -DCMAKE_SKIP_RPATH=On
+               -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
+               -DROCM_SYMLINK_LIBS=OFF
+               -DAMDGPU_TARGETS="$(get_amdgpu_flags)"
+               -DTensile_LOGIC="asm_full"
+               -DTensile_COMPILER="hipcc"
+               -DTensile_LIBRARY_FORMAT="msgpack"
+               -DTensile_CODE_OBJECT_VERSION="default"
+               -DTensile_ROOT="${EPREFIX}/usr/share/Tensile"
+               -DBUILD_WITH_TENSILE=ON
+               -DCMAKE_INSTALL_INCLUDEDIR="include/rocblas"
+               -DBUILD_CLIENTS_SAMPLES=OFF
+               -DBUILD_CLIENTS_TESTS=$(usex test ON OFF)
+               -DTensile_TEST_LOCAL_PATH="${EPREFIX}/usr/share/Tensile"
+               -DBUILD_CLIENTS_BENCHMARKS=$(usex benchmark ON OFF)
+               -DTensile_CPU_THREADS=$(makeopts_jobs)
+               -DBUILD_WITH_PIP=OFF
+       )
+
+       CXX=hipcc cmake_src_configure
+}
+
+src_compile() {
+       docs_compile
+       cmake_src_compile
+}
+
+src_test() {
+       check_amdgpu
+       cd "${BUILD_DIR}"/clients/staging || die
+       export ROCBLAS_TEST_TIMEOUT=3600 ROCBLAS_TENSILE_LIBPATH="${BUILD_DIR}/Tensile/library"
+       export LD_LIBRARY_PATH="${BUILD_DIR}/clients:${BUILD_DIR}/library/src"
+       edob ./${PN,,}-test
+}
+
+src_install() {
+       cmake_src_install
+
+       if use benchmark; then
+               cd "${BUILD_DIR}" || die
+               dolib.a clients/librocblas_fortran_client.a
+               dobin clients/staging/rocblas-bench
+       fi
+}

Reply via email to