commit: 59b6eeed46c12217015bf87d3a0475a81cd4839e Author: YiyangWu <xgreenlandforwyy <AT> gmail <DOT> com> AuthorDate: Sun Aug 1 16:34:14 2021 +0000 Commit: Benda XU <heroxbd <AT> gentoo <DOT> org> CommitDate: Wed Aug 4 08:49:54 2021 +0000 URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=59b6eeed
sci-libs/rocBLAS: bump to 4.1.0 Bundle Tensile in a venv and use setup.py for Tensile installation. Use python-any-r1 to ensure the pyyaml and msgpack dependencies. Add USE flag: benchmark. Enable client testing. Tests rely on the system BLAS libraries, so results may differ depending on which system BLAS is installed. Only generate code objects for detected ISAs. Closes: https://github.com/gentoo/sci/pull/1102 Package-Manager: Portage-3.0.20, Repoman-3.0.3 Signed-off-by: Yiyang Wu <xgreenlandforwyy <AT> gmail.com> Signed-off-by: Benda Xu <heroxbd <AT> gentoo.org> .../files/Tensile-4.1.0-output-EnabledISA.patch | 65 +++++++++++ .../files/Tensile-4.1.0-output-commands.patch | 36 +++++++ .../files/rocBLAS-4.1.0-fix-Ninja-build.patch | 24 +++++ .../rocBLAS-4.1.0-fix-glibc-2.32-and-above.patch | 25 +++++ .../files/rocBLAS-4.1.0-link-system-blas.patch | 42 ++++++++ sci-libs/rocBLAS/metadata.xml | 4 + sci-libs/rocBLAS/rocBLAS-4.1.0.ebuild | 120 +++++++++++++++++++++ 7 files changed, 316 insertions(+) diff --git a/sci-libs/rocBLAS/files/Tensile-4.1.0-output-EnabledISA.patch b/sci-libs/rocBLAS/files/Tensile-4.1.0-output-EnabledISA.patch new file mode 100644 index 000000000..047da78c6 --- /dev/null +++ b/sci-libs/rocBLAS/files/Tensile-4.1.0-output-EnabledISA.patch @@ -0,0 +1,65 @@ +Add a parameter called EnabledISA (list) with empty default value. 
If empty, auto-detect the supported GPU ISAs. + +Kernels for each ISA in EnabledISA will be generated and compiled, then merged into code objects. + +--- Tensile-rocm-4.1.0/Tensile/Common.py ++++ Tensile-rocm-4.1.0/Tensile/Common.py +@@ -209,6 +209,7 @@ globalParameters["ClientExecutionLockPat + + # internal, i.e., gets set during startup + globalParameters["CurrentISA"] = (0,0,0) ++globalParameters["EnabledISA"] = [] + globalParameters["ROCmAgentEnumeratorPath"] = None # /opt/rocm/bin/rocm_agent_enumerator + globalParameters["ROCmSMIPath"] = None # /opt/rocm/bin/rocm-smi + globalParameters["AssemblerPath"] = None # /opt/rocm/hip/bin/hipcc +@@ -1644,6 +1645,7 @@ def assignGlobalParameters( config ): + globalParameters["ROCmAgentEnumeratorPath"] = config["ROCmAgentEnumeratorPath"] + + # read current gfx version ++ auto_detect_ISA = globalParameters["EnabledISA"] == [] + if os.name != "nt" and globalParameters["CurrentISA"] == (0,0,0) and globalParameters["ROCmAgentEnumeratorPath"]: + process = Popen([globalParameters["ROCmAgentEnumeratorPath"], "-t", "GPU"], stdout=PIPE) + line = process.stdout.readline().decode() +@@ -1653,7 +1655,9 @@ def assignGlobalParameters( config ): + if arch in globalParameters["SupportedISA"]: + print1("# Detected local GPU with ISA: gfx" + ''.join(map(str,arch))) + globalParameters["CurrentISA"] = arch ++ if auto_detect_ISA : globalParameters["EnabledISA"].append(arch) + line = process.stdout.readline().decode() ++ print1("# kernels for " + ', '.join('gfx' + ''.join(map(str,arch)) for arch in globalParameters["EnabledISA"]) + " will be generated and compiled.") + if globalParameters["CurrentISA"] == (0,0,0): + printWarning("Did not detect SupportedISA: %s; cannot benchmark assembly kernels." 
% globalParameters["SupportedISA"]) + if process.returncode: + +--- Tensile-rocm-4.1.0/Tensile/TensileCreateLibrary.py ++++ Tensile-rocm-4.1.0/Tensile/TensileCreateLibrary.py +@@ -169,7 +169,7 @@ def buildSourceCodeObjectFile(CxxCompile + + archs = [] + cmdlineArchs = [] +- for arch in globalParameters['SupportedISA']: ++ for arch in globalParameters['EnabledISA']: + if isSupported(arch): + if (arch == (9,0,6) or arch == (9,0,8)): + archs += ['gfx'+''.join(map(str,arch))+'-xnack-'] +@@ -1067,7 +1067,7 @@ def buildObjectFileNames(solutionWriter, + if isSupported(arch)] + elif (cxxCompiler == 'hipcc'): + sourceArchs = [] +- for arch in globalParameters['SupportedISA']: ++ for arch in globalParameters['EnabledISA']: + if isSupported(arch): + if (arch == (9,0,6) or arch == (9,0,8)): + sourceArchs += ['gfx'+''.join(map(str,arch))+'-xnack-'] +@@ -1249,8 +1249,9 @@ def generateKernelObjectsFromSolutions(s + for solution in solutions: + solutionKernels = solution.getKernels() + for kernel in solutionKernels: +- if kernel not in kernels: +- kernels.append(kernel) ++ if kernel["ISA"] in globalParameters["EnabledISA"] or kernel["KernelLanguage"] == "Source" : ++ if kernel not in kernels: ++ kernels.append(kernel) + solutionHelperKernels = solution.getHelperKernelObjects() + for ko in solutionHelperKernels: + kname = ko.getKernelName() diff --git a/sci-libs/rocBLAS/files/Tensile-4.1.0-output-commands.patch b/sci-libs/rocBLAS/files/Tensile-4.1.0-output-commands.patch new file mode 100644 index 000000000..152142d59 --- /dev/null +++ b/sci-libs/rocBLAS/files/Tensile-4.1.0-output-commands.patch @@ -0,0 +1,36 @@ +Print Code objects generation commands + +Cancel running dpkg to check existence of hip + +--- Tensile-rocm-4.1.0/Tensile/Common.py ++++ Tensile-rocm-4.1.0/Tensile/Common.py +@@ -175,7 +175,7 @@ globalParameters["PrintTensorD"] = 0 + globalParameters["PrintTensorRef"] = 0 # Print reference tensor. 
0x1=after init; 0x2=after copy-back; 0x3=both + globalParameters["PrintIndexAssignments"] = 0 # Print the tensor index assignment info + globalParameters["PrintWinnersOnly"] = False # Only print the solutions which become the fastest +-globalParameters["PrintCodeCommands"] = False # print the commands used to generate the code objects (asm,link,hip-clang, etc) ++globalParameters["PrintCodeCommands"] = True # print the commands used to generate the code objects (asm,link,hip-clang, etc) + + # TODO - remove this when NewClient is mainstream + globalParameters["OldClientSourceTmp"] = True # Use an intermediate sourceTmp dir to detect file changes and minimize rebuilds on old client +@@ -1680,20 +1680,6 @@ def assignGlobalParameters( config ): + # Due to platform.linux_distribution() being deprecated, just try to run dpkg regardless. + # The alternative would be to install the `distro` package. + # See https://docs.python.org/3.7/library/platform.html#platform.linux_distribution +- try: +- if globalParameters["CxxCompiler"] == "hipcc": +- output = subprocess.run(["dpkg", "-l", "hip-rocclr"], check=True, stdout=subprocess.PIPE).stdout.decode() +- elif globalParameters["CxxCompiler"] == "hcc": +- output = subprocess.run(["dpkg", "-l", "hcc"], check=True, stdout=subprocess.PIPE).stdout.decode() +- +- for line in output.split('\n'): +- if 'hipcc' in line: +- globalParameters['HipClangVersion'] = line.split()[2] +- elif 'hcc' in line: +- globalParameters['HccVersion'] = line.split()[2] +- +- except (subprocess.CalledProcessError, OSError) as e: +- printWarning("Error: {} looking for package {}: {}".format('dpkg', 'hip-rocclr', e)) + + for key in config: + value = config[key] diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.1.0-fix-Ninja-build.patch b/sci-libs/rocBLAS/files/rocBLAS-4.1.0-fix-Ninja-build.patch new file mode 100644 index 000000000..3b9c3efdd --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.1.0-fix-Ninja-build.patch @@ -0,0 +1,24 @@ 
+https://github.com/ROCmSoftwarePlatform/rocBLAS/commit/b90e6ef22166b72e73a830aa83e9bbe342d5e676 + +--- rocBLAS-rocm-4.1.0/library/src/CMakeLists.txt ++++ rocBLAS-rocm-4.1.0/library/src/CMakeLists.txt +@@ -447,10 +447,18 @@ set_target_properties( rocblas PROPERTIE + generate_export_header( rocblas EXPORT_FILE_NAME ${PROJECT_BINARY_DIR}/include/internal/rocblas-export.h ) + + # generate header with prototypes for export reuse ++file( GLOB rocblas_prototype_inputs ++ LIST_DIRECTORIES OFF ++ CONFIGURE_DEPENDS ++ ${CMAKE_CURRENT_SOURCE_DIR}/blas3/Tensile/*.hpp ++ ${CMAKE_CURRENT_SOURCE_DIR}/blas3/*.hpp ++ ${CMAKE_CURRENT_SOURCE_DIR}/blas2/*.hpp ++ ${CMAKE_CURRENT_SOURCE_DIR}/blas1/*.hpp ++) + set( ROCBLAS_PROTO_TEMPLATES "${PROJECT_BINARY_DIR}/include/internal/rocblas-exported-proto.hpp" ) + add_custom_command(OUTPUT ${ROCBLAS_PROTO_TEMPLATES} + COMMAND python3 template-proto.py ${CMAKE_CURRENT_SOURCE_DIR}/blas3/Tensile/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/blas3/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/blas2/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/blas1/*.hpp > ${ROCBLAS_PROTO_TEMPLATES} +- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/blas3/Tensile/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/blas3/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/blas2/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/blas1/*.hpp ++ DEPENDS ${rocblas_prototype_inputs} + COMMENT "Generating prototypes from ${CMAKE_CURRENT_SOURCE_DIR}." 
+ WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + ) diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.1.0-fix-glibc-2.32-and-above.patch b/sci-libs/rocBLAS/files/rocBLAS-4.1.0-fix-glibc-2.32-and-above.patch new file mode 100644 index 000000000..a4d9f0bab --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.1.0-fix-glibc-2.32-and-above.patch @@ -0,0 +1,25 @@ +https://sourceware.org/glibc/wiki/Release/2.32#Deprectation_sys_siglist.2C__sys_siglist.2C_sys_sigabbrev + +--- rocBLAS-rocm-4.1.0/clients/gtest/rocblas_test.cpp ++++ rocBLAS-rocm-4.1.0/clients/gtest/rocblas_test.cpp +@@ -173,7 +173,7 @@ void catch_signals_and_exceptions_as_fai + // Set up the return point, and handle siglongjmp returning back to here + if(sigsetjmp(t_handler.sigjmp_buf, true)) + { +- FAIL() << "Received " << sys_siglist[t_handler.signal] << " signal"; ++ FAIL() << "Received " << strsignal(t_handler.signal) << " signal"; + } + else + { + +--- rocBLAS-rocm-4.1.0/clients/include/utility.hpp ++++ rocBLAS-rocm-4.1.0/clients/include/utility.hpp +@@ -39,7 +39,7 @@ + // puts, putchar, fputs, printf, fprintf, vprintf, vfprintf: Use rocblas_cout or rocblas_cerr + // sprintf, vsprintf: Possible buffer overflows; us snprintf or vsnprintf instead + // strerror: Thread-unsafe; use snprintf / dprintf with %m or strerror_* alternatives +-// strsignal: Thread-unsafe; use sys_siglist[signal] instead ++// strsignal: Thread-unsafe; use strsignal(signal) instead + // strtok: Thread-unsafe; use strtok_r + // gmtime, ctime, asctime, localtime: Thread-unsafe + // tmpnam: Thread-unsafe; use mkstemp or related functions instead diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.1.0-link-system-blas.patch b/sci-libs/rocBLAS/files/rocBLAS-4.1.0-link-system-blas.patch new file mode 100644 index 000000000..082d8313f --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.1.0-link-system-blas.patch @@ -0,0 +1,42 @@ +Link system blas libraries against rocblas-bench and rocblas-test + +--- 
rocBLAS-rocm-4.1.0/clients/benchmarks/CMakeLists.txt ++++ rocBLAS-rocm-4.1.0/clients/benchmarks/CMakeLists.txt +@@ -83,7 +83,7 @@ if( OS_ID_rhel OR OS_ID_sles OR OS_ID_ce + if(LINK_BLIS) + target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas cblas lapack ${BLIS_LIBRARY} ${OPENMP_LIBRARY} ) + else() +- target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas cblas lapack ${OPENMP_LIBRARY} ) ++ target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas cblas lapack -lblas ${OPENMP_LIBRARY} ) + endif() + else() + # External header includes included as system files +@@ -97,7 +97,7 @@ else() + if(LINK_BLIS) + target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ${BLIS_LIBRARY} ) + else() +- target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ) ++ target_link_libraries( rocblas-bench PRIVATE rocblas_fortran_client roc::rocblas lapack cblas -lblas ) + endif() + endif() + +--- rocBLAS-rocm-4.1.0/clients/gtest/CMakeLists.txt ++++ rocBLAS-rocm-4.1.0/clients/gtest/CMakeLists.txt +@@ -163,7 +163,7 @@ if( OS_ID_rhel OR OS_ID_sles OR OS_ID_ce + if(LINK_BLIS) + target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ${BLIS_LIBRARY} ${GTEST_LIBRARIES} ${OPENMP_LIBRARY} ) + else() +- target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ${GTEST_LIBRARIES} ${OPENMP_LIBRARY} ) ++ target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas -lblas ${GTEST_LIBRARIES} ${OPENMP_LIBRARY} ) + endif() + else() + # External header includes included as system files +@@ -177,7 +177,7 @@ else() + if(LINK_BLIS) + target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas ${BLIS_LIBRARY} ${GTEST_LIBRARIES} ) + else() +- target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas 
lapack cblas ${GTEST_LIBRARIES} ) ++ target_link_libraries( rocblas-test PRIVATE rocblas_fortran_client roc::rocblas lapack cblas -lblas ${GTEST_LIBRARIES} ) + endif() + endif() + diff --git a/sci-libs/rocBLAS/metadata.xml b/sci-libs/rocBLAS/metadata.xml index 336bdb639..189545382 100644 --- a/sci-libs/rocBLAS/metadata.xml +++ b/sci-libs/rocBLAS/metadata.xml @@ -8,4 +8,8 @@ <email>[email protected]</email> <name>Wilfried Holzke</name> </maintainer> + <use> + <flag name="test">Perform rocblas-test to compare the result between rocBLAS and system BLAS.</flag> + <flag name="benchmark">Build and install rocblas-bench.</flag> + </use> </pkgmetadata> diff --git a/sci-libs/rocBLAS/rocBLAS-4.1.0.ebuild b/sci-libs/rocBLAS/rocBLAS-4.1.0.ebuild new file mode 100644 index 000000000..43867c1ef --- /dev/null +++ b/sci-libs/rocBLAS/rocBLAS-4.1.0.ebuild @@ -0,0 +1,120 @@ +# Copyright 1999-2021 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=7 + +PYTHON_COMPAT=( python3_{8,9} ) + +inherit cmake python-any-r1 + +DESCRIPTION="AMD's library for BLAS on ROCm." +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocBLAS" +SRC_URI="https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-${PV}.tar.gz -> rocm-${P}.tar.gz + https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz" + +LICENSE="MIT" +KEYWORDS="~amd64" +IUSE="benchmark test" +SLOT="0" + +BDEPEND=" + dev-util/rocm-cmake + !dev-util/Tensile + $(python_gen_any_dep ' + dev-python/msgpack[${PYTHON_USEDEP}] + dev-python/pyyaml[${PYTHON_USEDEP}] + ') +" + +RDEPEND=" + =dev-util/hip-$(ver_cut 1-2)* + benchmark? ( virtual/blas ) +" + +DEPEND="${RDEPEND} + test? ( virtual/blas ) +" + +# stripped library is not working +RESTRICT="strip !test? 
( test )" + +python_check_deps() { + has_version "dev-python/pyyaml[${PYTHON_USEDEP}]" && + has_version "dev-python/msgpack[${PYTHON_USEDEP}]" +} + +S="${WORKDIR}"/${PN}-rocm-${PV} + +PATCHES=( "${FILESDIR}"/${PN}-4.1.0-fix-Ninja-build.patch + "${FILESDIR}"/${PN}-4.1.0-fix-glibc-2.32-and-above.patch + "${FILESDIR}"/${PN}-4.1.0-link-system-blas.patch ) + +src_prepare() { + eapply_user + + pushd "${WORKDIR}"/Tensile-rocm-${PV} || die + eapply "${FILESDIR}/Tensile-4.1.0-output-commands.patch" + eapply "${FILESDIR}/Tensile-4.1.0-output-EnabledISA.patch" + popd || die + + sed -e "/PREFIX rocblas/d" \ + -e "/<INSTALL_INTERFACE/s:include:include/rocblas:" \ + -e "s:rocblas/include:include/rocblas:" \ + -e "s:\\\\\${CPACK_PACKAGING_INSTALL_PREFIX}rocblas/lib:${EPREFIX}/usr/$(get_libdir)/rocblas:" \ + -e "/rocm_install_symlink_subdir( rocblas )/d" -i library/src/CMakeLists.txt || die + + # Use setup.py to install Tensile rather than pip + sed -r -e "/pip install/s:([^ \"\(]*python) -m pip install ([^ \"\)]*):\1 setup.py install --single-version-externally-managed --root / WORKING_DIRECTORY \2:g" -i cmake/virtualenv.cmake + + cmake_src_prepare +} + +src_configure() { + # allow access to hardware + addwrite /dev/kfd + addwrite /dev/dri/ + addwrite /dev/random + + export PATH="${EPREFIX}/usr/lib/llvm/roc/bin:${PATH}" + + local mycmakeargs=( + -DTensile_LOGIC="asm_full" + -DTensile_COMPILER="hipcc" + -DTensile_ARCHITECTURE="all" + -DTensile_LIBRARY_FORMAT="msgpack" + -DTensile_CODE_OBJECT_VERSION="V3" + -DTensile_TEST_LOCAL_PATH="${WORKDIR}/Tensile-rocm-${PV}" + -DBUILD_WITH_TENSILE=ON + -DBUILD_WITH_TENSILE_HOST=ON + -DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr" + -DCMAKE_INSTALL_INCLUDEDIR="include/rocblas" + -DBUILD_TESTING=OFF + -DBUILD_CLIENTS_SAMPLES=OFF + -DBUILD_CLIENTS_TESTS=$(usex test ON OFF) + -DBUILD_CLIENTS_BENCHMARKS=$(usex benchmark ON OFF) + ) + + CXX="hipcc" cmake_src_configure + + # do not rerun cmake and the build process in src_install + sed -e '/RERUN/,+1d' -i 
"${BUILD_DIR}"/build.ninja || die +} + +src_test() { + cd "${BUILD_DIR}/clients/staging" || die + ROCBLAS_TENSILE_LIBPATH="${BUILD_DIR}/Tensile/library" ./rocblas-test +} + +src_install() { + echo "ROCBLAS_TENSILE_LIBPATH=${EPREFIX}/usr/$(get_libdir)/rocblas/library" >> 99rocblas || die + doenvd 99rocblas + + cmake_src_install + + if use benchmark; then + cd "${BUILD_DIR}" || die + dolib.so clients/librocblas_fortran_client.so + dobin clients/staging/rocblas-bench + chrpath -d "${ED}/usr/bin/rocblas-bench" || die + fi +}
