commit: 403a032bafc4a2f6ff937921c1f204142c025b47
Author: Paul Zander <negril.nx+gentoo <AT> gmail <DOT> com>
AuthorDate: Wed Nov 5 14:09:36 2025 +0000
Commit: Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Wed Nov 5 21:07:43 2025 +0000
URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=403a032b
dev-libs/cutlass: add 4.1.0
add code for -9999 version
allow USE=headers-only with USE=tools
add missing gtest dep
cleanup USE=clang-cuda
- including setting correct CUDA_HOST_COMPILER ( this will break with too new
clang, but that isn't supported well enough yet )
pass all-major when CUDAARCHS is unset
lower test level to 0
run tests in serial to avoid broken inter test dependencies
Closes: https://bugs.gentoo.org/955660
Signed-off-by: Paul Zander <negril.nx+gentoo <AT> gmail.com>
Part-of: https://github.com/gentoo/gentoo/pull/44489
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>
dev-libs/cutlass/Manifest | 1 +
dev-libs/cutlass/cutlass-4.1.0.ebuild | 161 ++++++++++++++++++++++++++++++++++
2 files changed, 162 insertions(+)
diff --git a/dev-libs/cutlass/Manifest b/dev-libs/cutlass/Manifest
index 95f6b5dc29a5..2488122271e3 100644
--- a/dev-libs/cutlass/Manifest
+++ b/dev-libs/cutlass/Manifest
@@ -1,2 +1,3 @@
DIST cutlass-3.8.0.tar.gz 31021072 BLAKE2B 4dd85f7c0d3452c2a194902fcd0afd7de3a3f17f86f477628d5e5f416ac885a86ed1fbbf2a9959a46e60e38a93400a7ec99bad1f980b0a4be36fad0de887ec0b SHA512 a08aac281fb3bdea82c0a044dc643c40e4803d02e55bbea450021cb7a5472aed86e79c5df41cd981976af8403f18cc48d8069045c4e68339430d3a3caeb109ac
DIST cutlass-3.9.2.tar.gz 31534258 BLAKE2B 04462b3c6983f96b2027821408c4de30bf6b2e18e986ddebaf4f9d5572df354273116603ccc0ac618c61e03b981972e6d7786f354aa4f5e08d185cf7e4ad8e1d SHA512 d45a9e4908b5886259acc1ffd4c8e4c6072801ad45909f365d599510b9989d3313438f2fa5cbee5c1e916e496a0b95bda85f79de3c38502d73e2b9206f868822
+DIST cutlass-4.1.0.tar.gz 33083022 BLAKE2B 0a30c28ab7539481a47b2a667c585eaa763ebafa15463cf50a8c57300e8dccd31d1790d00ae091e0d317fe57bb48955a3309de48cebb2529a850099ea4acc1f7 SHA512 a8c2cdf772ea3b1a35bfc948ca70240477d6e8ee004ae9e487275a7b35e40424b2820396cbc827482ddb75172fcdf56372ea0d4d96ae6f3253369bd315de3ce6
diff --git a/dev-libs/cutlass/cutlass-4.1.0.ebuild b/dev-libs/cutlass/cutlass-4.1.0.ebuild
new file mode 100644
index 000000000000..617ccdfe2fbf
--- /dev/null
+++ b/dev-libs/cutlass/cutlass-4.1.0.ebuild
@@ -0,0 +1,161 @@
+# Copyright 2023-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+PYTHON_COMPAT=( python3_{11..14} )
+
+inherit cuda cmake python-any-r1 flag-o-matic toolchain-funcs
+
+DESCRIPTION="CUDA Templates for Linear Algebra Subroutines"
+HOMEPAGE="https://github.com/NVIDIA/cutlass"
+
+if [[ "${PV}" == *9999* ]]; then
+ inherit git-r3
+ EGIT_REPO_URI="https://github.com/NVIDIA/${PN}"
+else
+ SRC_URI="
+ https://github.com/NVIDIA/${PN}/archive/refs/tags/v${PV}.tar.gz -> ${P}.tar.gz
+ "
+ KEYWORDS="~amd64"
+fi
+
+LICENSE="BSD"
+SLOT="0"
+
+X86_CPU_FEATURES=(
+ f16c:f16c
+)
+CPU_FEATURES=( "${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}" )
+
+IUSE="clang-cuda cublas cudnn doc dot examples +headers-only jumbo-build performance profiler test tools ${CPU_FEATURES[*]%:*}"
+
+REQUIRED_USE="
+ headers-only? (
+ !examples
+ !profiler
+ !test
+ )
+ test? (
+ tools
+ )
+"
+
+RESTRICT="!test? ( test )"
+
+RDEPEND="
+ dev-util/nvidia-cuda-toolkit:=
+"
+DEPEND="${RDEPEND}
+ test? (
+ ${PYTHON_DEPS}
+ dev-cpp/gtest
+ cudnn? (
+ dev-libs/cudnn:=
+ )
+ )
+ tools? (
+ ${PYTHON_DEPS}
+ )
+"
+
+pkg_setup() {
+ if use test || use tools; then
+ python-any-r1_pkg_setup
+ fi
+}
+
+src_configure() {
+ # we can use clang as default
+ if use clang-cuda && ! tc-is-clang; then
+ export CC="${CHOST}-clang"
+ export CXX="${CHOST}-clang++"
+ else
+ tc-export CXX CC
+ fi
+
+ cuda_add_sandbox
+ addpredict "/dev/char/"
+
+ local mycmakeargs=(
+ -DCMAKE_POLICY_DEFAULT_CMP0156="OLD" # cutlass_add_library
+
+ -DCMAKE_DISABLE_FIND_PACKAGE_Doxygen="$(usex !doc)"
+
+ -DCUTLASS_REVISION="${PVR}"
+ -DCUTLASS_ENABLE_CUBLAS="$(usex cublas)"
+ -DCUTLASS_ENABLE_CUDNN="$(usex cudnn)"
+ -DCUTLASS_ENABLE_EXAMPLES="$(usex examples)"
+ -DCUTLASS_ENABLE_F16C="$(usex cpu_flags_x86_f16c)"
+ -DCUTLASS_ENABLE_GTEST_UNIT_TESTS="$(usex test)"
+ -DCUTLASS_ENABLE_HEADERS_ONLY="$(usex headers-only)"
+ -DCUTLASS_ENABLE_LIBRARY="$(usex !headers-only)"
+ -DCUTLASS_ENABLE_PERFORMANCE="$(usex performance)"
+ -DCUTLASS_ENABLE_PROFILER="$(usex profiler)"
+ -DCUTLASS_ENABLE_PROFILER_UNIT_TESTS="$(usex test "$(usex profiler)")"
+ -DCUTLASS_ENABLE_TESTS="$(usex test)"
+ -DCUTLASS_ENABLE_TOOLS="$(usex tools)"
+ -DCUTLASS_INSTALL_TESTS="no"
+ -DCUTLASS_NVCC_ARCHS="${CUDAARCHS:-all-major}"
+ -DCUTLASS_UNITY_BUILD_ENABLED="$(usex jumbo-build)"
+ -DCUTLASS_USE_SYSTEM_GOOGLETEST="yes"
+ -DIMPLICIT_CMAKE_CXX_STANDARD="yes"
+ )
+
+ # clang-cuda needs to filter mfpmath
+ if use clang-cuda; then
+ filter-mfpmath sse
+ filter-mfpmath i386
+
+ mycmakeargs+=(
+ -DCMAKE_CUDA_HOST_COMPILER="${CHOST}-clang++"
+ )
+ else
+ mycmakeargs+=(
+ -DCMAKE_CUDA_HOST_COMPILER="$(cuda_gccdir)"
+ )
+ fi
+
+ if use cudnn; then
+ mycmakeargs+=(
+ -DCUDNN_INCLUDE_DIR="${CUDNN_PATH:-${ESYSROOT}/opt/cuda}/linux/include"
+ -DCUDNN_LIBRARY="${CUDNN_PATH:-${ESYSROOT}/opt/cuda}/$(get_libdir)/libcudnn.so"
+ )
+ fi
+
+ if use doc; then
+ mycmakeargs+=(
+ -DCUTLASS_ENABLE_DOXYGEN_DOT="$(usex dot)"
+ )
+ fi
+
+ if use test; then
+ mycmakeargs+=(
+ -DCUTLASS_TEST_LEVEL="0"
+ )
+
+ append-cxxflags -DNDEBUG
+ fi
+
+ cmake_src_configure
+}
+
+src_test() {
+ cuda_add_sandbox -w
+
+ local myctestargs=(
+ )
+
+ local CMAKE_SKIP_TESTS=(
+ "ctest_examples_41_fmha_backward_python$"
+ )
+
+ cmake_src_test -j1
+ cmake_build test_unit "${myctestargs[@]}" -j1
+}
+
+src_install() {
+ cmake_src_install
+
+ rm -r "${ED}/usr/test" || die
+}