commit:     71642b949e6f5f4d1748bc57ac23e3fcfb721840
Author:     Rok Faith <rok <AT> faith <DOT> si>
AuthorDate: Tue Jan 20 18:45:38 2026 +0000
Commit:     Paul Zander <negril.nx+gentoo <AT> gmail <DOT> com>
CommitDate: Mon Jan 26 14:10:11 2026 +0000
URL:        https://gitweb.gentoo.org/repo/proj/guru.git/commit/?id=71642b94
sci-misc/llama-cpp: update 9999: add USE="examples wmma"

Signed-off-by: Rok Faith <rok <AT> faith.si>
Part-of: https://github.com/gentoo/guru/pull/420
Closes: https://github.com/gentoo/guru/pull/420
Signed-off-by: Paul Zander <negril.nx+gentoo <AT> gmail.com>

 sci-misc/llama-cpp/Manifest              |  1 +
 sci-misc/llama-cpp/llama-cpp-9999.ebuild | 65 +++++++++++++++++++++++---------
 sci-misc/llama-cpp/metadata.xml          |  1 +
 3 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/sci-misc/llama-cpp/Manifest b/sci-misc/llama-cpp/Manifest
index 63f3d6d0d6..3fdd253743 100644
--- a/sci-misc/llama-cpp/Manifest
+++ b/sci-misc/llama-cpp/Manifest
@@ -1,3 +1,4 @@
+DIST ggml-org_models_tinyllamas_stories15M-q4_0-99dd1a73db5a37100bd4ae633f4cfce6560e1567.gguf 19077344 BLAKE2B 16e65adf9785e3091c51f1de59e5580f93fb47f79961513aeb3dbb8a0f5930f7120f0304f0f293a006170805e2b70ee1fcff0496b63356323d32c2caa55be8a8 SHA512 f9944886089958e0d97b1906cfd45020e0821c65429346e76fae29136c634ae5d039dffbae5933a95b0674f4acd87b656feb9f9e1b16dd434c5c9b5886f4f617
 DIST llama-cpp-0_pre6318.tar.gz 25626090 BLAKE2B b95826a5fd4ab27927d390cdc091648d1ffe281d5d9946fdfa4e6c8c59fb7461dd1e2b83751c86c575b4f00207bbd0cfbe467a0ae9dfdb3b192356bc77e0f808 SHA512 f3b5655123919a76fa27f1be05ffb2a7f681d7793d4d9e24106739a21846a2918ffdf9ef326ac99a55f6b4943059e4f76de754da894ff6fdd7e2d56a41edc56b
 DIST llama-cpp-0_pre6710.tar.gz 25894417 BLAKE2B 147f30d76fd49bf18fa0ab9e3e75d0ad337dcd87a73f1dbce43f180488ea06b40b1a2a93b4686a88b5a442dd4dd6a8e45bf848ceb549bdc0ad0078427336c56e SHA512 75c5918713256cb11f704b94d6e249a9f3ac2dde1107a6f4506134ba9c772e1c42d991915b571887207003f4b0679a183cd0787ffd742a08d2283fdfb86695eb
 DIST llama-cpp-0_pre6980.tar.gz 26431911 BLAKE2B b7d7c0dcdabde01acb816e73bc344564823dd1fc498fb98bf3c611b2d7a964af4d94f7cad533fe675a30685d510829160e392ab0f3bd16f4757a2f3446b8e3ca SHA512 33e63336ad7c0fc653acd409d9314ce3fc3755ed1c03b4806c647b7c80d91b3c883aec6633334555c3855a24276d4975a54c96af91df8d2f818d4dd1dbcbabfb

diff --git a/sci-misc/llama-cpp/llama-cpp-9999.ebuild b/sci-misc/llama-cpp/llama-cpp-9999.ebuild
index 5b70066b26..ff61000fd0 100644
--- a/sci-misc/llama-cpp/llama-cpp-9999.ebuild
+++ b/sci-misc/llama-cpp/llama-cpp-9999.ebuild
@@ -1,4 +1,4 @@
-# Copyright 2025 Gentoo Authors
+# Copyright 2026 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 EAPI=8
@@ -7,24 +7,45 @@ ROCM_VERSION="6.3"
 
 inherit cmake cuda rocm linux-info
 
-if [[ "${PV}" != "9999" ]]; then
-	KEYWORDS="~amd64"
-	MY_PV="b${PV#0_pre}"
-	S="${WORKDIR}/llama.cpp-${MY_PV}"
-	SRC_URI="https://github.com/ggml-org/llama.cpp/archive/refs/tags/${MY_PV}.tar.gz -> ${P}.tar.gz"
-else
+TINY_LLAMAS_COMMIT="99dd1a73db5a37100bd4ae633f4cfce6560e1567"
+
+DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
+HOMEPAGE="https://github.com/ggml-org/llama.cpp"
+
+if [[ ${PV} == *9999* ]]; then
 	inherit git-r3
 	EGIT_REPO_URI="https://github.com/ggml-org/llama.cpp.git"
+else
+	MY_PV="b${PV#0_pre}"
+	SRC_URI="https://github.com/ggml-org/llama.cpp/archive/refs/tags/${MY_PV}.tar.gz -> ${P}.tar.gz"
+	S="${WORKDIR}/llama.cpp-${MY_PV}"
+	KEYWORDS="~amd64"
 fi
 
-DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
-HOMEPAGE="https://github.com/ggml-org/llama.cpp"
+SRC_URI+="
+	examples? (
+		https://huggingface.co/ggml-org/tiny-llamas/resolve/${TINY_LLAMAS_COMMIT}/stories15M-q4_0.gguf
+			-> ggml-org_models_tinyllamas_stories15M-q4_0-${TINY_LLAMAS_COMMIT}.gguf
+	)
+"
 
 LICENSE="MIT"
 SLOT="0"
 CPU_FLAGS_X86=( avx avx2 f16c )
-IUSE="curl openblas +openmp blis rocm cuda opencl vulkan flexiblas"
-REQUIRED_USE="?? ( openblas blis flexiblas )"
+
+# wmma USE explained here: https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md#hip
+IUSE="curl openblas +openmp blis rocm cuda opencl vulkan flexiblas wmma examples"
+
+REQUIRED_USE="
+	?? (
+		openblas
+		blis
+		flexiblas
+	)
+	wmma? (
+		rocm
+	)
+"
 
 # curl is needed for pulling models from huggingface
 # numpy is used by convert_hf_to_gguf.py
@@ -37,6 +58,9 @@ CDEPEND="
 	rocm? (
 		>=dev-util/hip-${ROCM_VERSION}:=
 		>=sci-libs/hipBLAS-${ROCM_VERSION}:=
+		wmma? (
+			>=sci-libs/rocWMMA-${ROCM_VERSION}:=
+		)
 	)
 	cuda? ( dev-util/nvidia-cuda-toolkit:= )
 "
@@ -59,30 +83,34 @@ pkg_setup() {
 				ewarn "To use ROCm/HIP, you need to have HSA_AMD_SVM option enabled in your kernel."
 			fi
 		fi
-
 	fi
 }
 
 src_prepare() {
 	use cuda && cuda_src_prepare
-
 	cmake_src_prepare
+	if use examples; then
+		mkdir -p "${BUILD_DIR}/tinyllamas" || die
+		cp "${DISTDIR}/ggml-org_models_tinyllamas_stories15M-q4_0-${TINY_LLAMAS_COMMIT}.gguf" \
+			"${BUILD_DIR}/tinyllamas/stories15M-q4_0.gguf" || die
+	fi
 }
 
 src_configure() {
 	local mycmakeargs=(
 		-DLLAMA_BUILD_TESTS=OFF
+		-DLLAMA_BUILD_EXAMPLES=$(usex examples)
 		-DLLAMA_BUILD_SERVER=ON
 		-DCMAKE_SKIP_BUILD_RPATH=ON
 		-DGGML_NATIVE=0 # don't set march
 		-DGGML_RPC=ON
-		-DLLAMA_CURL=$(usex curl ON OFF)
+		-DLLAMA_CURL=$(usex curl)
 		-DBUILD_NUMBER="1"
 		-DGENTOO_REMOVE_CMAKE_BLAS_HACK=ON
-		-DGGML_CUDA=$(usex cuda ON OFF)
-		-DGGML_OPENCL=$(usex opencl ON OFF)
-		-DGGML_OPENMP=$(usex openmp ON OFF)
-		-DGGML_VULKAN=$(usex vulkan ON OFF)
+		-DGGML_CUDA=$(usex cuda)
+		-DGGML_OPENCL=$(usex opencl)
+		-DGGML_OPENMP=$(usex openmp)
+		-DGGML_VULKAN=$(usex vulkan)
 
 		# avoid clashing with whisper.cpp
 		-DCMAKE_INSTALL_LIBDIR="${EPREFIX}/usr/$(get_libdir)/llama.cpp"
@@ -118,6 +146,7 @@ src_configure() {
 		rocm_use_hipcc
 		mycmakeargs+=(
 			-DGGML_HIP=ON
 			-DAMDGPU_TARGETS=$(get_amdgpu_flags)
+			-DGGML_HIP_ROCWMMA_FATTN=$(usex wmma)
 		)
 	fi

diff --git a/sci-misc/llama-cpp/metadata.xml b/sci-misc/llama-cpp/metadata.xml
index 3c0d0befbb..8f1876715c 100644
--- a/sci-misc/llama-cpp/metadata.xml
+++ b/sci-misc/llama-cpp/metadata.xml
@@ -9,6 +9,7 @@
 		<flag name="flexiblas">Build a FlexiBLAS backend</flag>
 		<flag name="rocm">Build a HIP (ROCm) backend</flag>
 		<flag name="hip">Build a HIP (ROCm) backend</flag>
+		<flag name="wmma">Use rocWMMA to enhance flash attention performance</flag>
 		<flag name="openblas">Build an OpenBLAS backend</flag>
 		<flag name="opencl">Build an OpenCL backend, so far only works on Adreno and Intel GPUs</flag>
 	</use>
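To try this revision, the new flags are enabled like any other USE flags. A minimal sketch, assuming the GURU overlay is already active and the live ebuild is keyworded; the package.use file name is arbitrary, and wmma only makes sense together with rocm (the REQUIRED_USE above enforces this):

    # enable the new flags for the live ebuild
    echo "sci-misc/llama-cpp rocm wmma examples" >> /etc/portage/package.use/llama-cpp
    emerge --ask =sci-misc/llama-cpp-9999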

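USE=examples also fetches the stories15M-q4_0.gguf distfile, which is itself a loadable GGUF model. A quick post-install smoke test might look like the following; the DISTDIR path is the Portage default, and the llama-cli binary and its flags are upstream llama.cpp conventions, not something this ebuild adds:

    # generate a few tokens from the tiny test model
    llama-cli \
        -m /var/cache/distfiles/ggml-org_models_tinyllamas_stories15M-q4_0-99dd1a73db5a37100bd4ae633f4cfce6560e1567.gguf \
        -p "Once upon a time" -n 64

Note that GGML_HIP_ROCWMMA_FATTN only touches the HIP flash-attention kernels, so USE=wmma should only affect performance when flash attention is actually enabled at run time (llama-cli's -fa/--flash-attn switch in upstream builds).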