commit:     71642b949e6f5f4d1748bc57ac23e3fcfb721840
Author:     Rok Faith <rok <AT> faith <DOT> si>
AuthorDate: Tue Jan 20 18:45:38 2026 +0000
Commit:     Paul Zander <negril.nx+gentoo <AT> gmail <DOT> com>
CommitDate: Mon Jan 26 14:10:11 2026 +0000
URL:        https://gitweb.gentoo.org/repo/proj/guru.git/commit/?id=71642b94

sci-misc/llama-cpp: update 9999: add USE="examples wmma"

Signed-off-by: Rok Faith <rok <AT> faith.si>
Part-of: https://github.com/gentoo/guru/pull/420
Closes: https://github.com/gentoo/guru/pull/420
Signed-off-by: Paul Zander <negril.nx+gentoo <AT> gmail.com>

 sci-misc/llama-cpp/Manifest              |  1 +
 sci-misc/llama-cpp/llama-cpp-9999.ebuild | 65 +++++++++++++++++++++++---------
 sci-misc/llama-cpp/metadata.xml          |  1 +
 3 files changed, 49 insertions(+), 18 deletions(-)
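
For reference (not part of the commit): the two new flags could be enabled per package in a local Portage config. A minimal sketch, assuming the live ebuild is already accepted locally and noting that the package.use file name is arbitrary and that wmma only has an effect together with rocm:

    # /etc/portage/package.use/llama-cpp
    sci-misc/llama-cpp examples wmma rocm

    emerge --ask sci-misc/llama-cpp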

diff --git a/sci-misc/llama-cpp/Manifest b/sci-misc/llama-cpp/Manifest
index 63f3d6d0d6..3fdd253743 100644
--- a/sci-misc/llama-cpp/Manifest
+++ b/sci-misc/llama-cpp/Manifest
@@ -1,3 +1,4 @@
+DIST ggml-org_models_tinyllamas_stories15M-q4_0-99dd1a73db5a37100bd4ae633f4cfce6560e1567.gguf 19077344 BLAKE2B 16e65adf9785e3091c51f1de59e5580f93fb47f79961513aeb3dbb8a0f5930f7120f0304f0f293a006170805e2b70ee1fcff0496b63356323d32c2caa55be8a8 SHA512 f9944886089958e0d97b1906cfd45020e0821c65429346e76fae29136c634ae5d039dffbae5933a95b0674f4acd87b656feb9f9e1b16dd434c5c9b5886f4f617
 DIST llama-cpp-0_pre6318.tar.gz 25626090 BLAKE2B b95826a5fd4ab27927d390cdc091648d1ffe281d5d9946fdfa4e6c8c59fb7461dd1e2b83751c86c575b4f00207bbd0cfbe467a0ae9dfdb3b192356bc77e0f808 SHA512 f3b5655123919a76fa27f1be05ffb2a7f681d7793d4d9e24106739a21846a2918ffdf9ef326ac99a55f6b4943059e4f76de754da894ff6fdd7e2d56a41edc56b
 DIST llama-cpp-0_pre6710.tar.gz 25894417 BLAKE2B 147f30d76fd49bf18fa0ab9e3e75d0ad337dcd87a73f1dbce43f180488ea06b40b1a2a93b4686a88b5a442dd4dd6a8e45bf848ceb549bdc0ad0078427336c56e SHA512 75c5918713256cb11f704b94d6e249a9f3ac2dde1107a6f4506134ba9c772e1c42d991915b571887207003f4b0679a183cd0787ffd742a08d2283fdfb86695eb
 DIST llama-cpp-0_pre6980.tar.gz 26431911 BLAKE2B b7d7c0dcdabde01acb816e73bc344564823dd1fc498fb98bf3c611b2d7a964af4d94f7cad533fe675a30685d510829160e392ab0f3bd16f4757a2f3446b8e3ca SHA512 33e63336ad7c0fc653acd409d9314ce3fc3755ed1c03b4806c647b7c80d91b3c883aec6633334555c3855a24276d4975a54c96af91df8d2f818d4dd1dbcbabfb
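
Not part of the commit, but the Manifest hashes above are plain BLAKE2b-512 and SHA-512 and can be reproduced from the fetched distfile with coreutils; the path below assumes the default DISTDIR:

    cd /var/cache/distfiles
    b2sum     ggml-org_models_tinyllamas_stories15M-q4_0-99dd1a73db5a37100bd4ae633f4cfce6560e1567.gguf
    sha512sum ggml-org_models_tinyllamas_stories15M-q4_0-99dd1a73db5a37100bd4ae633f4cfce6560e1567.gguf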

diff --git a/sci-misc/llama-cpp/llama-cpp-9999.ebuild b/sci-misc/llama-cpp/llama-cpp-9999.ebuild
index 5b70066b26..ff61000fd0 100644
--- a/sci-misc/llama-cpp/llama-cpp-9999.ebuild
+++ b/sci-misc/llama-cpp/llama-cpp-9999.ebuild
@@ -1,4 +1,4 @@
-# Copyright 2025 Gentoo Authors
+# Copyright 2026 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 EAPI=8
@@ -7,24 +7,45 @@ ROCM_VERSION="6.3"
 
 inherit cmake cuda rocm linux-info
 
-if [[ "${PV}" != "9999" ]]; then
-       KEYWORDS="~amd64"
-       MY_PV="b${PV#0_pre}"
-       S="${WORKDIR}/llama.cpp-${MY_PV}"
-       SRC_URI="https://github.com/ggml-org/llama.cpp/archive/refs/tags/${MY_PV}.tar.gz -> ${P}.tar.gz"
-else
+TINY_LLAMAS_COMMIT="99dd1a73db5a37100bd4ae633f4cfce6560e1567"
+
+DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
+HOMEPAGE="https://github.com/ggml-org/llama.cpp";
+
+if [[ ${PV} == *9999* ]]; then
        inherit git-r3
        EGIT_REPO_URI="https://github.com/ggml-org/llama.cpp.git";
+else
+       MY_PV="b${PV#0_pre}"
+       SRC_URI="https://github.com/ggml-org/llama.cpp/archive/refs/tags/${MY_PV}.tar.gz -> ${P}.tar.gz"
+       S="${WORKDIR}/llama.cpp-${MY_PV}"
+       KEYWORDS="~amd64"
 fi
 
-DESCRIPTION="Port of Facebook's LLaMA model in C/C++"
-HOMEPAGE="https://github.com/ggml-org/llama.cpp";
+SRC_URI+="
+       examples? (
+               https://huggingface.co/ggml-org/tiny-llamas/resolve/${TINY_LLAMAS_COMMIT}/stories15M-q4_0.gguf
+                       -> ggml-org_models_tinyllamas_stories15M-q4_0-${TINY_LLAMAS_COMMIT}.gguf
+       )
+"
 
 LICENSE="MIT"
 SLOT="0"
 CPU_FLAGS_X86=( avx avx2 f16c )
-IUSE="curl openblas +openmp blis rocm cuda opencl vulkan flexiblas"
-REQUIRED_USE="?? ( openblas blis flexiblas )"
+
+# wmma USE explained here: https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md#hip
+IUSE="curl openblas +openmp blis rocm cuda opencl vulkan flexiblas wmma examples"
+
+REQUIRED_USE="
+       ?? (
+               openblas
+               blis
+               flexiblas
+       )
+       wmma? (
+               rocm
+       )
+"
 
 # curl is needed for pulling models from huggingface
 # numpy is used by convert_hf_to_gguf.py
@@ -37,6 +58,9 @@ CDEPEND="
        rocm? (
                >=dev-util/hip-${ROCM_VERSION}:=
                >=sci-libs/hipBLAS-${ROCM_VERSION}:=
+               wmma? (
+                       >=sci-libs/rocWMMA-${ROCM_VERSION}:=
+               )
        )
        cuda? ( dev-util/nvidia-cuda-toolkit:= )
 "
@@ -59,30 +83,34 @@ pkg_setup() {
                                ewarn "To use ROCm/HIP, you need to have 
HSA_AMD_SVM option enabled in your kernel."
                        fi
                fi
-
        fi
 }
 
 src_prepare() {
        use cuda && cuda_src_prepare
-
        cmake_src_prepare
+       if use examples; then
+               mkdir -p "${BUILD_DIR}/tinyllamas" || die
+               cp "${DISTDIR}/ggml-org_models_tinyllamas_stories15M-q4_0-${TINY_LLAMAS_COMMIT}.gguf" \
+                       "${BUILD_DIR}/tinyllamas/stories15M-q4_0.gguf" || die
+       fi
 }
 
 src_configure() {
        local mycmakeargs=(
                -DLLAMA_BUILD_TESTS=OFF
+               -DLLAMA_BUILD_EXAMPLES=$(usex examples)
                -DLLAMA_BUILD_SERVER=ON
                -DCMAKE_SKIP_BUILD_RPATH=ON
                -DGGML_NATIVE=0 # don't set march
                -DGGML_RPC=ON
-               -DLLAMA_CURL=$(usex curl ON OFF)
+               -DLLAMA_CURL=$(usex curl)
                -DBUILD_NUMBER="1"
                -DGENTOO_REMOVE_CMAKE_BLAS_HACK=ON
-               -DGGML_CUDA=$(usex cuda ON OFF)
-               -DGGML_OPENCL=$(usex opencl ON OFF)
-               -DGGML_OPENMP=$(usex openmp ON OFF)
-               -DGGML_VULKAN=$(usex vulkan ON OFF)
+               -DGGML_CUDA=$(usex cuda)
+               -DGGML_OPENCL=$(usex opencl)
+               -DGGML_OPENMP=$(usex openmp)
+               -DGGML_VULKAN=$(usex vulkan)
 
                # avoid clashing with whisper.cpp
                -DCMAKE_INSTALL_LIBDIR="${EPREFIX}/usr/$(get_libdir)/llama.cpp"
@@ -118,6 +146,7 @@ src_configure() {
                rocm_use_hipcc
                mycmakeargs+=(
                        -DGGML_HIP=ON -DAMDGPU_TARGETS=$(get_amdgpu_flags)
+                       -DGGML_HIP_ROCWMMA_FATTN=$(usex wmma)
                )
        fi
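
For context (not from the ebuild itself): with USE="rocm wmma examples", the $(usex ...) calls above amount to roughly the following manual CMake configure; the GPU target list is purely illustrative:

    # rough out-of-Portage equivalent of the flags set above (sketch only)
    cmake -S llama.cpp -B build \
        -DGGML_HIP=ON \
        -DGGML_HIP_ROCWMMA_FATTN=ON \
        -DAMDGPU_TARGETS=gfx1100 \
        -DLLAMA_BUILD_EXAMPLES=ON
    cmake --build build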
 

diff --git a/sci-misc/llama-cpp/metadata.xml b/sci-misc/llama-cpp/metadata.xml
index 3c0d0befbb..8f1876715c 100644
--- a/sci-misc/llama-cpp/metadata.xml
+++ b/sci-misc/llama-cpp/metadata.xml
@@ -9,6 +9,7 @@
                <flag name="flexiblas">Build a FlexiBLAS backend</flag>
                <flag name="rocm">Build a HIP (ROCm) backend</flag>
                <flag name="hip">Build a HIP (ROCm) backend</flag>
+               <flag name="wmma">Use rocWMMA to enhance flash attention performance</flag>
                <flag name="openblas">Build an OpenBLAS backend</flag>
                <flag name="opencl">Build an OpenCL backend, so far only works 
on Adreno and Intel GPUs</flag>
        </use>
