commit:     8f2f5e1cf90b1b0d0d418355e16bf7e967df3482
Author:     Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Mon Dec  9 21:10:54 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Jan  5 21:46:41 2025 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=8f2f5e1c

dev-libs/rccl: add 6.3.0

Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 dev-libs/rccl/Manifest                             |   1 +
 dev-libs/rccl/files/rccl-6.3.0-headers-fix.patch   |  12 +
 .../rccl/files/rccl-6.3.0-same-rank-sendrecv.patch | 250 +++++++++++++++++++++
 dev-libs/rccl/rccl-6.3.0.ebuild                    |  75 +++++++
 4 files changed, 338 insertions(+)

diff --git a/dev-libs/rccl/Manifest b/dev-libs/rccl/Manifest
index 8ce9b421344b..a40a8eb250b1 100644
--- a/dev-libs/rccl/Manifest
+++ b/dev-libs/rccl/Manifest
@@ -1,2 +1,3 @@
 DIST rccl-5.7.1.tar.gz 1425561 BLAKE2B 
852c111ad806d5c99f48b3c65c8cf37315c68b969f9544bfa14c1faf1d5557edcc57cdc21705ced6ded4a0288d42b1076e65fb67b3f89b4fa78cfba9d317b23e
 SHA512 
5913b8ff67fa787714713b7d5b571374898be740d56c77db9f04fe7a3e6ca74023fa930a3494d8a6f984ac9e68ee318343835e110049d08700fe773376618af4
 DIST rccl-6.1.1.tar.gz 1679144 BLAKE2B 
371d64691dc74f875c49e14df8f3f2d8b9c607376e6c5a889bd2bdb50607e88715d6d75ffed4ba3184a5b9b241cb37b8501e927a5f495632212909e410102490
 SHA512 
6c6376dd822182bcf28f573c0f3b5c7e52f94f4b670ee7c88519232f51b443d52cd37cbe6c41b5b6e9cb0b93c1124246a989f6e6a2ae74935134135585118002
+DIST rccl-6.3.0.tar.gz 1828647 BLAKE2B 
8c312fc51e7d600bb62fa059e1af53e153955b79b2ba2e8a6b6b52228b9217b7df6dc815c3a48c0800aaa9387f645070e079d04e99c0e8ebdfe41d5ebe0bda06
 SHA512 
a068b4a21786176638d108c8c85d5e5a8b0413335b555c2602f2a2e0b9f291f6872dbf68fbb5a17a6a0af9d9b5a90b1b37cce63b655a867b68fc9e20d49931ea

diff --git a/dev-libs/rccl/files/rccl-6.3.0-headers-fix.patch 
b/dev-libs/rccl/files/rccl-6.3.0-headers-fix.patch
new file mode 100644
index 000000000000..297627819f2c
--- /dev/null
+++ b/dev-libs/rccl/files/rccl-6.3.0-headers-fix.patch
@@ -0,0 +1,12 @@
+gtest 1.14 transitively included <iomanip>; gtest 1.15 no longer does.
+Upstream bug: https://github.com/ROCm/rccl/issues/1455
+--- a/test/common/TestBed.cpp
++++ b/test/common/TestBed.cpp
+@@ -4,6 +4,7 @@
+  * See LICENSE.txt for license information
+  ************************************************************************/
+ #include <unistd.h>
++#include <iomanip>
+ #include "TestBed.hpp"
+ #include <rccl/rccl.h>
+ 

diff --git a/dev-libs/rccl/files/rccl-6.3.0-same-rank-sendrecv.patch 
b/dev-libs/rccl/files/rccl-6.3.0-same-rank-sendrecv.patch
new file mode 100644
index 000000000000..435d6ac57b0f
--- /dev/null
+++ b/dev-libs/rccl/files/rccl-6.3.0-same-rank-sendrecv.patch
@@ -0,0 +1,250 @@
+Enable unit-test sendrecv to the same rank. Fixes a test failure.
+Backports commit: 
https://github.com/ROCm/rccl/commit/fd9924cfe7afbb94b1f157972ba001865481480a
+--- a/test/SendRecvTests.cpp
++++ b/test/SendRecvTests.cpp
+@@ -16,7 +16,6 @@ namespace RcclUnitTesting
+     std::vector<int>            const  numElements     = {1048576, 53327, 
1024, 0};
+     bool                        const  inPlace         = false;
+     bool                        const  useManagedMem   = false;
+-    int                         const  groupCallId     = 0;
+ 
+     OptionalColArgs options;
+     bool isCorrect = true;
+@@ -28,7 +27,10 @@ namespace RcclUnitTesting
+       int ranksPerGpu = rpg == 0 ? 1 : testBed.ev.maxRanksPerGpu;
+       int totalRanks = numGpus * ranksPerGpu;
+       int const numProcesses = isMultiProcess ? numGpus : 1;
+-      testBed.InitComms(TestBed::GetDeviceIdsList(numProcesses, numGpus, 
ranksPerGpu), 1);
++      testBed.InitComms(TestBed::GetDeviceIdsList(numProcesses, numGpus, 
ranksPerGpu),
++                        {1,2}, //two group, second group sendrecv to self, 
has 2 coll
++                        testBed.GetNumStreamsPerGroup(1,2),
++                        2);
+ 
+       for (int dataIdx = 0; dataIdx < dataTypes.size() && isCorrect; 
++dataIdx)
+       for (int numIdx = 0; numIdx < numElements.size() && isCorrect; ++numIdx)
+@@ -37,6 +39,8 @@ namespace RcclUnitTesting
+         for (int recvRank = 0; recvRank  < totalRanks; ++recvRank)
+         {
+           options.root = recvRank;
++          int groupCallId = sendRank == recvRank; //self sendrecv group has 
two coll
++          int recvId      = sendRank == recvRank; //where recv will be second 
coll
+           testBed.SetCollectiveArgs(ncclCollSend,
+                                     dataTypes[dataIdx],
+                                     numElements[numIdx],
+@@ -47,36 +51,46 @@ namespace RcclUnitTesting
+                                     sendRank);
+           if (recvRank == 0)
+           {
+-            testBed.AllocateMem(inPlace, useManagedMem, groupCallId, 0, 
sendRank);
+-            testBed.PrepareData(groupCallId, 0, sendRank);
+-          }
+-          if (recvRank  != sendRank)
+-          {
+-            if (testBed.ev.showNames) // Show test names
+-              INFO("%s Datatype: %s SendReceive test Rank %d -> Rank %d for 
%d Elements\n",
+-                  isMultiProcess ? "MP" : "SP",
+-                  ncclDataTypeNames[dataTypes[dataIdx]],
+-                  sendRank,
+-                  recvRank,
+-                  numElements[numIdx]);
+-
+-            options.root = sendRank;
+-            testBed.SetCollectiveArgs(ncclCollRecv,
++            //set up the collArg slot to make sure AllocateMem is called once 
and correctly
++            testBed.SetCollectiveArgs(ncclCollSend,
+                                       dataTypes[dataIdx],
+                                       numElements[numIdx],
+                                       numElements[numIdx],
+                                       options,
+                                       0,
+-                                      groupCallId,
+-                                      recvRank);
+-            testBed.AllocateMem(inPlace, useManagedMem, groupCallId, 0, 
recvRank);
+-            testBed.PrepareData(groupCallId, 0, recvRank);
+-            testBed.ExecuteCollectives({sendRank, recvRank});
+-            testBed.ValidateResults(isCorrect, groupCallId, 0, recvRank);
+-            testBed.DeallocateMem(groupCallId, 0, recvRank);
++                                      !groupCallId,
++                                      sendRank);
++            testBed.AllocateMem(inPlace, useManagedMem, 0, 0, sendRank);
++            testBed.PrepareData(0, 0, sendRank);
++            testBed.AllocateMem(inPlace, useManagedMem, 1, 0, sendRank);
++            testBed.PrepareData(1, 0, sendRank);
+           }
++
++          if (testBed.ev.showNames) // Show test names
++            INFO("%s Datatype: %s SendReceive test Rank %d -> Rank %d for %d 
Elements\n",
++                 isMultiProcess ? "MP" : "SP",
++                 ncclDataTypeNames[dataTypes[dataIdx]],
++                 sendRank,
++                 recvRank,
++                 numElements[numIdx]);
++          options.root = sendRank;
++
++          testBed.SetCollectiveArgs(ncclCollRecv,
++                                    dataTypes[dataIdx],
++                                    numElements[numIdx],
++                                    numElements[numIdx],
++                                    options,
++                                    recvId,
++                                    groupCallId,
++                                    recvRank);
++          testBed.AllocateMem(inPlace, useManagedMem, groupCallId, recvId, 
recvRank);
++          testBed.PrepareData(groupCallId, recvId, recvRank);
++          testBed.ExecuteCollectives({sendRank, recvRank}, groupCallId);
++          testBed.ValidateResults(isCorrect, groupCallId, recvId, recvRank);
++          testBed.DeallocateMem(groupCallId, recvId, recvRank);
+         }
+-        testBed.DeallocateMem(groupCallId, 0, sendRank);
++        testBed.DeallocateMem(0, 0, sendRank);
++        testBed.DeallocateMem(1, 0, sendRank);
+       }
+       testBed.DestroyComms();
+     }
+@@ -94,7 +108,6 @@ namespace RcclUnitTesting
+     bool                        const  inPlace         = false;
+     bool                        const  useManagedMem   = false;
+     bool                        const  userRegistered  = true;
+-    int                         const  groupCallId     = 0;
+ 
+     OptionalColArgs options;
+     bool isCorrect = true;
+@@ -106,7 +119,10 @@ namespace RcclUnitTesting
+       int ranksPerGpu = rpg == 0 ? 1 : testBed.ev.maxRanksPerGpu;
+       int totalRanks = numGpus * ranksPerGpu;
+       int const numProcesses = isMultiProcess ? numGpus : 1;
+-      testBed.InitComms(TestBed::GetDeviceIdsList(numProcesses, numGpus, 
ranksPerGpu), 1);
++      testBed.InitComms(TestBed::GetDeviceIdsList(numProcesses, numGpus, 
ranksPerGpu),
++                        {1,2}, //two group, second group sendrecv to self, 
has 2 coll
++                        testBed.GetNumStreamsPerGroup(1,2),
++                        2);
+ 
+       for (int dataIdx = 0; dataIdx < dataTypes.size() && isCorrect; 
++dataIdx)
+       for (int numIdx = 0; numIdx < numElements.size() && isCorrect; ++numIdx)
+@@ -115,6 +131,8 @@ namespace RcclUnitTesting
+         for (int recvRank = 0; recvRank  < totalRanks; ++recvRank)
+         {
+           options.root = recvRank;
++          int groupCallId = sendRank == recvRank;
++          int recvId      = sendRank == recvRank;
+           testBed.SetCollectiveArgs(ncclCollSend,
+                                     dataTypes[dataIdx],
+                                     numElements[numIdx],
+@@ -125,36 +143,45 @@ namespace RcclUnitTesting
+                                     sendRank);
+           if (recvRank == 0)
+           {
+-            testBed.AllocateMem(inPlace, useManagedMem, groupCallId, 0, 
sendRank, userRegistered);
+-            testBed.PrepareData(groupCallId, 0, sendRank);
+-          }
+-          if (recvRank  != sendRank)
+-          {
+-            if (testBed.ev.showNames) // Show test names
+-              INFO("%s Datatype: %s SendReceive test Rank %d -> Rank %d for 
%d Elements\n",
+-                  isMultiProcess ? "MP" : "SP",
+-                  ncclDataTypeNames[dataTypes[dataIdx]],
+-                  sendRank,
+-                  recvRank,
+-                  numElements[numIdx]);
+-
+-            options.root = sendRank;
+-            testBed.SetCollectiveArgs(ncclCollRecv,
++            testBed.SetCollectiveArgs(ncclCollSend,
+                                       dataTypes[dataIdx],
+                                       numElements[numIdx],
+                                       numElements[numIdx],
+                                       options,
+                                       0,
+-                                      groupCallId,
+-                                      recvRank);
+-            testBed.AllocateMem(inPlace, useManagedMem, groupCallId, 0, 
recvRank, userRegistered);
+-            testBed.PrepareData(groupCallId, 0, recvRank);
+-            testBed.ExecuteCollectives({sendRank, recvRank});
+-            testBed.ValidateResults(isCorrect, groupCallId, 0, recvRank);
+-            testBed.DeallocateMem(groupCallId, 0, recvRank);
++                                      !groupCallId,
++                                      sendRank);
++            testBed.AllocateMem(inPlace, useManagedMem, 0, 0, sendRank, 
userRegistered);
++            testBed.PrepareData(0, 0, sendRank);
++            testBed.AllocateMem(inPlace, useManagedMem, 1, 0, sendRank, 
userRegistered);
++            testBed.PrepareData(1, 0, sendRank);
+           }
++
++          if (testBed.ev.showNames) // Show test names
++            INFO("%s Datatype: %s SendReceive test Rank %d -> Rank %d for %d 
Elements\n",
++                 isMultiProcess ? "MP" : "SP",
++                 ncclDataTypeNames[dataTypes[dataIdx]],
++                 sendRank,
++                 recvRank,
++                 numElements[numIdx]);
++
++          options.root = sendRank;
++          testBed.SetCollectiveArgs(ncclCollRecv,
++                                    dataTypes[dataIdx],
++                                    numElements[numIdx],
++                                    numElements[numIdx],
++                                    options,
++                                    recvId,
++                                    groupCallId,
++                                    recvRank);
++          testBed.AllocateMem(inPlace, useManagedMem, groupCallId, recvId, 
recvRank, userRegistered);
++          testBed.PrepareData(groupCallId, recvId, recvRank);
++          testBed.ExecuteCollectives({sendRank, recvRank}, groupCallId);
++          testBed.ValidateResults(isCorrect, groupCallId, recvId, recvRank);
++          testBed.DeallocateMem(groupCallId, recvId, recvRank);
+         }
+-        testBed.DeallocateMem(groupCallId, 0, sendRank);
++        testBed.DeallocateMem(0, 0, sendRank);
++        testBed.DeallocateMem(1, 0, sendRank);
+       }
+       testBed.DestroyComms();
+     }
+--- a/test/common/TestBedChild.cpp
++++ b/test/common/TestBedChild.cpp
+@@ -395,6 +395,8 @@ namespace RcclUnitTesting
+       {
+         CollectiveArgs& collArg = this->collArgs[groupId][localRank][collIdx];
+         CHECK_CALL(collArg.AllocateMem(inPlace, useManagedMem, 
userRegistered));
++        if (collArg.userRegistered && (collArg.funcType == ncclCollSend || 
collArg.funcType == ncclCollRecv))
++          CHILD_NCCL_CALL(ncclCommRegister(this->comms[localRank], 
collArg.inputGpu.ptr, collArg.numInputBytesAllocated, 
&(collArg.commRegHandle)),"ncclCommRegister");
+         if (this->verbose) INFO("Rank %d on child %d allocates memory for 
collective %d in group %d on device %d (%s,%s,%s) Input: %p Output %p\n",
+                                 globalRank, this->childId, collIdx, groupId, 
this->deviceIds[localRank],
+                                 inPlace ? "in-place" : "out-of-place",
+@@ -646,8 +648,6 @@ namespace RcclUnitTesting
+                           "ncclAllToAllv");
+           break;
+         case ncclCollSend:
+-          if (collArg.userRegistered)
+-            CHILD_NCCL_CALL_RANK(errCode, 
ncclCommRegister(this->comms[localRank], collArg.inputGpu.ptr, 
collArg.numInputBytesAllocated, &(collArg.commRegHandle)),"ncclCommRegister");
+           CHILD_NCCL_CALL_RANK(errCode, ncclSend(
+                                    collArg.inputGpu.ptr,
+                                    collArg.numInputElements,
+@@ -658,8 +658,6 @@ namespace RcclUnitTesting
+                           "ncclSend");
+           break;
+         case ncclCollRecv:
+-          if (collArg.userRegistered)
+-            CHILD_NCCL_CALL_RANK(errCode, 
ncclCommRegister(this->comms[localRank], collArg.outputGpu.ptr, 
collArg.numOutputBytesAllocated, &(collArg.commRegHandle)), "ncclCommRegister");
+           CHILD_NCCL_CALL_RANK(errCode, ncclRecv(
+                                    collArg.outputGpu.ptr,
+                                    collArg.numOutputElements,
+@@ -891,8 +889,6 @@ namespace RcclUnitTesting
+     for (int collIdx = 0; collIdx < collArgs[groupId][localRank].size(); 
++collIdx)
+     {
+       CollectiveArgs& collArg = this->collArgs[groupId][localRank][collIdx];
+-      if (collArg.userRegistered && (collArg.funcType == ncclCollSend || 
collArg.funcType == ncclCollRecv))
+-        CHILD_NCCL_CALL(ncclCommDeregister(this->comms[localRank], 
collArg.commRegHandle), "ncclCommDeregister");
+       if (collId == -1 || collId == collIdx)
+       {
+         if (this->verbose)
+@@ -900,6 +896,10 @@ namespace RcclUnitTesting
+           INFO("Child %d release memory for collective %d in group %d (Input: 
%p Output %p\n",
+                this->childId, collIdx, groupId, collArg.inputGpu.ptr, 
collArg.outputGpu.ptr);
+         }
++        if (collArg.userRegistered && (collArg.funcType == ncclCollSend || 
collArg.funcType == ncclCollRecv))
++        {
++          CHILD_NCCL_CALL(ncclCommDeregister(this->comms[localRank], 
collArg.commRegHandle), "ncclCommDeregister");
++        }
+ 
+         CHECK_CALL(collArg.DeallocateMem());
+       }

diff --git a/dev-libs/rccl/rccl-6.3.0.ebuild b/dev-libs/rccl/rccl-6.3.0.ebuild
new file mode 100644
index 000000000000..d610f7eb139c
--- /dev/null
+++ b/dev-libs/rccl/rccl-6.3.0.ebuild
@@ -0,0 +1,75 @@
+# Copyright 1999-2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+ROCM_VERSION=${PV}
+
+inherit cmake edo rocm flag-o-matic
+
+DESCRIPTION="ROCm Communication Collectives Library (RCCL)"
+HOMEPAGE="https://github.com/ROCm/rccl";
+SRC_URI="https://github.com/ROCm/rccl/archive/rocm-${PV}.tar.gz -> 
rccl-${PV}.tar.gz"
+S="${WORKDIR}/rccl-rocm-${PV}"
+
+LICENSE="BSD"
+SLOT="0/$(ver_cut 1-2)"
+KEYWORDS="~amd64"
+IUSE="test"
+
+RDEPEND="
+       dev-util/hip:${SLOT}
+       dev-util/rocm-smi:${SLOT}"
+DEPEND="${RDEPEND}
+       sys-libs/binutils-libs"
+BDEPEND="
+       >=dev-build/cmake-3.22
+       >=dev-build/rocm-cmake-5.7.1
+       dev-util/hipify-clang:${SLOT}
+       test? ( dev-cpp/gtest )"
+
+RESTRICT="!test? ( test )"
+
+PATCHES=(
+       "${FILESDIR}/${PN}-6.0.2-fix-version-check.patch"
+       "${FILESDIR}/${PN}-6.3.0-same-rank-sendrecv.patch"
+       "${FILESDIR}/${PN}-6.3.0-headers-fix.patch"
+)
+
+src_prepare() {
+       cmake_src_prepare
+
+       # https://reviews.llvm.org/D69582 - clang does not support parallel jobs
+       sed '/parallel-jobs/d' -i CMakeLists.txt || die
+
+       # complete fix-version-check patch
+       sed "s/@rocm_version@/${PV}/" -i CMakeLists.txt || die
+
+       # don't install tests
+       sed "/rocm_install(TARGETS rccl-UnitTests/d" -i test/CMakeLists.txt || 
die
+}
+
+src_configure() {
+       rocm_use_hipcc
+
+       # lto flags make compilation fail with "undefined hidden symbol"
+       filter-lto
+
+       local mycmakeargs=(
+               -DCMAKE_SKIP_RPATH=ON
+               -DAMDGPU_TARGETS="$(get_amdgpu_flags)"
+               -DBUILD_TESTS=$(usex test ON OFF)
+               -DROCM_SYMLINK_LIBS=OFF
+               -DROCM_PATH="${EPREFIX}/usr"
+               -DRCCL_ROCPROFILER_REGISTER=OFF
+               -Wno-dev
+       )
+
+       cmake_src_configure
+}
+
+src_test() {
+       check_amdgpu
+       cd "${BUILD_DIR}" || die
+       LD_LIBRARY_PATH="${BUILD_DIR}" edob test/rccl-UnitTests
+}

Reply via email to