Copilot commented on code in PR #3255:
URL: https://github.com/apache/brpc/pull/3255#discussion_r3009930078


##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -64,6 +66,7 @@ DEFINE_int32(rdma_poller_num, 1, "Poller number in RDMA 
polling mode.");
 DEFINE_bool(rdma_poller_yield, false, "Yield thread in RDMA polling mode.");
 DEFINE_bool(rdma_edisp_unsched, false, "Disable event dispatcher schedule");
 DEFINE_bool(rdma_disable_bthread, false, "Disable bthread in RDMA");
+DEFINE_bool(rdma_ece, false, "Open ece in RDMA, should use this feature when 
rdma nics are from the same merchant.");

Review Comment:
   Flag help string: "Open ece in RDMA, should use this feature when rdma nics 
are from the same merchant." is grammatically unclear and uses "merchant" 
(likely meant "vendor"). Consider clarifying what ECE does, any requirements 
(both ends support ECE / minimum rdma-core), and when users should enable it.
   ```suggestion
   DEFINE_bool(rdma_ece, false,
               "Enable ECE (Enhanced Connection Establishment) for RDMA. "
               "Use this only when both endpoints and their RDMA NICs support 
ECE, "
               "typically with NICs from the same vendor.");
   ```



##########
src/brpc/rdma/rdma_helper.cpp:
##########
@@ -386,6 +388,8 @@ static int ReadRdmaDynamicLib() {
     LoadSymbol(g_handle_ibverbs, IbvGetAsyncEvent, "ibv_get_async_event");
     LoadSymbol(g_handle_ibverbs, IbvAckAsyncEvent, "ibv_ack_async_event");
     LoadSymbol(g_handle_ibverbs, IbvEventTypeStr, "ibv_event_type_str");
+    LoadSymbol(g_handle_ibverbs, IbvQueryEce, "ibv_query_ece");
+    LoadSymbol(g_handle_ibverbs, IbvSetEce, "ibv_set_ece");
 

Review Comment:
   `ReadRdmaDynamicLib()` now unconditionally requires 
`ibv_query_ece`/`ibv_set_ece` to be present. With the current `LoadSymbol` 
macro this will make RDMA initialization fail on libibverbs versions that don't 
export these symbols, even when `--rdma_ece` is false. Consider loading these 
symbols optionally (don’t fail if missing) and only enforcing their presence 
when ECE is enabled, with a clear warning/error in that case.
   ```suggestion
   
       // ECE-related symbols are optional: they may not be present on older
       // libibverbs versions. Do not fail RDMA initialization if they are
       // missing; instead, leave the pointers as nullptr and let the ECE
       // feature gate enforce their presence when needed.
       dlerror();  // Clear any existing error before dlsym
       {
           void* sym = dlsym(g_handle_ibverbs, "ibv_query_ece");
           const char* err = dlerror();
           if (err != NULL) {
               LOG(WARNING) << "ibv_query_ece not found in RDMA library: " << 
err
                            << ". ECE functionality will be disabled.";
           } else {
               IbvQueryEce = reinterpret_cast<decltype(IbvQueryEce)>(sym);
           }
       }
   
       dlerror();  // Clear any existing error before dlsym
       {
           void* sym = dlsym(g_handle_ibverbs, "ibv_set_ece");
           const char* err = dlerror();
           if (err != NULL) {
               LOG(WARNING) << "ibv_set_ece not found in RDMA library: " << err
                            << ". ECE functionality will be disabled.";
           } else {
               IbvSetEce = reinterpret_cast<decltype(IbvSetEce)>(sym);
           }
       }
   ```



##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -1203,6 +1206,21 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, 
uint32_t qp_num) {
         return -1;
     }
 
+    if (FLAGS_rdma_ece) {
+        struct ibv_ece ece;
+        int err = IbvQueryEce(_resource->qp, &ece);

Review Comment:
   `struct ibv_ece ece;` is left uninitialized before being passed to 
`IbvQueryEce`. Even though query should fill it on success, it's safer to 
zero-initialize (at least `comp_mask = 0`) to avoid propagating garbage if the 
provider validates input fields or if query fails unexpectedly. Also avoid 
shadowing the outer `err` here (currently `int err = ...`), which makes the 
error-handling flow harder to follow.
   ```suggestion
           struct ibv_ece ece = {};
           err = IbvQueryEce(_resource->qp, &ece);
   ```



##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -1203,6 +1206,21 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, 
uint32_t qp_num) {
         return -1;
     }
 
+    if (FLAGS_rdma_ece) {
+        struct ibv_ece ece;
+        int err = IbvQueryEce(_resource->qp, &ece);
+        if (err != 0) {
+            LOG(WARNING) << "Fail to IbvQueryEce: " << berror(err);
+            return -1;
+        }
+        // ToDo: should check if remote qp support ece
+        err = IbvSetEce(_resource->qp, &ece);
+        if (err != 0) {
+            LOG(WARNING) << "Fail to IbvSetEce: " << berror(err);
+            return -1;
+        }

Review Comment:
   New behavior (enabling ECE during QP bring-up) is not covered by existing 
RDMA unit tests. Since `test/brpc_rdma_unittest.cpp` already exercises RDMA 
endpoint state machines (with and without an RDMA runtime), consider adding a 
test that stubs `IbvQueryEce`/`IbvSetEce` and verifies they are invoked (and 
failures are handled) when `FLAGS_rdma_ece` is true.



##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -1203,6 +1206,21 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, 
uint32_t qp_num) {
         return -1;
     }
 
+    if (FLAGS_rdma_ece) {
+        struct ibv_ece ece;
+        int err = IbvQueryEce(_resource->qp, &ece);
+        if (err != 0) {

Review Comment:
   When `--rdma_ece` is enabled, `IbvQueryEce`/`IbvSetEce` may be null if the 
symbols are made optional for compatibility (or if loading failed). Add an 
explicit pointer check here and return a clear error indicating that the 
installed libibverbs doesn't support ECE.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to