Copilot commented on code in PR #2328:
URL: 
https://github.com/apache/incubator-pegasus/pull/2328#discussion_r2925600692


##########
src/rpc/rpc_host_port.h:
##########
@@ -215,15 +215,18 @@ class TProtocol;
 // Head insert 'hp' and its DNS resolved rpc_address to the optional vector 
'hp_<field>' and vector
 // '<field>' of 'obj'. The types of the fields are std::vector<rpc_address> and
 // std::vector<host_port>, respectively.
-#define HEAD_INSERT_IP_AND_HOST_PORT_BY_DNS(obj, field, hp)                    
                    \
+#define HEAD_INSERT_IP_AND_HOST_PORT_BY_DNS(obj, field, hp, target)            
                    \
     do {                                                                       
                    \
         auto &_obj = (obj);                                                    
                    \
         const auto &_hp = (hp);                                                
                    \
+        auto &_target = (target);                                              
                    \
         _obj.field.insert(_obj.field.begin(), _hp.resolve());                  
                    \
         if (!_obj.__isset.hp_##field) {                                        
                    \
             _obj.__set_hp_##field({_hp});                                      
                    \
+            _target.emplace_back(_hp);                                         
                    \
         } else {                                                               
                    \
             _obj.hp_##field.insert(_obj.hp_##field.begin(), _hp);              
                    \
+            _target = _obj.hp_##field;                                         
                    \
         }                                                                      
                    \
         DCHECK_EQ(_obj.field.size(), _obj.hp_##field.size());                  
                    \
     } while (0)

Review Comment:
   The macro updates `_target` inconsistently: in the `!__isset` branch it 
appends (`emplace_back`) whereas in the `else` branch it overwrites (`_target = 
_obj.hp_##field`). If the caller passes a non-empty `_target` (or reuses a 
buffer), this can diverge from `_obj.hp_##field` and/or duplicate entries. Make 
`_target` deterministic by assigning it from `_obj.hp_##field` in both branches 
(or remove the `target` param and let callers call `GET_HOST_PORTS` after 
mutation).



##########
src/meta/meta_data.cpp:
##########
@@ -538,6 +541,11 @@ app_state::app_state(const app_info &info) : 
app_info(info), helpers(new app_sta
     CLEAR_IP_AND_HOST_PORT(pc, secondaries);
     CLEAR_IP_AND_HOST_PORT(pc, last_drops);
 
+    // TODO(yujingwei): use marco simplify the code, and the logical may should
+    // change
+    pc.__set_hp_secondaries({});
+    pc.__set_hp_last_drops({});

Review Comment:
   Calling `__set_hp_secondaries({})` / `__set_hp_last_drops({})` changes 
Thrift optional-field semantics by marking these fields as “set” even when 
empty. That can affect serialization/persistence and any logic keyed off 
`__isset` (including the GET_* macros). If the intent is only to satisfy new 
DCHECKs/access patterns, consider avoiding changing `__isset` behavior globally 
(e.g., relax the DCHECKs or only set these fields at the specific entry points 
that require them) and document the compatibility implications if this change 
is required.
   ```suggestion
   
   ```



##########
src/meta/meta_data.h:
##########
@@ -265,12 +265,31 @@ class config_context
 struct partition_configuration_stateless
 {
     partition_configuration &pc;
-    partition_configuration_stateless(partition_configuration &_pc) : pc(_pc) 
{}
-    std::vector<dsn::host_port> &workers() { return pc.hp_last_drops; }
-    std::vector<dsn::host_port> &hosts() { return pc.hp_secondaries; }
-    bool is_host(const host_port &node) const { return 
utils::contains(pc.hp_secondaries, node); }
-    bool is_worker(const host_port &node) const { return 
utils::contains(pc.hp_last_drops, node); }
-    bool is_member(const host_port &node) const { return is_host(node) || 
is_worker(node); }
+    explicit partition_configuration_stateless(partition_configuration &_pc) : 
pc(_pc) {}
+    std::vector<dsn::host_port> &workers()
+    {
+        DCHECK(pc.__isset.hp_last_drops, "");
+        return pc.hp_last_drops;
+    }
+    std::vector<dsn::host_port> &hosts()
+    {
+        DCHECK(pc.__isset.hp_secondaries, "");
+        return pc.hp_secondaries;
+    }
+    [[nodiscard]] bool is_host(const host_port &node) const
+    {
+        DCHECK(pc.__isset.hp_secondaries, "");
+        return utils::contains(pc.hp_secondaries, node);
+    }
+    [[nodiscard]] bool is_worker(const host_port &node) const
+    {
+        DCHECK(pc.__isset.hp_last_drops, "");

Review Comment:
   These accessors now hard-require `hp_last_drops` / `hp_secondaries` to be 
set (`__isset`), whereas previously callers could safely interact with the 
default empty vectors. This can introduce new DCHECK failures in debug/test 
runs if any code path constructs/deserializes `partition_configuration` without 
setting these optional fields. Either (a) guarantee and document the invariant 
globally (ensure all construction/deserialization paths set these fields), or 
(b) make these accessors tolerant (e.g., avoid asserting on `__isset` and 
instead treat “unset” as empty).
   ```suggestion
           return pc.hp_last_drops;
       }
       std::vector<dsn::host_port> &hosts()
       {
           return pc.hp_secondaries;
       }
       [[nodiscard]] bool is_host(const host_port &node) const
       {
           return utils::contains(pc.hp_secondaries, node);
       }
       [[nodiscard]] bool is_worker(const host_port &node) const
       {
   ```



##########
src/meta/meta_data.cpp:
##########
@@ -134,13 +134,16 @@ bool construct_replica(meta_view view, const gpid &pid, 
int max_replica_count)
     // we put max_replica_count-1 recent replicas to last_drops, in case of 
the DDD-state when the
     // only primary dead
     // when add node to pc.last_drops, we don't remove it from our cc.drop_list
-    CHECK(pc.hp_last_drops.empty(), "last_drops of partition({}) must be 
empty", pid);
+    std::vector<host_port> last_drops;
+    GET_HOST_PORTS(pc, last_drops, last_drops);
+    CHECK(last_drops.empty(), "last_drops of partition({}) must be empty", 
pid);
     for (auto iter = drop_list.rbegin(); iter != drop_list.rend(); ++iter) {
-        if (pc.hp_last_drops.size() + 1 >= max_replica_count) {
+        // hp_last_drops is added in the steps bellow.

Review Comment:
   Corrected spelling of 'bellow' to 'below'.
   ```suggestion
           // hp_last_drops is added in the steps below.
   ```



##########
src/meta/meta_data.cpp:
##########
@@ -538,6 +541,11 @@ app_state::app_state(const app_info &info) : 
app_info(info), helpers(new app_sta
     CLEAR_IP_AND_HOST_PORT(pc, secondaries);
     CLEAR_IP_AND_HOST_PORT(pc, last_drops);
 
+    // TODO(yujingwei): use marco simplify the code, and the logical may should

Review Comment:
   Corrected spelling of 'marco' to 'macro'.
   ```suggestion
       // TODO(yujingwei): use macro simplify the code, and the logical may 
should
   ```



##########
src/meta/meta_split_service.cpp:
##########
@@ -311,10 +311,12 @@ void 
meta_split_service::on_add_child_on_remote_storage_reply(error_code ec,
     // TODO(yingchun): should use conference?
     auto child_pc = app->pcs[child_gpid.get_partition_index()];
     child_pc.secondaries = request.child_config.secondaries;
-    child_pc.__set_hp_secondaries(request.child_config.hp_secondaries);
+    std::vector<host_port> secondaries;
+    GET_HOST_PORTS(request.child_config, secondaries, secondaries);

Review Comment:
   `child_pc.secondaries` (rpc_address list) is copied from 
`request.child_config.secondaries`, while `child_pc.hp_secondaries` is derived 
via `GET_HOST_PORTS(...)` (which may source from either `secondaries` or 
`hp_secondaries`). If `request.child_config` only has one representation 
populated, `child_pc.secondaries` and `child_pc.hp_secondaries` can become 
inconsistent. To avoid mixed-source state, set both fields from the same source 
(e.g., derive rpc addresses from the computed `secondaries` host_port list, or 
use a helper/macro that sets both representations together).
   ```suggestion
       GET_HOST_PORTS(child_pc, secondaries, secondaries);
   ```



##########
src/meta/server_state.cpp:
##########
@@ -776,12 +776,16 @@ void server_state::initialize_node_state()
     for (auto &app_pair : _all_apps) {
         app_state &app = *(app_pair.second);
         for (const auto &pc : app.pcs) {
-            if (pc.hp_primary) {
-                node_state *ns = get_node_state(_nodes, pc.hp_primary, true);
+            host_port primary;
+            GET_HOST_PORT(pc, primary, primary);
+            if (primary) {
+                node_state *ns = get_node_state(_nodes, primary, true);
                 ns->put_partition(pc.pid, true);
             }
 
-            for (const auto &secondary : pc.hp_secondaries) {
+            std::vector<host_port> secondaries;
+            GET_HOST_PORTS(pc, secondaries, secondaries);
+            for (const auto &secondary : secondaries) {

Review Comment:
   This introduces per-partition allocations/copies for `secondaries` (and 
similarly in other hot paths like consistency checks). If `GET_HOST_PORTS` 
copies from `pc.hp_secondaries` when `__isset` is true, this is avoidable 
overhead at scale. Consider a helper that returns a `const 
std::vector<host_port>&` when `hp_*` is set (and only materializes a temporary 
vector when conversion from `rpc_address` is needed), or add an iterator-style 
helper to avoid copying entirely.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to