On 3/17/26 9:28 AM, Felix Huettner via dev wrote:
> Previously the transaction history was hard limited to 100 entries.
> However in fast changing environments (e.g. with 20 transactions/sec)
> this means that transactions are only in the history for a few seconds.
> If now a client reconnects and tries to use monitor_cond_since it has
> only a short timeframe where this will reliably work.
> 
> We make the history limit configurable here so that use can choose the

*user

> speed vs memory tradeoff as needed.
> We still keep the limit based on size of the history, as syncing a
> history larger than the actual database size does not make sense in any
> way.
> 
> Signed-off-by: Felix Huettner <[email protected]>
> ---

Hi, Felix.  Thanks or the patch!  And sorry for delay (lots of random
security stuff lately...).

I was thinking about this problem for along time but didn't make a patch
so far, but the problem definitely needs some solution.

One other use case here beside the basic transaction rate is when you
run a sync command and every ovn-controller sends a tiny transaction
to update the sequence number.  In a large cluster the entire history
will be overwritten multiple times within a few seconds.

However, what I was thinking is maybe we should just drop the static
history limit and always rely on the dynamic one.  Have you tried this
approach?  Or do you see the configured history limit being hit after
you increase it?  If so, is the memory consumption significantly
different than just allowing it to grow up to the database size?

Just trying to see if we can drop the limit and avoid extra config knobs.
I saw people hacking the code to set the limit to 5000 in practice, but
I'm not sure if this value is even reachable in the normal operation, or
is it always capped by the database size in the end anyway.

See some small comments for the current patch below.

> v1->v2: handle setting if the config does not specify the service-model.
> 
>  ovsdb/ovsdb-server.c  | 29 +++++++++++++++++++++++++++--
>  ovsdb/ovsdb.h         |  1 +
>  ovsdb/relay.c         |  4 ++--
>  ovsdb/transaction.c   | 33 +++++++++++++++++++++++++++++++--
>  ovsdb/transaction.h   |  5 ++++-
>  tests/ovsdb-server.at | 22 +++++++++++++++++++++-
>  6 files changed, 86 insertions(+), 8 deletions(-)
> 
> diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
> index 7c3a5ef11..7d8562cb2 100644
> --- a/ovsdb/ovsdb-server.c
> +++ b/ovsdb/ovsdb-server.c
> @@ -156,6 +156,11 @@ struct db_config {
>          bool backup;  /* If true, the database is read-only and receives
>                         * updates from the 'source'. */
>      } ab;
> +
> +    /* Valid for SM_CLUSTERED or SM_RELAY. */
> +    unsigned int n_txn_history_max; /* The maximum amount of history entries
> +                                     * to keep. 0 means default. */
> +

nit: The empty line here is not needed.

>  };
>  
>  struct db {
> @@ -462,6 +467,7 @@ db_config_clone(const struct db_config *c)
>          conf->options = ovsdb_jsonrpc_options_clone(c->options);
>      }
>      conf->ab.sync_exclude = nullable_xstrdup(c->ab.sync_exclude);
> +    conf->n_txn_history_max = c->n_txn_history_max;
>  
>      return conf;
>  }
> @@ -572,6 +578,12 @@ database_update_config(struct server_config 
> *server_config,
>          replication_set_db(db->db, conf->source, conf->ab.sync_exclude,
>                             server_uuid, &conf->options->rpc);
>      }
> +
> +    if ((conf->model == SM_CLUSTERED || conf->model == SM_RELAY) &&
> +        conf->n_txn_history_max) {
> +        ovsdb_txn_history_update(db->db, conf->n_txn_history_max);
> +

nit: Extra empty line.

> +    }
>  }
>  
>  static bool
> @@ -1180,7 +1192,8 @@ open_db(struct server_config *server_config,
>      /* Enable txn history for clustered and relay modes.  It is not enabled 
> for
>       * other modes for now, since txn id is available for clustered and relay
>       * modes only. */
> -    ovsdb_txn_history_init(db->db, model == SM_RELAY || model == 
> SM_CLUSTERED);
> +    ovsdb_txn_history_init(db->db, model == SM_RELAY || model == 
> SM_CLUSTERED,
> +                           conf->n_txn_history_max);
>  
>      read_db(server_config, db);
>  
> @@ -2889,6 +2902,12 @@ db_config_to_json(const struct db_config *conf)
>          }
>          json_object_put(json, "backup", 
> json_boolean_create(conf->ab.backup));
>      }
> +
> +    if (conf->n_txn_history_max) {
> +        json_object_put(json, "transaction-history-limit",
> +                        json_integer_create(conf->n_txn_history_max));
> +    }
> +
>      return json;
>  }
>  
> @@ -3012,7 +3031,7 @@ remotes_from_json(struct shash *remotes, const struct 
> json *json)
>  static struct db_config *
>  db_config_from_json(const char *name, const struct json *json)
>  {
> -    const struct json *model, *source, *sync_exclude, *backup;
> +    const struct json *model, *source, *sync_exclude, *backup, *txn_limit;
>      struct db_config *conf = xzalloc(sizeof *conf);
>      struct ovsdb_parser parser;
>      struct ovsdb_error *error;
> @@ -3093,6 +3112,12 @@ db_config_from_json(const char *name, const struct 
> json *json)
>          }
>      }
>  
> +    txn_limit = ovsdb_parser_member(&parser, "transaction-history-limit",
> +                                    OP_INTEGER | OP_OPTIONAL);
> +    if (txn_limit) {
> +        conf->n_txn_history_max = json_integer(txn_limit);
> +    }
> +
>      error = ovsdb_parser_finish(&parser);
>      if (error) {
>          char *s = ovsdb_error_to_string_free(error);
> diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
> index 325900bc6..a6a65dc1b 100644
> --- a/ovsdb/ovsdb.h
> +++ b/ovsdb/ovsdb.h
> @@ -110,6 +110,7 @@ struct ovsdb {
>      bool need_txn_history;     /* Need to maintain history of transactions. 
> */
>      unsigned int n_txn_history; /* Current number of history transactions. */
>      unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
> +    unsigned int n_txn_history_max; /* The maximum history length. */
>      struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. 
> */
>  
>      size_t n_atoms;  /* Total number of ovsdb atoms in the database. */
> diff --git a/ovsdb/relay.c b/ovsdb/relay.c
> index a5e1a5f3a..7691e0b73 100644
> --- a/ovsdb/relay.c
> +++ b/ovsdb/relay.c
> @@ -274,7 +274,7 @@ exit:
>              /* The relay source doesn't support unique transaction ids,
>               * disabling transaction history for relay. */
>              ovsdb_txn_history_destroy(db);
> -            ovsdb_txn_history_init(db, false);
> +            ovsdb_txn_history_init(db, false, 0);
>          } else {
>              ovsdb_txn_set_txnid(last_id, txn);
>          }
> @@ -307,7 +307,7 @@ ovsdb_relay_clear(struct ovsdb *db)
>  
>      /* Clearing the transaction history, and re-enabling it. */
>      ovsdb_txn_history_destroy(db);
> -    ovsdb_txn_history_init(db, true);
> +    ovsdb_txn_history_init(db, true, db->n_txn_history_max);
>  
>      return error;
>  }
> diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
> index 0d0d27b61..7ebb8245f 100644
> --- a/ovsdb/transaction.c
> +++ b/ovsdb/transaction.c
> @@ -18,6 +18,7 @@
>  #include "transaction.h"
>  
>  #include "bitmap.h"
> +#include "coverage.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "file.h"
>  #include "hash.h"
> @@ -37,6 +38,9 @@
>  #include "util.h"
>  
>  VLOG_DEFINE_THIS_MODULE(transaction);

nit: Would add an empty line here.

> +COVERAGE_DEFINE(transaction_history_add);

Maybe worth abbreviating into 'txn'?  We use this in idl for example,
and shorter names are rendered better.

> +
> +#define TRANSACTION_HISTORY_LIMIT_DEFAULT 100
>  
>  struct ovsdb_txn {
>      struct ovsdb *db;
> @@ -1192,6 +1196,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
>          ovs_list_push_back(&txn->db->txn_history, &node->node);
>          txn->db->n_txn_history++;
>          txn->db->n_txn_history_atoms += txn->n_atoms;
> +        COVERAGE_INC(transaction_history_add);
>      }
>  }
>  
> @@ -1686,7 +1691,7 @@ ovsdb_txn_history_run(struct ovsdb *db)
>       * Keeping at least one transaction to avoid sending UUID_ZERO as a last 
> id
>       * if all entries got removed due to the size limit. */
>      while (db->n_txn_history > 1 &&
> -           (db->n_txn_history > 100 ||
> +           (db->n_txn_history > db->n_txn_history_max ||
>              db->n_txn_history_atoms > db->n_atoms)) {
>          struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
>                  ovs_list_pop_front(&db->txn_history),
> @@ -1700,11 +1705,20 @@ ovsdb_txn_history_run(struct ovsdb *db)
>  }
>  
>  void
> -ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history)
> +ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history,
> +                       unsigned int n_txn_history_max)
>  {
>      db->need_txn_history = need_txn_history;

I wonder if we need this variable.  If we're setting the max according
to the service model anyway, we should be able to just check if it's
zero or not.  Right?

>      db->n_txn_history = 0;
>      db->n_txn_history_atoms = 0;
> +    db->n_txn_history_max = n_txn_history_max != 0 ? n_txn_history_max :
> +                            TRANSACTION_HISTORY_LIMIT_DEFAULT;
> +    if (db->need_txn_history && db->n_txn_history_max < 2) {

I'd say the 100 should be the default and the minimum, then we could just
take the max between the user-configured value and the 100.

> +        VLOG_WARN("transaction history needed, but limit too low. "
> +                  "%u < 2. Setting it to 2.",
> +                  n_txn_history_max);
> +        db->n_txn_history_max = 2;
> +    }
>      ovs_list_init(&db->txn_history);
>  }
>  
> @@ -1725,3 +1739,18 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
>      db->n_txn_history = 0;
>      db->n_txn_history_atoms = 0;
>  }
> +
> +void
> +ovsdb_txn_history_update(struct ovsdb *db, unsigned int n_txn_history_max)
> +{
> +    db->n_txn_history_max = n_txn_history_max != 0 ? n_txn_history_max :
> +                            TRANSACTION_HISTORY_LIMIT_DEFAULT;
> +    if (db->need_txn_history && db->n_txn_history_max < 2) {
> +        VLOG_WARN("transaction history needed, but limit too low. "
> +                  "%u < 2. Setting it to 2.",
> +                  n_txn_history_max);
> +        db->n_txn_history_max = 2;
> +    }

This code block is repeated twice, maybe worth moving into a function.

> +
> +    ovsdb_txn_history_run(db);
> +}
> diff --git a/ovsdb/transaction.h b/ovsdb/transaction.h
> index d94205414..04f96f079 100644
> --- a/ovsdb/transaction.h
> +++ b/ovsdb/transaction.h
> @@ -75,7 +75,10 @@ struct ovsdb_row *ovsdb_index_search(struct hmap *index,
>  void ovsdb_txn_add_comment(struct ovsdb_txn *, const char *);
>  const char *ovsdb_txn_get_comment(const struct ovsdb_txn *);
>  void ovsdb_txn_history_run(struct ovsdb *);
> -void ovsdb_txn_history_init(struct ovsdb *, bool need_txn_history);
> +void ovsdb_txn_history_init(struct ovsdb *, bool need_txn_history,
> +                            unsigned int n_txn_history_max);
>  void ovsdb_txn_history_destroy(struct ovsdb *);
> +void ovsdb_txn_history_update(struct ovsdb *db,
> +                              unsigned int n_txn_history_max);
>  
>  #endif /* ovsdb/transaction.h */
> diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at
> index d9389e12f..921339dfa 100644
> --- a/tests/ovsdb-server.at
> +++ b/tests/ovsdb-server.at
> @@ -1937,7 +1937,8 @@ m4_define([OVSDB_CHECK_EXECUTION_RELAY],
>               \"databases\": {
>                   \"${schema_name}\": {
>                       \"service-model\": \"relay\",
> -                     \"source\": { \"unix:db$((i-1)).sock\": {} }
> +                     \"source\": { \"unix:db$((i-1)).sock\": {} },
> +                     \"transaction-history-limit\": 1234

Not sure if we need this.  Seems better to test with the default here.
We have the special history limit test, might be better to adjust it
or have a separate one.

>                   }
>               }
>         }" > config${i}.json
> @@ -3108,6 +3109,25 @@ WARN|syntax 
> "{"dscp":42,"inactivity-probe":10000,"max-backoff":8000,"role":"My-R
>   syntax error: Parsing JSON-RPC options failed: Member 'role' is present but 
> not allowed here.
>  ])
>  
> +TEST_CONFIG_FILE([relay with transaction-history-limit], [
> +{
> +    "remotes": { "punix:db.sock": {} },
> +    "databases": {
> +        "RelaySchema": {
> +            "service-model": "relay",
> +            "source": {
> +                "punix:db2.sock": {
> +                    "inactivity-probe": 10000,
> +                    "max-backoff": 8000,
> +                    "dscp": 42
> +                }
> +            },
> +            "transaction-history-limit": 1000
> +        }
> +    }
> +}
> +], [0])

Should probably update the "complex config" test as well.

We'll also need some documentation updates in Documentation/ref/ovsdb.7.rst
and ovsdb/ovsdb-server.1.in.  And a NEWS entry.

Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to