This is an automated email from the ASF dual-hosted git repository.
cmcfarlen pushed a commit to branch 10.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/10.1.x by this push:
new c89cd60d29 Add per_server_conn metrics (#12269)
c89cd60d29 is described below
commit c89cd60d29253ea2dbd1e815ccc14fb00d2644f0
Author: Serris Lew <[email protected]>
AuthorDate: Tue Jun 3 13:27:11 2025 -0700
Add per_server_conn metrics (#12269)
Co-authored-by: Serris Lew <[email protected]>
---
include/iocore/net/ConnectionTracker.h | 54 +++++++++++++++++-
src/iocore/net/ConnectionTracker.cc | 66 ++++++++++++++++++++--
src/proxy/http/HttpSM.cc | 6 +-
src/records/RecordsConfig.cc | 6 +-
.../origin_connection/gold/two_200_ok.gold | 9 +++
.../per_server_connection_max.test.py | 58 +++++++++++++++----
6 files changed, 179 insertions(+), 20 deletions(-)
diff --git a/include/iocore/net/ConnectionTracker.h
b/include/iocore/net/ConnectionTracker.h
index d272b78f8d..8ab6860410 100644
--- a/include/iocore/net/ConnectionTracker.h
+++ b/include/iocore/net/ConnectionTracker.h
@@ -43,6 +43,7 @@
#include "swoc/TextView.h"
#include <tscore/MgmtDefs.h>
#include "iocore/net/SessionSharingAPIEnums.h"
+#include "tsutil/Metrics.h"
/**
* Singleton class to keep track of the number of inbound and outbound
connections.
@@ -83,6 +84,8 @@ public:
struct GlobalConfig {
std::chrono::seconds client_alert_delay{60}; ///< Alert delay in seconds.
std::chrono::seconds server_alert_delay{60}; ///< Alert delay in seconds.
+ bool metric_enabled{false}; ///< Enabling per server
metrics.
+ std::string metric_prefix; ///< Per server metric prefix.
};
// The names of the configuration values.
@@ -93,6 +96,8 @@ public:
static constexpr std::string_view
CONFIG_SERVER_VAR_MIN{"proxy.config.http.per_server.connection.min"};
static constexpr std::string_view
CONFIG_SERVER_VAR_MATCH{"proxy.config.http.per_server.connection.match"};
static constexpr std::string_view
CONFIG_SERVER_VAR_ALERT_DELAY{"proxy.config.http.per_server.connection.alert_delay"};
+ static constexpr std::string_view
CONFIG_SERVER_VAR_METRIC_ENABLED{"proxy.config.http.per_server.connection.metric_enabled"};
+ static constexpr std::string_view
CONFIG_SERVER_VAR_METRIC_PREFIX{"proxy.config.http.per_server.connection.metric_prefix"};
/// A record for the outbound connection count.
/// These are stored per outbound session equivalence class, as determined
by the session matching.
@@ -131,6 +136,11 @@ public:
std::atomic<int> _in_queue{0}; ///< # of connections queued, waiting
for a connection.
std::atomic<Ticker> _last_alert{0}; ///< Absolute time of the last alert.
+ // Recording data as metrics
+ ts::Metrics::Gauge::AtomicType *_count_metric = nullptr;
+ ts::Metrics::Counter::AtomicType *_count_total_metric = nullptr;
+ ts::Metrics::Counter::AtomicType *_blocked_metric = nullptr;
+
/** Constructor.
* Construct from @c Key because the use cases do a table lookup first so
the @c Key is already constructed.
* @param key A populated @c Key structure - values are copied to the @c
Group.
@@ -149,7 +159,8 @@ public:
/// @return @c true if an alert should be generated, @c false otherwise.
bool should_alert(std::time_t *lat = nullptr);
/// Time of the last alert in epoch seconds.
- std::time_t get_last_alert_epoch_time() const;
+ std::time_t get_last_alert_epoch_time() const;
+ static std::string metric_name(const Key &key, std::string_view fqdn,
std::string metric_prefix);
/// Release the reference count to this group and remove it from the
/// group table if it is no longer referenced.
@@ -322,6 +333,31 @@ ConnectionTracker::Group::hash(const Key &key)
}
}
+inline std::string
+ConnectionTracker::Group::metric_name(const Key &key, std::string_view fqdn,
std::string metric_prefix)
+{
+ std::string metric_name = "";
+ char buf[INET6_ADDRSTRLEN];
+
+ switch (key._match_type) {
+ case MATCH_IP:
+ metric_name += ats_ip_ntop(&key._addr.sa, buf, sizeof(buf));
+ break;
+ case MATCH_PORT:
+ metric_name += ats_ip_nptop(&key._addr.sa, buf, sizeof(buf));
+ break;
+ case MATCH_HOST:
+ metric_name += std::string(fqdn);
+ break;
+ case MATCH_BOTH:
+ metric_name += std::string(fqdn) + "." + ats_ip_nptop(&key._addr.sa, buf,
sizeof(buf));
+ break;
+ default:
+ Warning("Invalid matching type to add to per_server.connections metrics");
+ }
+ return metric_prefix.empty() ? metric_name : metric_prefix + "." +
metric_name;
+}
+
inline bool
ConnectionTracker::TxnState::is_active()
{
@@ -332,6 +368,12 @@ inline int
ConnectionTracker::TxnState::reserve()
{
_reserved_p = true;
+ // If metric enabled, use metric as count
+ if (_g->_count_metric != nullptr) {
+ ts::Metrics::Gauge::increment(_g->_count_metric);
+ ts::Metrics::Counter::increment(_g->_count_total_metric);
+ return _g->_count_metric->load();
+ }
return ++_g->_count;
}
@@ -340,7 +382,12 @@ ConnectionTracker::TxnState::release()
{
if (_reserved_p) {
_reserved_p = false;
- --_g->_count;
+ // If metric enabled, use metric as count
+ if (_g->_count_metric != nullptr) {
+ ts::Metrics::Gauge::decrement(_g->_count_metric);
+ } else {
+ --_g->_count;
+ }
}
}
@@ -372,6 +419,9 @@ ConnectionTracker::TxnState::update_max_count(int count)
inline void
ConnectionTracker::TxnState::blocked()
{
+ if (_g->_blocked_metric != nullptr) {
+ ts::Metrics::Counter::increment(_g->_blocked_metric);
+ }
++_g->_blocked;
}
diff --git a/src/iocore/net/ConnectionTracker.cc
b/src/iocore/net/ConnectionTracker.cc
index 8401ecbf06..43323318a9 100644
--- a/src/iocore/net/ConnectionTracker.cc
+++ b/src/iocore/net/ConnectionTracker.cc
@@ -149,6 +149,30 @@ Config_Update_Conntrack_Client_Alert_Delay(const char
*name, RecDataT dtype, Rec
return Config_Update_Conntrack_Server_Alert_Delay_Helper(name, dtype, data,
cookie, config->client_alert_delay);
}
+bool
+Config_Update_Conntrack_Metric_Enabled(const char * /* name ATS_UNUSED */,
RecDataT dtype, RecData data, void *cookie)
+{
+ auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie);
+
+ if (RECD_INT == dtype) {
+ config->metric_enabled = data.rec_int;
+ return true;
+ }
+ return false;
+}
+
+bool
+Config_Update_Conntrack_Metric_Prefix(const char * /* name ATS_UNUSED */,
RecDataT dtype, RecData data, void *cookie)
+{
+ auto config = static_cast<ConnectionTracker::GlobalConfig *>(cookie);
+
+ if (RECD_STRING == dtype) {
+ config->metric_prefix = data.rec_string;
+ return true;
+ }
+ return false;
+}
+
} // namespace
void
@@ -162,6 +186,8 @@ ConnectionTracker::config_init(GlobalConfig *global,
TxnConfig *txn, RecConfigUp
Enable_Config_Var(CONFIG_SERVER_VAR_MAX, &Config_Update_Conntrack_Max,
config_cb, txn);
Enable_Config_Var(CONFIG_SERVER_VAR_MATCH, &Config_Update_Conntrack_Match,
config_cb, txn);
Enable_Config_Var(CONFIG_SERVER_VAR_ALERT_DELAY,
&Config_Update_Conntrack_Server_Alert_Delay, config_cb, global);
+ Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_ENABLED,
&Config_Update_Conntrack_Metric_Enabled, config_cb, global);
+ Enable_Config_Var(CONFIG_SERVER_VAR_METRIC_PREFIX,
&Config_Update_Conntrack_Metric_Prefix, config_cb, global);
}
ConnectionTracker::TxnState
@@ -214,6 +240,19 @@ ConnectionTracker::Group::Group(DirectionType direction,
Key const &key, std::st
_alert_delay{direction == DirectionType::INBOUND ?
_global_config->client_alert_delay : _global_config->server_alert_delay}
{
Metrics::Gauge::increment(net_rsb.connection_tracker_table_size);
+ // only add metrics for server connections
+ if (_global_config->metric_enabled && direction == DirectionType::OUTBOUND) {
+ std::string _metric_name = metric_name(key, fqdn,
_global_config->metric_prefix);
+ _count_metric =
Metrics::Gauge::createPtr("proxy.process.http.per_server.current_connection.",
_metric_name);
+ _count_total_metric =
Metrics::Counter::createPtr("proxy.process.http.per_server.total_connection.",
_metric_name);
+ _blocked_metric =
Metrics::Counter::createPtr("proxy.process.http.per_server.blocked_connection.",
_metric_name);
+
+ if (dbg_ctl.on()) {
+ swoc::LocalBufferWriter<256> w;
+ w.print("Registered per_server_connection.{}\0", _metric_name);
+ DbgPrint(dbg_ctl, "%s", w.data());
+ }
+ }
// store the host name if relevant.
if (MATCH_HOST == _match_type || MATCH_BOTH == _match_type) {
_fqdn.assign(fqdn);
@@ -284,7 +323,25 @@ ConnectionTracker::Group::should_alert(std::time_t *lat)
void
ConnectionTracker::Group::release()
{
- if (_count > 0) {
+ // If metric enabled, use metric as count
+ if (_count_metric != nullptr) {
+ if (_count_metric->load() > 0) {
+ ts::Metrics::Gauge::decrement(_count_metric);
+ if (_count_metric->load() == 0) {
+ TableSingleton &table = _direction ==
DirectionType::INBOUND ? _inbound_table : _outbound_table;
+ std::lock_guard<std::mutex> lock(table._mutex); // Table lock
+ if (_count_metric->load() > 0) {
+ // Someone else grabbed the Group between our last check and taking
the
+ // lock.
+ return;
+ }
+ table._table.erase(_key);
+ }
+ } else {
+ // A bit dubious, as there's no guarantee it's still negative, but even
that would be interesting to know.
+ Error("Number of tracked connections should be greater than or equal to
zero: %" PRId64, _count_metric->load());
+ }
+ } else if (_count > 0) {
if (--_count == 0) {
TableSingleton &table = _direction == DirectionType::INBOUND
? _inbound_table : _outbound_table;
std::lock_guard<std::mutex> lock(table._mutex); // Table lock
@@ -331,8 +388,8 @@ ConnectionTracker::outbound_to_json_string()
static const std::string_view trailer{" \n]}"};
static const auto printer = [](swoc::BufferWriter &w, Group const *g) ->
swoc::BufferWriter & {
- w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count.load(),
g->_count_max.load(), g->_blocked.load(),
- g->get_last_alert_epoch_time());
+ w.print(item_fmt, g->_match_type, g->_addr, g->_fqdn, g->_count_metric !=
nullptr ? g->_count_metric->load() : g->_count.load(),
+ g->_count_max.load(), g->_blocked.load(),
g->get_last_alert_epoch_time());
return w;
};
@@ -373,7 +430,8 @@ ConnectionTracker::dump(FILE *f)
for (std::shared_ptr<Group const> g : groups) {
swoc::LocalBufferWriter<128> w;
- w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count.load(),
g->_blocked.load(), g->_addr, g->_hash, g->_match_type);
+ w.print("{:7} | {:5} | {:24} | {:33} | {:8} |\n", g->_count_metric !=
nullptr ? g->_count_metric->load() : g->_count.load(),
+ g->_blocked.load(), g->_addr, g->_hash, g->_match_type);
fwrite(w.data(), w.size(), 1, f);
}
diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc
index bba5a4da65..4324fc334f 100644
--- a/src/proxy/http/HttpSM.cc
+++ b/src/proxy/http/HttpSM.cc
@@ -5542,7 +5542,8 @@ HttpSM::do_http_server_open(bool raw, bool only_direct)
}
// See if the outbound connection tracker data is needed. If so, get it here
for consistency.
- if (t_state.txn_conf->connection_tracker_config.server_max > 0 ||
t_state.txn_conf->connection_tracker_config.server_min > 0) {
+ if (t_state.txn_conf->connection_tracker_config.server_max > 0 ||
t_state.txn_conf->connection_tracker_config.server_min > 0 ||
+
t_state.http_config_param->global_connection_tracker_config.metric_enabled) {
t_state.outbound_conn_track_state =
ConnectionTracker::obtain_outbound(t_state.txn_conf->connection_tracker_config,
std::string_view{t_state.current.server->name},
t_state.current.server->dst_addr);
@@ -5569,6 +5570,9 @@ HttpSM::do_http_server_open(bool raw, bool only_direct)
}
ct_state.update_max_count(ccount);
+ } else if
(t_state.http_config_param->global_connection_tracker_config.metric_enabled) {
+ auto &ct_state = t_state.outbound_conn_track_state;
+ ct_state.reserve();
}
// We did not manage to get an existing session and need to open a new
connection
diff --git a/src/records/RecordsConfig.cc b/src/records/RecordsConfig.cc
index c612e2d438..0022e1c8df 100644
--- a/src/records/RecordsConfig.cc
+++ b/src/records/RecordsConfig.cc
@@ -367,12 +367,16 @@ static const RecordElement RecordsConfig[] =
,
{RECT_CONFIG, "proxy.config.http.per_server.connection.max", RECD_INT, "0",
RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
,
- {RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING,
"both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|host|both|none)$", RECA_NULL}
+ {RECT_CONFIG, "proxy.config.http.per_server.connection.match", RECD_STRING,
"both", RECU_DYNAMIC, RR_NULL, RECC_STR, "^(?:ip|port|host|both|none)$",
RECA_NULL}
,
{RECT_CONFIG, "proxy.config.http.per_server.connection.alert_delay",
RECD_INT, "60", RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
,
{RECT_CONFIG, "proxy.config.http.per_server.connection.min", RECD_INT, "0",
RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
,
+ {RECT_CONFIG, "proxy.config.http.per_server.connection.metric_enabled",
RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_STR, "[0-1]", RECA_NULL}
+ ,
+ {RECT_CONFIG, "proxy.config.http.per_server.connection.metric_prefix",
RECD_STRING, "", RECU_DYNAMIC, RR_NULL, RECC_NULL, nullptr, RECA_NULL}
+ ,
{RECT_CONFIG, "proxy.config.http.attach_server_session_to_client", RECD_INT,
"0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
,
{RECT_CONFIG, "proxy.config.http.max_proxy_cycles", RECD_INT, "0",
RECU_DYNAMIC, RR_NULL, RECC_STR, "^[0-9]+$", RECA_NULL}
diff --git a/tests/gold_tests/origin_connection/gold/two_200_ok.gold
b/tests/gold_tests/origin_connection/gold/two_200_ok.gold
new file mode 100644
index 0000000000..e7833998be
--- /dev/null
+++ b/tests/gold_tests/origin_connection/gold/two_200_ok.gold
@@ -0,0 +1,9 @@
+``
+> CONNECT``
+``
+< HTTP/1.1 200 OK
+``
+> CONNECT``
+``
+< HTTP/1.1 200 OK
+``
diff --git
a/tests/gold_tests/origin_connection/per_server_connection_max.test.py
b/tests/gold_tests/origin_connection/per_server_connection_max.test.py
index 6c7253fdb3..998ddd9775 100644
--- a/tests/gold_tests/origin_connection/per_server_connection_max.test.py
+++ b/tests/gold_tests/origin_connection/per_server_connection_max.test.py
@@ -49,13 +49,29 @@ class PerServerConnectionMaxTest:
'proxy.config.dns.nameservers':
f"127.0.0.1:{self._dns.Variables.Port}",
'proxy.config.dns.resolv_conf': 'NULL',
'proxy.config.diags.debug.enabled': 1,
- 'proxy.config.diags.debug.tags': 'http',
+ 'proxy.config.diags.debug.tags': 'http|conn_track',
'proxy.config.http.per_server.connection.max':
self._origin_max_connections,
+ 'proxy.config.http.per_server.connection.metric_enabled': 1,
+ 'proxy.config.http.per_server.connection.metric_prefix': 'foo',
+ 'proxy.config.http.per_server.connection.match': 'port',
})
self._ts.Disk.diags_log.Content += Testers.ContainsExpression(
f'WARNING:.*too many
connections:.*limit={self._origin_max_connections}',
'Verify the user is warned about the connection limit being hit.')
+ def _test_metrics(self) -> None:
+ """Use traffic_ctl to test metrics."""
+ tr = Test.AddTestRun("Check connection metrics")
+ tr.Processes.Default.Command = 'traffic_ctl metric match per_server'
+ tr.Processes.Default.ReturnCode = 0
+ tr.Processes.Default.Env = self._ts.Env
+ tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+
f'per_server.total_connection.foo.127.0.0.1:{self._server.Variables.http_port}
4',
+ 'incorrect statistic return, or possible error.')
+ tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+
f'per_server.blocked_connection.foo.127.0.0.1:{self._server.Variables.http_port}
1',
+ 'incorrect statistic return, or possible error.')
+
def run(self) -> None:
"""Configure the TestRun."""
tr = Test.AddTestRun('Verify we enforce
proxy.config.http.per_server.connection.max')
@@ -65,18 +81,19 @@ class PerServerConnectionMaxTest:
tr.AddVerifierClientProcess('client', self._replay_file,
http_ports=[self._ts.Variables.port])
+ self._test_metrics()
+
class ConnectMethodTest:
"""Test our max origin connection behavior with CONNECT traffic."""
_client_counter: int = 0
- _origin_max_connections: int = 3
- def __init__(self) -> None:
+ def __init__(self, max_conn) -> None:
"""Configure the server processes in preparation for the TestRun."""
self._configure_dns()
self._configure_origin_server()
- self._configure_trafficserver()
+ self._configure_trafficserver(max_conn)
def _configure_dns(self) -> None:
"""Configure a nameserver for the test."""
@@ -86,18 +103,19 @@ class ConnectMethodTest:
"""Configure the httpbin origin server."""
self._server = Test.MakeHttpBinServer("server2")
- def _configure_trafficserver(self) -> None:
- self._ts = Test.MakeATSProcess("ts2")
+ def _configure_trafficserver(self, max_conn) -> None:
+ self._ts = Test.MakeATSProcess("ts2_" + str(max_conn))
self._ts.Disk.records_config.update(
{
'proxy.config.dns.nameservers':
f"127.0.0.1:{self._dns.Variables.Port}",
'proxy.config.dns.resolv_conf': 'NULL',
'proxy.config.diags.debug.enabled': 1,
- 'proxy.config.diags.debug.tags': 'http|dns|hostdb',
+ 'proxy.config.diags.debug.tags': 'http|dns|hostdb|conn_track',
'proxy.config.http.server_ports': f"{self._ts.Variables.port}",
'proxy.config.http.connect_ports':
f"{self._server.Variables.Port}",
- 'proxy.config.http.per_server.connection.max':
self._origin_max_connections,
+ 'proxy.config.http.per_server.connection.metric_enabled': 1,
+ 'proxy.config.http.per_server.connection.max': max_conn,
})
self._ts.Disk.remap_config.AddLines([
@@ -111,7 +129,20 @@ class ConnectMethodTest:
tr.MakeCurlCommand(f"-v --fail -s -p -x
127.0.0.1:{self._ts.Variables.port} 'http://foo.com/delay/2'", p=p)
return p
- def run(self) -> None:
+ def _test_metrics(self, blocked) -> None:
+ """Use traffic_ctl to test metrics."""
+ tr = Test.AddTestRun("Check connection metrics")
+ tr.Processes.Default.Command = 'traffic_ctl metric match per_server'
+ tr.Processes.Default.ReturnCode = 0
+ tr.Processes.Default.Env = self._ts.Env
+ tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+
f'per_server.total_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port}
5',
+ 'incorrect statistic return, or possible error.')
+ tr.Processes.Default.Streams.All = Testers.ContainsExpression(
+
f'per_server.blocked_connection.www.this.origin.com.127.0.0.1:{self._server.Variables.Port}
{blocked}',
+ 'incorrect statistic return, or possible error.')
+
+ def run(self, blocked, gold_file) -> None:
"""Verify per_server.connection.max with CONNECT traffic."""
tr = Test.AddTestRun()
tr.Processes.Default.StartBefore(self._dns)
@@ -134,10 +165,13 @@ class ConnectMethodTest:
f"--next -v --fail -s -p -x 127.0.0.1:{self._ts.Variables.port}
'http://foo.com/get'")
# Curl will have a 22 exit code if it receives a 5XX response (and we
# expect a 503).
- tr.Processes.Default.ReturnCode = 22
- tr.Processes.Default.Streams.stderr = "gold/two_503_congested.gold"
+ tr.Processes.Default.ReturnCode = 22 if blocked else 0
+ tr.Processes.Default.Streams.stderr = gold_file
tr.Processes.Default.TimeOut = 3
+ self._test_metrics(blocked)
+
PerServerConnectionMaxTest().run()
-ConnectMethodTest().run()
+ConnectMethodTest(3).run(blocked=2, gold_file="gold/two_503_congested.gold")
+ConnectMethodTest(0).run(blocked=0, gold_file="gold/two_200_ok.gold")