This is an automated email from the ASF dual-hosted git repository.
baoyuan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new abd398c61 feat(ai-proxy): add new ctx variable for request llm model (#12554)
abd398c61 is described below
commit abd398c6141fdf8eaff72f02a96beeea7afefc85
Author: Ashish Tiwari <[email protected]>
AuthorDate: Tue Sep 2 09:08:10 2025 +0530
feat(ai-proxy): add new ctx variable for request llm model (#12554)
---
apisix/cli/ngx_tpl.lua | 1 +
apisix/core/ctx.lua | 1 +
apisix/plugins/ai-proxy/base.lua | 4 ++++
apisix/plugins/prometheus/exporter.lua | 34 +++++++++++++++++-----------------
docs/en/latest/plugins/prometheus.md | 12 ++++++------
docs/zh/latest/plugins/prometheus.md | 12 ++++++------
t/APISIX.pm | 3 ++-
t/plugin/ai-proxy3.t | 6 +++---
t/plugin/prometheus-ai-proxy.t | 14 +++++++-------
t/plugin/prometheus.t | 10 +++++-----
t/plugin/prometheus2.t | 18 +++++++++---------
t/plugin/prometheus3.t | 2 +-
t/plugin/prometheus4.t | 14 +++++++-------
13 files changed, 69 insertions(+), 62 deletions(-)
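In short, the commit records the model name the client sent in the request body in a new request_llm_model ctx/nginx variable, alongside the existing llm_model, which holds the model actually used upstream (ai-proxy may pin a different one via options.model). A minimal sketch of reading both variables from a custom plugin's log phase; the plugin name, priority, and log message below are hypothetical, and a real plugin would also need a schema:

    local core = require("apisix.core")

    local _M = {
        version = 0.1,
        priority = 100,            -- hypothetical priority for this sketch
        name = "llm-model-audit",  -- hypothetical plugin name
    }

    function _M.log(conf, ctx)
        -- model the client asked for ('' when the body carried no "model" field)
        local requested = ctx.var.request_llm_model
        -- model actually sent upstream (ai-proxy may pin one via options.model)
        local served = ctx.var.llm_model
        if requested ~= "" and requested ~= served then
            core.log.warn("llm model rewritten: requested=", requested,
                          " served=", served)
        end
    end

    return _M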
diff --git a/apisix/cli/ngx_tpl.lua b/apisix/cli/ngx_tpl.lua
index 33be2ea75..454f67b13 100644
--- a/apisix/cli/ngx_tpl.lua
+++ b/apisix/cli/ngx_tpl.lua
@@ -808,6 +808,7 @@ http {
set $llm_content_risk_level '';
set $request_type 'traditional_http';
+ set $request_llm_model '';
set $llm_time_to_first_token '0';
set $llm_model '';
diff --git a/apisix/core/ctx.lua b/apisix/core/ctx.lua
index 50e08cf93..dcd50962b 100644
--- a/apisix/core/ctx.lua
+++ b/apisix/core/ctx.lua
@@ -237,6 +237,7 @@ do
request_type = true,
llm_time_to_first_token = true,
+ request_llm_model = true,
llm_model = true,
llm_prompt_tokens = true,
llm_completion_tokens = true,
diff --git a/apisix/plugins/ai-proxy/base.lua b/apisix/plugins/ai-proxy/base.lua
index 0c188f1e4..944d977f5 100644
--- a/apisix/plugins/ai-proxy/base.lua
+++ b/apisix/plugins/ai-proxy/base.lua
@@ -27,6 +27,7 @@ local _M = {}
function _M.set_logging(ctx, summaries, payloads)
if summaries then
ctx.llm_summary = {
+ request_model = ctx.var.request_llm_model,
model = ctx.var.llm_model,
duration = ctx.var.llm_time_to_first_token,
prompt_tokens = ctx.var.llm_prompt_tokens,
@@ -69,6 +70,9 @@ function _M.before_proxy(conf, ctx)
else
ctx.var.request_type = "ai_chat"
end
+ if request_body.model then
+ ctx.var.request_llm_model = request_body.model
+ end
local model = ai_instance.options and ai_instance.options.model or request_body.model
if model then
ctx.var.llm_model = model
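In effect, request_llm_model always mirrors the request body, while llm_model prefers the model pinned in the plugin's options and only falls back to the body. A standalone sketch of that precedence; resolve_models is a made-up helper name for illustration, not part of the plugin:

    -- Stand-in for the resolution above, runnable on its own.
    local function resolve_models(options, request_body)
        local request_llm_model = request_body.model or ""
        local llm_model = (options and options.model) or request_body.model or ""
        return request_llm_model, llm_model
    end

    -- A route pinned to gpt-4 serving a client that asked for gpt-3:
    local requested, served = resolve_models({model = "gpt-4"}, {model = "gpt-3"})
    assert(requested == "gpt-3" and served == "gpt-4")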
diff --git a/apisix/plugins/prometheus/exporter.lua b/apisix/plugins/prometheus/exporter.lua
index bb2e31320..ed219a49b 100644
--- a/apisix/plugins/prometheus/exporter.lua
+++ b/apisix/plugins/prometheus/exporter.lua
@@ -211,7 +211,7 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.status = prometheus:counter("http_status",
"HTTP status codes per service in APISIX",
{"code", "route", "matched_uri", "matched_host", "service",
"consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("http_status"))},
status_metrics_exptime)
@@ -223,14 +223,14 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.latency = prometheus:histogram("http_latency",
"HTTP request latency in milliseconds per service in APISIX",
{"type", "route", "service", "consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("http_latency"))},
buckets, latency_metrics_exptime)
metrics.bandwidth = prometheus:counter("bandwidth",
"Total bandwidth in bytes consumed per service in APISIX",
{"type", "route", "service", "consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("bandwidth"))},
bandwidth_metrics_exptime)
@@ -241,7 +241,7 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.llm_latency = prometheus:histogram("llm_latency",
"LLM request latency in milliseconds",
{"route_id", "service_id", "consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("llm_latency"))},
llm_latency_buckets,
llm_latency_exptime)
@@ -249,14 +249,14 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.llm_prompt_tokens = prometheus:counter("llm_prompt_tokens",
"LLM service consumed prompt tokens",
{"route_id", "service_id", "consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("llm_prompt_tokens"))},
llm_prompt_tokens_exptime)
metrics.llm_completion_tokens = prometheus:counter("llm_completion_tokens",
"LLM service consumed completion tokens",
{"route_id", "service_id", "consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("llm_completion_tokens"))},
llm_completion_tokens_exptime)
@@ -264,7 +264,7 @@ function _M.http_init(prometheus_enabled_in_stream)
"Number of active connections to LLM service",
{"route", "route_id", "matched_uri", "matched_host",
"service", "service_id", "consumer", "node",
- "request_type", "llm_model",
+ "request_type", "request_llm_model", "llm_model",
unpack(extra_labels("llm_active_connections"))},
llm_active_connections_exptime)
@@ -338,7 +338,7 @@ function _M.http_log(conf, ctx)
metrics.status:inc(1,
gen_arr(vars.status, route_id, matched_uri, matched_host,
service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(extra_labels("http_status", ctx))))
local latency, upstream_latency, apisix_latency = latency_details(ctx)
@@ -346,50 +346,50 @@ function _M.http_log(conf, ctx)
metrics.latency:observe(latency,
gen_arr("request", route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(latency_extra_label_values)))
if upstream_latency then
metrics.latency:observe(upstream_latency,
gen_arr("upstream", route_id, service_id, consumer_name,
balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(latency_extra_label_values)))
end
metrics.latency:observe(apisix_latency,
gen_arr("apisix", route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(latency_extra_label_values)))
local bandwidth_extra_label_values = extra_labels("bandwidth", ctx)
metrics.bandwidth:inc(vars.request_length,
gen_arr("ingress", route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(bandwidth_extra_label_values)))
metrics.bandwidth:inc(vars.bytes_sent,
gen_arr("egress", route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(bandwidth_extra_label_values)))
local llm_time_to_first_token = vars.llm_time_to_first_token
if llm_time_to_first_token ~= "" then
metrics.llm_latency:observe(tonumber(llm_time_to_first_token),
gen_arr(route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(extra_labels("llm_latency", ctx))))
end
if vars.llm_prompt_tokens ~= "" then
metrics.llm_prompt_tokens:inc(tonumber(vars.llm_prompt_tokens),
gen_arr(route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(extra_labels("llm_prompt_tokens", ctx))))
end
if vars.llm_completion_tokens ~= "" then
metrics.llm_completion_tokens:inc(tonumber(vars.llm_completion_tokens),
gen_arr(route_id, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(extra_labels("llm_completion_tokens", ctx))))
end
end
@@ -787,7 +787,7 @@ local function inc_llm_active_connections(ctx, value)
value,
gen_arr(route_name, route_id, matched_uri,
matched_host, service_name, service_id, consumer_name, balancer_ip,
- vars.request_type, vars.llm_model,
+ vars.request_type, vars.request_llm_model, vars.llm_model,
unpack(extra_labels("llm_active_connections", ctx)))
)
end
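Every metric above gains the new label in the same position, so each gen_arr call site must supply values in exactly the declaration order. A minimal sketch of that positional contract, with a simplified stand-in for the exporter's gen_arr helper (the real one reuses a table):

    -- Stand-in for gen_arr: it packs label values positionally, so the
    -- order must match the label declaration exactly.
    local function gen_arr(...)
        return {...}
    end

    -- Bandwidth label order after this change: {"type", "route", "service",
    -- "consumer", "node", "request_type", "request_llm_model", "llm_model"}
    local label_values = gen_arr("egress", "1", "", "", "127.0.0.1",
                                 "ai_chat", "gpt-3", "gpt-4")
    -- the new label sits at position 7, right before llm_model:
    assert(label_values[7] == "gpt-3" and label_values[8] == "gpt-4")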
diff --git a/docs/en/latest/plugins/prometheus.md b/docs/en/latest/plugins/prometheus.md
index 7c6ef303a..d64355fdd 100644
--- a/docs/en/latest/plugins/prometheus.md
+++ b/docs/en/latest/plugins/prometheus.md
@@ -275,12 +275,12 @@ You should see an output similar to the following:
```text
# HELP apisix_bandwidth Total bandwidth in bytes consumed per Service in Apisix
# TYPE apisix_bandwidth counter
-apisix_bandwidth{type="egress",route="",service="",consumer="",node="",request_type="traditional_http",llm_model=""}
8417
-apisix_bandwidth{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
1420
-apisix_bandwidth{type="egress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
1420
-apisix_bandwidth{type="ingress",route="",service="",consumer="",node="",request_type="traditional_http",llm_model=""}
189
-apisix_bandwidth{type="ingress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
332
-apisix_bandwidth{type="ingress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
332
+apisix_bandwidth{type="egress",route="",service="",consumer="",node="",request_type="traditional_http",request_llm_model="",llm_model=""}
8417
+apisix_bandwidth{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
1420
+apisix_bandwidth{type="egress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
1420
+apisix_bandwidth{type="ingress",route="",service="",consumer="",node="",request_type="traditional_http",request_llm_model="",llm_model=""}
189
+apisix_bandwidth{type="ingress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
332
+apisix_bandwidth{type="ingress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
332
# HELP apisix_etcd_modify_indexes Etcd modify index for APISIX keys
# TYPE apisix_etcd_modify_indexes gauge
apisix_etcd_modify_indexes{key="consumers"} 0
diff --git a/docs/zh/latest/plugins/prometheus.md b/docs/zh/latest/plugins/prometheus.md
index d3f4a0825..21361e358 100644
--- a/docs/zh/latest/plugins/prometheus.md
+++ b/docs/zh/latest/plugins/prometheus.md
@@ -275,12 +275,12 @@ curl "http://127.0.0.1:9091/apisix/prometheus/metrics"
```text
# HELP apisix_bandwidth Total bandwidth in bytes consumed per Service in Apisix
# TYPE apisix_bandwidth counter
-apisix_bandwidth{type="egress",route="",service="",consumer="",node="",request_type="traditional_http",llm_model=""}
8417
-apisix_bandwidth{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
1420
-apisix_bandwidth{type="egress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
1420
-apisix_bandwidth{type="ingress",route="",service="",consumer="",node="",request_type="traditional_http",llm_model=""}
189
-apisix_bandwidth{type="ingress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
332
-apisix_bandwidth{type="ingress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""}
332
+apisix_bandwidth{type="egress",route="",service="",consumer="",node="",request_type="traditional_http",request_llm_model="",llm_model=""}
8417
+apisix_bandwidth{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
1420
+apisix_bandwidth{type="egress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
1420
+apisix_bandwidth{type="ingress",route="",service="",consumer="",node="",request_type="traditional_http",request_llm_model="",llm_model=""}
189
+apisix_bandwidth{type="ingress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
332
+apisix_bandwidth{type="ingress",route="2",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""}
332
# HELP apisix_etcd_modify_indexes Etcd modify index for APISIX keys
# TYPE apisix_etcd_modify_indexes gauge
apisix_etcd_modify_indexes{key="consumers"} 0
diff --git a/t/APISIX.pm b/t/APISIX.pm
index 57ae00f80..305044e4c 100644
--- a/t/APISIX.pm
+++ b/t/APISIX.pm
@@ -674,7 +674,7 @@ _EOC_
require("apisix").http_exit_worker()
}
- log_format main escape=default '\$remote_addr - \$remote_user [\$time_local] \$http_host "\$request" \$status \$body_bytes_sent \$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr \$upstream_status \$upstream_response_time "\$upstream_scheme://\$upstream_host\$upstream_uri" \$llm_model \$llm_time_to_first_token \$llm_prompt_tokens \$llm_completion_tokens';
+ log_format main escape=default '\$remote_addr - \$remote_user [\$time_local] \$http_host "\$request" \$status \$body_bytes_sent \$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr \$upstream_status \$upstream_response_time "\$upstream_scheme://\$upstream_host\$upstream_uri" \$request_llm_model \$llm_model \$llm_time_to_first_token \$llm_prompt_tokens \$llm_completion_tokens';
# fake server, only for test
server {
@@ -861,6 +861,7 @@ _EOC_
set \$request_type 'traditional_http';
set \$llm_time_to_first_token '0';
+ set \$request_llm_model '';
set \$llm_model '';
set \$llm_prompt_tokens '0';
set \$llm_completion_tokens '0';
diff --git a/t/plugin/ai-proxy3.t b/t/plugin/ai-proxy3.t
index e05ce01c0..4f1382371 100644
--- a/t/plugin/ai-proxy3.t
+++ b/t/plugin/ai-proxy3.t
@@ -171,12 +171,12 @@ passed
=== TEST 2: send request
--- request
POST /anything
-{ "messages": [ { "role": "system", "content": "You are a mathematician" }, {
"role": "user", "content": "What is 1+1?"} ] }
+{"messages":[{"role":"system","content":"You are a
mathematician"},{"role":"user","content":"What is 1+1?"}], "model": "gpt-4"}
--- error_code: 200
--- response_body eval
qr/.*completion_tokens.*/
--- access_log eval
-qr/.*gpt-3.5-turbo \d+ 10 20.*/
+qr/.*gpt-4 gpt-3.5-turbo \d+ 10 20.*/
@@ -328,4 +328,4 @@ passed
--- response_body_like eval
qr/6data: \[DONE\]\n\n/
--- access_log eval
-qr/.*gpt-3.5-turbo 2\d\d 15 20.*/
+qr/.*gpt-4 gpt-3.5-turbo 2\d\d 15 20.*/
diff --git a/t/plugin/prometheus-ai-proxy.t b/t/plugin/prometheus-ai-proxy.t
index d5058457f..d00341412 100644
--- a/t/plugin/prometheus-ai-proxy.t
+++ b/t/plugin/prometheus-ai-proxy.t
@@ -160,7 +160,7 @@ __DATA__
=== TEST 2: send a chat request
--- request
POST /chat
-{"messages":[{"role":"user","content":"What is 1+1?"}]}
+{"messages":[{"role":"user","content":"What is 1+1?"}], "model": "gpt-3"}
--- error_code: 200
@@ -169,7 +169,7 @@ POST /chat
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_llm_latency_bucket\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",llm_model="gpt-4",le="\d+"\} 1/
+qr/apisix_llm_latency_bucket\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4",le="\d+"\} 1/
@@ -177,7 +177,7 @@ qr/apisix_llm_latency_bucket\{.*route_id="1",.*,node="openai-gpt4".*.*request_ty
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",llm_model="gpt-4"\} 1/
+qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 1/
@@ -185,7 +185,7 @@ qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*.*request_typ
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",llm_model="gpt-4"\} \d+/
+qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} \d+/
@@ -193,7 +193,7 @@ qr/apisix_llm_latency_count\{.*route_id="1",.*,node="openai-gpt4".*.*request_typ
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_llm_prompt_tokens\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",llm_model="gpt-4"\} 8/
+qr/apisix_llm_prompt_tokens\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 8/
@@ -201,7 +201,7 @@ qr/apisix_llm_prompt_tokens\{.*route_id="1",.*,node="openai-gpt4".*.*request_typ
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_llm_completion_tokens\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",llm_model="gpt-4"\} 5/
+qr/apisix_llm_completion_tokens\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 5/
@@ -209,7 +209,7 @@ qr/apisix_llm_completion_tokens\{.*route_id="1",.*,node="openai-gpt4".*.*request
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_llm_active_connections\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",llm_model="gpt-4"\} 0/
+qr/apisix_llm_active_connections\{.*route_id="1",.*,node="openai-gpt4".*.*request_type="ai_chat",request_llm_model="gpt-3",llm_model="gpt-4"\} 0/
diff --git a/t/plugin/prometheus.t b/t/plugin/prometheus.t
index 197e3e8a1..d29406886 100644
--- a/t/plugin/prometheus.t
+++ b/t/plugin/prometheus.t
@@ -141,7 +141,7 @@ apisix_etcd_reachable 1
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -198,7 +198,7 @@ passed
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -206,7 +206,7 @@ qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="127.0.
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_latency_count\{type="request",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_http_latency_count\{type="request",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -277,7 +277,7 @@ passed
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="2",service="1",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="2",service="1",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -394,7 +394,7 @@ passed
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="404",route="3",matched_uri="\/hello3",matched_host="",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} 2/
+qr/apisix_http_status\{code="404",route="3",matched_uri="\/hello3",matched_host="",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} 2/
diff --git a/t/plugin/prometheus2.t b/t/plugin/prometheus2.t
index 4ad771e78..9c19f0da2 100644
--- a/t/plugin/prometheus2.t
+++ b/t/plugin/prometheus2.t
@@ -134,7 +134,7 @@ passed
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="1",service="",consumer="",node="",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -180,7 +180,7 @@ apikey: auth-one
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="200",route="1",matched_uri="\/hello",matched_host="",service="",consumer="jack",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_http_status\{code="200",route="1",matched_uri="\/hello",matched_host="",service="",consumer="jack",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -256,7 +256,7 @@ GET /not_found
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="404",route="",matched_uri="",matched_host="",service="",consumer="",node="",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_http_status\{code="404",route="",matched_uri="",matched_host="",service="",consumer="",node="",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -275,7 +275,7 @@ qr/404 Not Found/
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="404",route="9",matched_uri="\/foo\*",matched_host="foo.com",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_http_status\{code="404",route="9",matched_uri="\/foo\*",matched_host="foo.com",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -294,7 +294,7 @@ qr/404 Not Found/
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="404",route="9",matched_uri="\/bar\*",matched_host="bar.com",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_http_status\{code="404",route="9",matched_uri="\/bar\*",matched_host="bar.com",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -767,7 +767,7 @@ GET /hello
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="route_name",service="service_name",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="route_name",service="service_name",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -810,7 +810,7 @@ GET /hello
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="route_name",service="1",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="route_name",service="1",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -873,7 +873,7 @@ GET /hello
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="1",service="service_name",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="1",service="service_name",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
@@ -917,7 +917,7 @@ GET /hello
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="1",service="service_name",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} \d+/
+qr/apisix_bandwidth\{type="egress",route="1",service="service_name",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} \d+/
diff --git a/t/plugin/prometheus3.t b/t/plugin/prometheus3.t
index e55f7b503..0440b1e84 100644
--- a/t/plugin/prometheus3.t
+++ b/t/plugin/prometheus3.t
@@ -270,4 +270,4 @@ opentracing
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="200",route="1",matched_uri="\/opentracing",matched_host="",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model=""\} 1/
+qr/apisix_http_status\{code="200",route="1",matched_uri="\/opentracing",matched_host="",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model=""\} 1/
diff --git a/t/plugin/prometheus4.t b/t/plugin/prometheus4.t
index 8e2192f62..160a13d5b 100644
--- a/t/plugin/prometheus4.t
+++ b/t/plugin/prometheus4.t
@@ -121,7 +121,7 @@ GET /hello
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_bandwidth\{type="egress",route="10",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",upstream_addr="127.0.0.1:1980",upstream_status="200"\} \d+/
+qr/apisix_bandwidth\{type="egress",route="10",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",upstream_addr="127.0.0.1:1980",upstream_status="200"\} \d+/
@@ -143,7 +143,7 @@ GET /hello
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_status\{code="200",route="10",matched_uri="\/hello",matched_host="",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",dummy=""\} \d+/
+qr/apisix_http_status\{code="200",route="10",matched_uri="\/hello",matched_host="",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",dummy=""\} \d+/
@@ -195,11 +195,11 @@ plugin_attr:
--- request
GET /apisix/prometheus/metrics
--- response_body eval
-qr/apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",le="15"\} \d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",le="55"\} \d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",le="105"\} \d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",le="205"\} \d+
-apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",llm_model="",le="505"\} \d+/
+qr/apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",le="15"\} \d+
+apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",le="55"\} \d+
+apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",le="105"\} \d+
+apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",le="205"\} \d+
+apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",request_type="traditional_http",request_llm_model="",llm_model="",le="505"\} \d+/