This is an automated email from the ASF dual-hosted git repository.
jerryshao pushed a commit to branch branch-1.2
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/branch-1.2 by this push:
new 8eeff9d98e [Cherry-pick to branch-1.2] [MINOR] fix(mcp-server):
URL-encode path segments in MCP REST client (#10799) (#10806)
8eeff9d98e is described below
commit 8eeff9d98e86a3a8e7a003623b38cd16e4df42b7
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Apr 20 10:09:39 2026 +0800
[Cherry-pick to branch-1.2] [MINOR] fix(mcp-server): URL-encode path
segments in MCP REST client (#10799) (#10806)
**Cherry-pick Information:**
- Original commit: d2268ac15d8e90cbdf929ccf2dde83e988751f64
- Target branch: `branch-1.2`
- Status: ✅ Clean cherry-pick (no conflicts)
Co-authored-by: Jerry Shao <[email protected]>
---
.../plain/plain_rest_client_catalog_operation.py | 7 +-
.../plain/plain_rest_client_fileset_operation.py | 17 +-
.../plain/plain_rest_client_job_operation.py | 28 +-
.../plain/plain_rest_client_model_operation.py | 30 +-
.../plain/plain_rest_client_policy_operation.py | 25 +-
.../plain/plain_rest_client_schema_operation.py | 8 +-
.../plain/plain_rest_client_statistic_operation.py | 21 +-
.../plain/plain_rest_client_table_operation.py | 14 +-
.../plain/plain_rest_client_tag_operation.py | 25 +-
.../plain/plain_rest_client_topic_operation.py | 10 +-
mcp-server/mcp_server/client/plain/utils.py | 6 +
.../unit/client/__init__.py} | 17 -
mcp-server/tests/unit/client/test_url_encoding.py | 384 +++++++++++++++++++++
13 files changed, 522 insertions(+), 70 deletions(-)
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
index a5c4a5ec67..843cf3bfe7 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
@@ -18,7 +18,10 @@
from httpx import AsyncClient
from mcp_server.client import CatalogOperation
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
class PlainRESTClientCatalogOperation(CatalogOperation):
@@ -28,6 +31,6 @@ class PlainRESTClientCatalogOperation(CatalogOperation):
async def get_list_of_catalogs(self) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/catalogs?details=true"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/catalogs?details=true"
)
return extract_content_from_response(response, "catalogs", [])
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_fileset_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_fileset_operation.py
index d0af2ebc9e..4c81eece1d 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_fileset_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_fileset_operation.py
@@ -16,6 +16,7 @@
# under the License.
from mcp_server.client.fileset_operation import FilesetOperation
+from mcp_server.client.plain.utils import encode_path_segment
class PlainRESTClientFilesetOperation(FilesetOperation):
@@ -27,7 +28,9 @@ class PlainRESTClientFilesetOperation(FilesetOperation):
self, catalog_name: str, schema_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/filesets"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}/filesets"
)
return response.json().get("identifiers", [])
@@ -35,7 +38,10 @@ class PlainRESTClientFilesetOperation(FilesetOperation):
self, catalog_name: str, schema_name: str, fileset_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/filesets/{fileset_name}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/filesets/{encode_path_segment(fileset_name)}"
)
return response.json().get("fileset", {})
@@ -49,7 +55,10 @@ class PlainRESTClientFilesetOperation(FilesetOperation):
sub_path: str = "/",
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}"
-
f"/filesets/{fileset_name}/files?sub_path={sub_path}&location_name={location_name}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/filesets/{encode_path_segment(fileset_name)}/files",
+ params={"sub_path": sub_path, "location_name": location_name},
)
return response.json().get("files", [])
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_job_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_job_operation.py
index 964bab841d..ddc35b9344 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_job_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_job_operation.py
@@ -19,7 +19,10 @@ from httpx import AsyncClient
from mcp_server.client.job_operation import JobOperation
from mcp_server.client.plain.exception import GravitinoException
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
class PlainRESTClientJobOperation(JobOperation):
@@ -29,33 +32,36 @@ class PlainRESTClientJobOperation(JobOperation):
async def get_job_by_id(self, job_id: str) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/jobs/runs/{job_id}"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/jobs/runs/{encode_path_segment(job_id)}"
)
return extract_content_from_response(response, "job", {})
async def list_of_jobs(self, job_template_name: str) -> str:
- url = f"/api/metalakes/{self.metalake_name}/jobs/runs"
- if job_template_name:
- url += f"?jobTemplateName={job_template_name}"
-
- response = await self.rest_client.get(url)
+ response = await self.rest_client.get(
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/jobs/runs",
+ params=(
+ {"jobTemplateName": job_template_name}
+ if job_template_name
+ else {}
+ ),
+ )
return extract_content_from_response(response, "jobs", [])
async def get_job_template_by_name(self, name: str) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/jobs/templates/{name}"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/jobs/templates/{encode_path_segment(name)}"
)
return extract_content_from_response(response, "jobTemplate", {})
async def list_of_job_templates(self) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/jobs/templates?details=true"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/jobs/templates?details=true"
)
return extract_content_from_response(response, "jobTemplates", [])
async def run_job(self, job_template_name: str, job_config: dict) -> str:
response = await self.rest_client.post(
- f"/api/metalakes/{self.metalake_name}/jobs/runs",
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/jobs/runs",
json={
"jobTemplateName": job_template_name,
"jobConf": job_config,
@@ -65,7 +71,7 @@ class PlainRESTClientJobOperation(JobOperation):
async def cancel_job(self, job_id: str) -> str:
response = await self.rest_client.post(
- f"/api/metalakes/{self.metalake_name}/jobs/runs/{job_id}"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/jobs/runs/{encode_path_segment(job_id)}"
)
if response.status_code != 200:
raise GravitinoException(
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_model_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_model_operation.py
index 3419d4fb52..7fce30bd91 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_model_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_model_operation.py
@@ -16,6 +16,7 @@
# under the License.
from mcp_server.client import ModelOperation
+from mcp_server.client.plain.utils import encode_path_segment
class PlainRESTClientModelOperation(ModelOperation):
@@ -29,7 +30,9 @@ class PlainRESTClientModelOperation(ModelOperation):
async def list_of_models(self, catalog_name: str, schema_name: str) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}/models"
)
return response.json().get("identifiers", [])
@@ -37,7 +40,10 @@ class PlainRESTClientModelOperation(ModelOperation):
self, catalog_name: str, schema_name: str, model_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/models/{encode_path_segment(model_name)}"
)
return response.json().get("model", {})
@@ -45,8 +51,10 @@ class PlainRESTClientModelOperation(ModelOperation):
self, catalog_name: str, schema_name: str, model_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}"
- f"/versions?details=true"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/models/{encode_path_segment(model_name)}/versions?details=true"
)
return response.json().get("infos", [])
@@ -54,8 +62,11 @@ class PlainRESTClientModelOperation(ModelOperation):
self, catalog_name: str, schema_name: str, model_name: str, version:
int
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}"
- f"/versions/{version}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/models/{encode_path_segment(model_name)}"
+ f"/versions/{encode_path_segment(version)}"
)
return response.json().get("modelVersion", {})
@@ -63,7 +74,10 @@ class PlainRESTClientModelOperation(ModelOperation):
self, catalog_name: str, schema_name: str, model_name: str, alias: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/models/{model_name}/"
- f"aliases/{alias}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/models/{encode_path_segment(model_name)}"
+ f"/aliases/{encode_path_segment(alias)}"
)
return response.json().get("modelVersion", {})
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_policy_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_policy_operation.py
index 7312489861..a32663985a 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_policy_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_policy_operation.py
@@ -16,7 +16,10 @@
# under the License.
from mcp_server.client import PolicyOperation
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
class PlainRESTClientPolicyOperation(PolicyOperation):
@@ -30,7 +33,7 @@ class PlainRESTClientPolicyOperation(PolicyOperation):
async def get_list_of_policies(self) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/policies?details=true"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/policies?details=true"
)
return extract_content_from_response(response, "policies", [])
@@ -39,7 +42,7 @@ class PlainRESTClientPolicyOperation(PolicyOperation):
policy_name: str,
) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/policies/{policy_name}"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/policies/{encode_path_segment(policy_name)}"
)
return extract_content_from_response(response, "policy", {})
@@ -51,7 +54,9 @@ class PlainRESTClientPolicyOperation(PolicyOperation):
policies_to_remove: list,
) -> str:
response = await self.rest_client.post(
-
f"/api/metalakes/{self.metalake_name}/objects/{metadata_type}/{metadata_full_name}/policies",
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+ f"/{encode_path_segment(metadata_full_name)}/policies",
json={
"policiesToAdd": policies_to_add,
"policiesToRemove": policies_to_remove,
@@ -63,7 +68,10 @@ class PlainRESTClientPolicyOperation(PolicyOperation):
self, metadata_full_name: str, metadata_type: str, policy_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/objects/{metadata_type}/{metadata_full_name}/policies/{policy_name}",
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+ f"/{encode_path_segment(metadata_full_name)}"
+ f"/policies/{encode_path_segment(policy_name)}",
)
return extract_content_from_response(response, "policy", {})
@@ -71,12 +79,15 @@ class PlainRESTClientPolicyOperation(PolicyOperation):
self, metadata_full_name: str, metadata_type: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/objects/{metadata_type}/{metadata_full_name}/policies?details=true",
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+
f"/{encode_path_segment(metadata_full_name)}/policies?details=true",
)
return extract_content_from_response(response, "policies", [])
async def list_metadata_by_policy(self, policy_name: str) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/policies/{policy_name}/objects"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/policies/{encode_path_segment(policy_name)}/objects"
)
return extract_content_from_response(response, "metadataObjects", [])
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_schema_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_schema_operation.py
index d209f822a5..4c2c76ed44 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_schema_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_schema_operation.py
@@ -18,7 +18,10 @@
from httpx import AsyncClient
from mcp_server.client import SchemaOperation
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
class PlainRESTClientSchemaOperation(SchemaOperation):
@@ -28,6 +31,7 @@ class PlainRESTClientSchemaOperation(SchemaOperation):
async def get_list_of_schemas(self, catalog_name: str) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}/schemas"
)
return extract_content_from_response(response, "identifiers", [])
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_statistic_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_statistic_operation.py
index 4417cd8b01..49a56036a5 100644
---
a/mcp-server/mcp_server/client/plain/plain_rest_client_statistic_operation.py
+++
b/mcp-server/mcp_server/client/plain/plain_rest_client_statistic_operation.py
@@ -15,7 +15,10 @@
# specific language governing permissions and limitations
# under the License.
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
from mcp_server.client.statistic_operation import StatisticOperation
@@ -28,7 +31,9 @@ class PlainRESTClientStatisticOperation(StatisticOperation):
self, metalake_name: str, metadata_type: str, metadata_fullname: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{metalake_name}/objects/{metadata_type}/{metadata_fullname}/statistics"
+ f"/api/metalakes/{encode_path_segment(metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+ f"/{encode_path_segment(metadata_fullname)}/statistics"
)
return extract_content_from_response(response, "statistics", [])
@@ -44,9 +49,15 @@ class PlainRESTClientStatisticOperation(StatisticOperation):
to_inclusive: bool = False,
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{metalake_name}/objects/{metadata_type}/{metadata_fullname}/statistics/"
-
f"partitions?from={from_partition_name}&to={to_partition_name}&fromInclusive={from_inclusive}"
- f"&toInclusive={to_inclusive}"
+ f"/api/metalakes/{encode_path_segment(metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+ f"/{encode_path_segment(metadata_fullname)}/statistics/partitions",
+ params={
+ "from": from_partition_name,
+ "to": to_partition_name,
+ "fromInclusive": from_inclusive,
+ "toInclusive": to_inclusive,
+ },
)
return extract_content_from_response(
response, "partitionStatistics", []
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_table_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_table_operation.py
index 806f01f388..e1bee62cab 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_table_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_table_operation.py
@@ -18,7 +18,10 @@
from httpx import AsyncClient
from mcp_server.client import TableOperation
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
class PlainRESTClientTableOperation(TableOperation):
@@ -31,7 +34,9 @@ class PlainRESTClientTableOperation(TableOperation):
self, catalog_name: str, schema_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/tables"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}/tables"
)
return extract_content_from_response(response, "identifiers", [])
@@ -39,6 +44,9 @@ class PlainRESTClientTableOperation(TableOperation):
self, catalog_name: str, schema_name: str, table_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/tables/{table_name}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/tables/{encode_path_segment(table_name)}"
)
return extract_content_from_response(response, "table", {})
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_tag_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_tag_operation.py
index 85bbc13c87..b5fabf9cb1 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_tag_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_tag_operation.py
@@ -16,7 +16,10 @@
# under the License.
from mcp_server.client.plain.exception import GravitinoException
-from mcp_server.client.plain.utils import extract_content_from_response
+from mcp_server.client.plain.utils import (
+ encode_path_segment,
+ extract_content_from_response,
+)
from mcp_server.client.tag_operation import TagOperation
@@ -27,13 +30,13 @@ class PlainRESTClientTagOperation(TagOperation):
async def list_of_tags(self) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/tags?details=true"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/tags?details=true"
)
return extract_content_from_response(response, "tags", [])
async def get_tag_by_name(self, tag_name: str) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/tags/{tag_name}"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/tags/{encode_path_segment(tag_name)}"
)
return extract_content_from_response(response, "tag", {})
@@ -41,7 +44,7 @@ class PlainRESTClientTagOperation(TagOperation):
self, tag_name: str, tag_comment: str, tag_properties: dict
) -> str:
response = await self.rest_client.post(
- f"/api/metalakes/{self.metalake_name}/tags",
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}/tags",
json={
"name": tag_name,
"comment": tag_comment,
@@ -52,7 +55,7 @@ class PlainRESTClientTagOperation(TagOperation):
async def alter_tag(self, tag_name: str, updates: list) -> str:
response = await self.rest_client.put(
- f"/api/metalakes/{self.metalake_name}/tags/{tag_name}",
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/tags/{encode_path_segment(tag_name)}",
json={
"updates": updates,
},
@@ -61,7 +64,7 @@ class PlainRESTClientTagOperation(TagOperation):
async def delete_tag(self, name: str) -> None:
response = await self.rest_client.delete(
- f"/api/metalakes/{self.metalake_name}/tags/{name}"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/tags/{encode_path_segment(name)}"
)
if response.status_code != 200:
raise GravitinoException(
@@ -77,7 +80,9 @@ class PlainRESTClientTagOperation(TagOperation):
tags_to_disassociate: list,
) -> str:
response = await self.rest_client.post(
-
f"/api/metalakes/{self.metalake_name}/objects/{metadata_type}/{metadata_full_name}/tags",
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+ f"/{encode_path_segment(metadata_full_name)}/tags",
json={
"tagsToAdd": tags_to_associate,
"tagsToRemove": tags_to_disassociate,
@@ -89,12 +94,14 @@ class PlainRESTClientTagOperation(TagOperation):
self, metadata_full_name: str, metadata_type: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/objects/{metadata_type}/{metadata_full_name}/tags?details=true"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/objects/{encode_path_segment(metadata_type)}"
+ f"/{encode_path_segment(metadata_full_name)}/tags?details=true"
)
return extract_content_from_response(response, "names", [])
async def list_metadata_by_tag(self, tag_name: str) -> str:
response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/tags/{tag_name}/objects"
+
f"/api/metalakes/{encode_path_segment(self.metalake_name)}/tags/{encode_path_segment(tag_name)}/objects"
)
return extract_content_from_response(response, "metadataObjects", [])
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_topic_operation.py
b/mcp-server/mcp_server/client/plain/plain_rest_client_topic_operation.py
index af8f105ae8..ff04720197 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_topic_operation.py
+++ b/mcp-server/mcp_server/client/plain/plain_rest_client_topic_operation.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+from mcp_server.client.plain.utils import encode_path_segment
from mcp_server.client.topic_operation import TopicOperation
@@ -29,7 +30,9 @@ class PlainRESTClientTopicOperation(TopicOperation):
async def list_of_topics(self, catalog_name: str, schema_name: str) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/topics"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}/topics"
)
return response.json().get("identifiers", [])
@@ -37,6 +40,9 @@ class PlainRESTClientTopicOperation(TopicOperation):
self, catalog_name: str, schema_name: str, topic_name: str
) -> str:
response = await self.rest_client.get(
-
f"/api/metalakes/{self.metalake_name}/catalogs/{catalog_name}/schemas/{schema_name}/topics/{topic_name}"
+ f"/api/metalakes/{encode_path_segment(self.metalake_name)}"
+ f"/catalogs/{encode_path_segment(catalog_name)}"
+ f"/schemas/{encode_path_segment(schema_name)}"
+ f"/topics/{encode_path_segment(topic_name)}"
)
return response.json().get("topic", {})
diff --git a/mcp-server/mcp_server/client/plain/utils.py
b/mcp-server/mcp_server/client/plain/utils.py
index ab0d197229..e5c998a57a 100644
--- a/mcp-server/mcp_server/client/plain/utils.py
+++ b/mcp-server/mcp_server/client/plain/utils.py
@@ -17,10 +17,16 @@
import json
import logging
+from urllib.parse import quote
from mcp_server.client.plain.exception import GravitinoException
+def encode_path_segment(segment: str) -> str:
+ """URL-encode a single path segment, encoding all reserved characters."""
+ return quote(str(segment), safe="")
+
+
def extract_content_from_response(response, field: str, default="") -> str:
response_json = response.json()
_handle_gravitino_exception(response_json)
diff --git
a/mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
b/mcp-server/tests/unit/client/__init__.py
similarity index 56%
copy from
mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
copy to mcp-server/tests/unit/client/__init__.py
index a5c4a5ec67..13a83393a9 100644
--- a/mcp-server/mcp_server/client/plain/plain_rest_client_catalog_operation.py
+++ b/mcp-server/tests/unit/client/__init__.py
@@ -14,20 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-from httpx import AsyncClient
-
-from mcp_server.client import CatalogOperation
-from mcp_server.client.plain.utils import extract_content_from_response
-
-
-class PlainRESTClientCatalogOperation(CatalogOperation):
- def __init__(self, metalake_name: str, rest_client: AsyncClient):
- self.metalake_name = metalake_name
- self.rest_client = rest_client
-
- async def get_list_of_catalogs(self) -> str:
- response = await self.rest_client.get(
- f"/api/metalakes/{self.metalake_name}/catalogs?details=true"
- )
- return extract_content_from_response(response, "catalogs", [])
diff --git a/mcp-server/tests/unit/client/test_url_encoding.py
b/mcp-server/tests/unit/client/test_url_encoding.py
new file mode 100644
index 0000000000..36eb7cb8df
--- /dev/null
+++ b/mcp-server/tests/unit/client/test_url_encoding.py
@@ -0,0 +1,384 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Tests that user-supplied identifiers are URL-encoded before being embedded
+in request paths, preventing path traversal and query injection attacks."""
+
+import asyncio
+import unittest
+from unittest.mock import AsyncMock, MagicMock
+
+from mcp_server.client.plain.plain_rest_client_catalog_operation import (
+ PlainRESTClientCatalogOperation,
+)
+from mcp_server.client.plain.plain_rest_client_fileset_operation import (
+ PlainRESTClientFilesetOperation,
+)
+from mcp_server.client.plain.plain_rest_client_job_operation import (
+ PlainRESTClientJobOperation,
+)
+from mcp_server.client.plain.plain_rest_client_model_operation import (
+ PlainRESTClientModelOperation,
+)
+from mcp_server.client.plain.plain_rest_client_policy_operation import (
+ PlainRESTClientPolicyOperation,
+)
+from mcp_server.client.plain.plain_rest_client_schema_operation import (
+ PlainRESTClientSchemaOperation,
+)
+from mcp_server.client.plain.plain_rest_client_statistic_operation import (
+ PlainRESTClientStatisticOperation,
+)
+from mcp_server.client.plain.plain_rest_client_table_operation import (
+ PlainRESTClientTableOperation,
+)
+from mcp_server.client.plain.plain_rest_client_tag_operation import (
+ PlainRESTClientTagOperation,
+)
+from mcp_server.client.plain.plain_rest_client_topic_operation import (
+ PlainRESTClientTopicOperation,
+)
+
+
+def _make_mock_client(response_json: dict):
+ """Return a mock httpx.AsyncClient whose HTTP methods return a fake
response."""
+ response = MagicMock()
+ response.status_code = 200
+ response.json.return_value = response_json
+
+ client = MagicMock()
+ client.get = AsyncMock(return_value=response)
+ client.post = AsyncMock(return_value=response)
+ client.put = AsyncMock(return_value=response)
+ client.delete = AsyncMock(return_value=response)
+ return client
+
+
+def _called_url(mock_method):
+ """Extract the positional URL argument from the first call of a mock
method."""
+ return mock_method.call_args[0][0]
+
+
+def _called_params(mock_method):
+ """Extract the 'params' keyword argument from the first call of a mock
method."""
+ return mock_method.call_args[1].get("params", {})
+
+
+# Payloads that must be encoded to prevent injection
+_PATH_TRAVERSAL = "../../admin/users"
+_QUERY_INJECTION = "name?admin=true#"
+_SLASH = "cat/schema"
+_ENCODED_PATH_TRAVERSAL = "..%2F..%2Fadmin%2Fusers"
+_ENCODED_QUERY_INJECTION = "name%3Fadmin%3Dtrue%23"
+_ENCODED_SLASH = "cat%2Fschema"
+
+METALAKE = "my_metalake"
+
+
+class TestCatalogOperationUrlEncoding(unittest.TestCase):
+ def test_get_list_of_catalogs_encodes_metalake(self):
+ client = _make_mock_client({"catalogs": []})
+ op = PlainRESTClientCatalogOperation(_PATH_TRAVERSAL, client)
+ asyncio.run(op.get_list_of_catalogs())
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+
+class TestSchemaOperationUrlEncoding(unittest.TestCase):
+ def test_get_list_of_schemas_encodes_catalog_name(self):
+ client = _make_mock_client({"identifiers": []})
+ op = PlainRESTClientSchemaOperation(METALAKE, client)
+ asyncio.run(op.get_list_of_schemas(_PATH_TRAVERSAL))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_get_list_of_schemas_encodes_query_injection(self):
+ client = _make_mock_client({"identifiers": []})
+ op = PlainRESTClientSchemaOperation(METALAKE, client)
+ asyncio.run(op.get_list_of_schemas(_QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+
+class TestTableOperationUrlEncoding(unittest.TestCase):
+ def test_get_list_of_tables_encodes_path_traversal(self):
+ client = _make_mock_client({"identifiers": []})
+ op = PlainRESTClientTableOperation(METALAKE, client)
+ asyncio.run(op.get_list_of_tables(_PATH_TRAVERSAL, "schema"))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_load_table_encodes_all_segments(self):
+ client = _make_mock_client({"table": {}})
+ op = PlainRESTClientTableOperation(METALAKE, client)
+ asyncio.run(op.load_table(_SLASH, _SLASH, _QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertEqual(url.count(_ENCODED_SLASH), 2)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+
+class TestModelOperationUrlEncoding(unittest.TestCase):
+ def test_list_of_models_encodes_path_traversal(self):
+ client = _make_mock_client({"identifiers": []})
+ op = PlainRESTClientModelOperation(METALAKE, client)
+ asyncio.run(op.list_of_models(_PATH_TRAVERSAL, "schema"))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_load_model_encodes_model_name(self):
+ client = _make_mock_client({"model": {}})
+ op = PlainRESTClientModelOperation(METALAKE, client)
+ asyncio.run(op.load_model("catalog", "schema", _QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+ def test_load_model_version_by_alias_encodes_alias(self):
+ client = _make_mock_client({"modelVersion": {}})
+ op = PlainRESTClientModelOperation(METALAKE, client)
+ asyncio.run(
+ op.load_model_version_by_alias(
+ "catalog", "schema", "model", _PATH_TRAVERSAL
+ )
+ )
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+
+class TestTopicOperationUrlEncoding(unittest.TestCase):
+ def test_list_of_topics_encodes_path_traversal(self):
+ client = _make_mock_client({"identifiers": []})
+ op = PlainRESTClientTopicOperation(METALAKE, client)
+ asyncio.run(op.list_of_topics(_PATH_TRAVERSAL, "schema"))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_load_topic_encodes_topic_name(self):
+ client = _make_mock_client({"topic": {}})
+ op = PlainRESTClientTopicOperation(METALAKE, client)
+ asyncio.run(op.load_topic("catalog", "schema", _QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+
+class TestFilesetOperationUrlEncoding(unittest.TestCase):
+ def test_list_of_filesets_encodes_path_traversal(self):
+ client = _make_mock_client({"identifiers": []})
+ op = PlainRESTClientFilesetOperation(METALAKE, client)
+ asyncio.run(op.list_of_filesets(_PATH_TRAVERSAL, "schema"))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_load_fileset_encodes_fileset_name(self):
+ client = _make_mock_client({"fileset": {}})
+ op = PlainRESTClientFilesetOperation(METALAKE, client)
+ asyncio.run(op.load_fileset("catalog", "schema", _QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+ def test_list_files_in_fileset_uses_params_for_query_args(self):
+ """sub_path and location_name must go through params=, not be
concatenated."""
+ client = _make_mock_client({"files": []})
+ op = PlainRESTClientFilesetOperation(METALAKE, client)
+ asyncio.run(
+ op.list_files_in_fileset(
+ "catalog",
+ "schema",
+ "fileset",
+ location_name="loc?inject=1",
+ sub_path="/some/path?inject=2",
+ )
+ )
+ url = _called_url(client.get)
+ params = _called_params(client.get)
+ # Injection payloads must NOT appear raw in the path
+ self.assertNotIn("inject=1", url)
+ self.assertNotIn("inject=2", url)
+ # They must be passed as structured params so httpx encodes them
+ self.assertIn("location_name", params)
+ self.assertIn("sub_path", params)
+
+
+class TestTagOperationUrlEncoding(unittest.TestCase):
+ def test_get_tag_by_name_encodes_tag_name(self):
+ client = _make_mock_client({"tag": {}})
+ op = PlainRESTClientTagOperation(METALAKE, client)
+ asyncio.run(op.get_tag_by_name(_PATH_TRAVERSAL))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_alter_tag_encodes_tag_name(self):
+ client = _make_mock_client({"tag": {}})
+ op = PlainRESTClientTagOperation(METALAKE, client)
+ asyncio.run(op.alter_tag(_QUERY_INJECTION, []))
+ url = _called_url(client.put)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+ def test_delete_tag_encodes_tag_name(self):
+ client = _make_mock_client({})
+ op = PlainRESTClientTagOperation(METALAKE, client)
+ asyncio.run(op.delete_tag(_PATH_TRAVERSAL))
+ url = _called_url(client.delete)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_associate_tag_encodes_metadata_full_name_and_type(self):
+ client = _make_mock_client({"names": []})
+ op = PlainRESTClientTagOperation(METALAKE, client)
+ asyncio.run(
+ op.associate_tag_with_metadata(_QUERY_INJECTION, _SLASH, [], [])
+ )
+ url = _called_url(client.post)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertIn(_ENCODED_SLASH, url)
+ self.assertNotIn("?admin=true", url)
+
+ def test_list_tags_for_metadata_encodes_metadata_full_name(self):
+ client = _make_mock_client({"names": []})
+ op = PlainRESTClientTagOperation(METALAKE, client)
+ asyncio.run(op.list_tags_for_metadata(_PATH_TRAVERSAL, "table"))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_list_metadata_by_tag_encodes_tag_name(self):
+ client = _make_mock_client({"metadataObjects": []})
+ op = PlainRESTClientTagOperation(METALAKE, client)
+ asyncio.run(op.list_metadata_by_tag(_QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+
+class TestJobOperationUrlEncoding(unittest.TestCase):
+ def test_get_job_by_id_encodes_job_id(self):
+ client = _make_mock_client({"job": {}})
+ op = PlainRESTClientJobOperation(METALAKE, client)
+ asyncio.run(op.get_job_by_id(_PATH_TRAVERSAL))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_get_job_template_by_name_encodes_name(self):
+ client = _make_mock_client({"jobTemplate": {}})
+ op = PlainRESTClientJobOperation(METALAKE, client)
+ asyncio.run(op.get_job_template_by_name(_QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+ def test_list_of_jobs_uses_params_for_template_name(self):
+ """job_template_name is a query parameter and must not be concatenated
into the path."""
+ client = _make_mock_client({"jobs": []})
+ op = PlainRESTClientJobOperation(METALAKE, client)
+ asyncio.run(op.list_of_jobs(_QUERY_INJECTION))
+ url = _called_url(client.get)
+ params = _called_params(client.get)
+ self.assertNotIn("?admin=true", url)
+ self.assertIn("jobTemplateName", params)
+
+ def test_cancel_job_encodes_job_id(self):
+ client = _make_mock_client({"job": {}})
+ op = PlainRESTClientJobOperation(METALAKE, client)
+ asyncio.run(op.cancel_job(_PATH_TRAVERSAL))
+ url = _called_url(client.post)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+
+class TestPolicyOperationUrlEncoding(unittest.TestCase):
+ def test_load_policy_encodes_policy_name(self):
+ client = _make_mock_client({"policy": {}})
+ op = PlainRESTClientPolicyOperation(METALAKE, client)
+ asyncio.run(op.load_policy(_PATH_TRAVERSAL))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_associate_policy_encodes_metadata_full_name_and_type(self):
+ client = _make_mock_client({"names": []})
+ op = PlainRESTClientPolicyOperation(METALAKE, client)
+ asyncio.run(
+ op.associate_policy_with_metadata(_QUERY_INJECTION, _SLASH, [], [])
+ )
+ url = _called_url(client.post)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertIn(_ENCODED_SLASH, url)
+ self.assertNotIn("?admin=true", url)
+
+ def test_get_policy_for_metadata_encodes_policy_name(self):
+ client = _make_mock_client({"policy": {}})
+ op = PlainRESTClientPolicyOperation(METALAKE, client)
+ asyncio.run(
+ op.get_policy_for_metadata(
+ "meta.full.name", "table", _PATH_TRAVERSAL
+ )
+ )
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def test_list_metadata_by_policy_encodes_policy_name(self):
+ client = _make_mock_client({"metadataObjects": []})
+ op = PlainRESTClientPolicyOperation(METALAKE, client)
+ asyncio.run(op.list_metadata_by_policy(_QUERY_INJECTION))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_QUERY_INJECTION, url)
+ self.assertNotIn("?admin=true", url)
+
+
+class TestStatisticOperationUrlEncoding(unittest.TestCase):
+ def test_list_of_statistics_encodes_metadata_fullname(self):
+ client = _make_mock_client({"statistics": []})
+ op = PlainRESTClientStatisticOperation(METALAKE, client)
+ asyncio.run(op.list_of_statistics(METALAKE, "table", _PATH_TRAVERSAL))
+ url = _called_url(client.get)
+ self.assertIn(_ENCODED_PATH_TRAVERSAL, url)
+ self.assertNotIn("../../", url)
+
+ def
test_list_statistic_for_partition_uses_params_for_partition_names(self):
+ """Partition names are query parameters and must not be concatenated
into the path."""
+ client = _make_mock_client({"partitionStatistics": []})
+ op = PlainRESTClientStatisticOperation(METALAKE, client)
+ asyncio.run(
+ op.list_statistic_for_partition(
+ METALAKE,
+ "table",
+ "catalog.schema.table",
+ from_partition_name=_QUERY_INJECTION,
+ to_partition_name=_PATH_TRAVERSAL,
+ )
+ )
+ url = _called_url(client.get)
+ params = _called_params(client.get)
+ self.assertNotIn("?admin=true", url)
+ self.assertNotIn("../../", url)
+ self.assertIn("from", params)
+ self.assertIn("to", params)