singhpk234 commented on code in PR #3823:
URL: https://github.com/apache/polaris/pull/3823#discussion_r2834808079
##########
polaris-core/src/main/java/org/apache/polaris/core/storage/aws/AwsSessionTagsBuilder.java:
##########
@@ -40,63 +41,25 @@ private AwsSessionTagsBuilder() {
}
/**
- * Builds a list of AWS STS session tags from the principal name and
credential vending context.
- * These tags will appear in CloudTrail events for correlation purposes.
+ * Builds a list of AWS STS session tags from the principal name and
credential vending context,
+ * including only the fields specified in {@code enabledFields}.
*
- * <p>The trace ID tag is only included if {@link
CredentialVendingContext#traceId()} is present.
- * This is controlled at the source (StorageAccessConfigProvider) based on
the
- * INCLUDE_TRACE_ID_IN_SESSION_TAGS feature flag.
+ * <p>Only fields present in {@code enabledFields} will be included in the
returned list. The
+ * trace ID tag is only included if {@link SessionTagField#TRACE_ID} is in
{@code enabledFields}
+ * and {@link CredentialVendingContext#traceId()} is present (the context's
traceId is populated
+ * by the caller based on configuration).
*
* @param principalName the name of the principal requesting credentials
- * @param context the credential vending context containing catalog,
namespace, table, roles, and
- * optionally trace ID
+ * @param context the credential vending context
+ * @param enabledFields the set of {@link SessionTagField}s to include
* @return a list of STS Tags to attach to the AssumeRole request
*/
- public static List<Tag> buildSessionTags(String principalName,
CredentialVendingContext context) {
- List<Tag> tags = new ArrayList<>();
-
- // Always include all tags with "unknown" placeholder for missing values
- // This ensures consistent tag presence in CloudTrail for correlation
- tags.add(
- Tag.builder()
- .key(CredentialVendingContext.TAG_KEY_PRINCIPAL)
- .value(truncateTagValue(principalName))
- .build());
- tags.add(
- Tag.builder()
- .key(CredentialVendingContext.TAG_KEY_ROLES)
-
.value(truncateTagValue(context.activatedRoles().orElse(TAG_VALUE_UNKNOWN)))
- .build());
- tags.add(
- Tag.builder()
- .key(CredentialVendingContext.TAG_KEY_CATALOG)
-
.value(truncateTagValue(context.catalogName().orElse(TAG_VALUE_UNKNOWN)))
- .build());
- tags.add(
- Tag.builder()
- .key(CredentialVendingContext.TAG_KEY_NAMESPACE)
-
.value(truncateTagValue(context.namespace().orElse(TAG_VALUE_UNKNOWN)))
- .build());
- tags.add(
- Tag.builder()
- .key(CredentialVendingContext.TAG_KEY_TABLE)
-
.value(truncateTagValue(context.tableName().orElse(TAG_VALUE_UNKNOWN)))
- .build());
-
- // Only include trace ID if it's present in the context.
- // The context's traceId is only populated when
INCLUDE_TRACE_ID_IN_SESSION_TAGS is enabled.
- // This allows efficient credential caching when trace IDs are not needed
in session tags.
- context
- .traceId()
- .ifPresent(
- traceId ->
- tags.add(
- Tag.builder()
- .key(CredentialVendingContext.TAG_KEY_TRACE_ID)
- .value(truncateTagValue(traceId))
- .build()));
-
- return tags;
+ public static List<Tag> buildSessionTags(
+ String principalName, CredentialVendingContext context,
Set<SessionTagField> enabledFields) {
+ return enabledFields.stream()
+ .map(field -> field.buildTag(principalName, context))
+ .flatMap(java.util.Optional::stream)
Review Comment:
nit: inline import, can we please remove
##########
polaris-core/src/main/java/org/apache/polaris/core/storage/aws/SessionTagField.java:
##########
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.polaris.core.storage.aws;
+
+import java.util.Optional;
+import org.apache.polaris.core.storage.CredentialVendingContext;
+import software.amazon.awssdk.services.sts.model.Tag;
+
+/**
+ * Enum representing the supported fields that can be included as AWS STS
session tags in credential
+ * vending. Each value knows its tag key and how to extract its value from a
{@link
+ * CredentialVendingContext}.
+ *
+ * <p>These tags appear in CloudTrail events for correlation between catalog
operations and S3 data
+ * access. Configure via {@code SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL} using
the {@link #configName}
+ * of each desired field.
+ */
+public enum SessionTagField {
+ REALM("realm", CredentialVendingContext.TAG_KEY_REALM) {
+ @Override
+ public Optional<Tag> buildTag(String principalName,
CredentialVendingContext context) {
+ return
Optional.of(tag(context.realm().orElse(AwsSessionTagsBuilder.TAG_VALUE_UNKNOWN)));
+ }
+ },
+ CATALOG("catalog", CredentialVendingContext.TAG_KEY_CATALOG) {
+ @Override
+ public Optional<Tag> buildTag(String principalName,
CredentialVendingContext context) {
+ return Optional.of(
+
tag(context.catalogName().orElse(AwsSessionTagsBuilder.TAG_VALUE_UNKNOWN)));
+ }
+ },
+ NAMESPACE("namespace", CredentialVendingContext.TAG_KEY_NAMESPACE) {
+ @Override
+ public Optional<Tag> buildTag(String principalName,
CredentialVendingContext context) {
+ return
Optional.of(tag(context.namespace().orElse(AwsSessionTagsBuilder.TAG_VALUE_UNKNOWN)));
+ }
+ },
+ TABLE("table", CredentialVendingContext.TAG_KEY_TABLE) {
+ @Override
+ public Optional<Tag> buildTag(String principalName,
CredentialVendingContext context) {
+ return
Optional.of(tag(context.tableName().orElse(AwsSessionTagsBuilder.TAG_VALUE_UNKNOWN)));
+ }
+ },
Review Comment:
not related to pr : what do we wann do about views ?
##########
polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java:
##########
@@ -91,31 +91,52 @@ public static void enforceFeatureEnabledOrThrow(
.defaultValue(false)
.buildFeatureConfiguration();
- public static final FeatureConfiguration<Boolean>
INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL =
- PolarisConfiguration.<Boolean>builder()
- .key("INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL")
- .description(
- "If set to true, session tags (catalog, namespace, table,
principal, roles) will be included\n"
- + "in AWS STS AssumeRole requests for credential vending.
These tags appear in CloudTrail events,\n"
- + "enabling correlation between catalog operations and S3
data access.\n"
- + "Requires the IAM role trust policy to allow
sts:TagSession action.\n"
- + "Note that enabling this feature may lead to degradation
in temporary credential caching as \n"
- + "catalog will no longer be able to reuse credentials for
different tables/namespaces/roles.")
- .defaultValue(false)
- .buildFeatureConfiguration();
+ /**
+ * The set of fields that are supported as AWS STS session tag labels in
credential vending.
+ *
+ * <p>Supported values:
+ *
+ * <ul>
+ * <li>{@code realm} - The realm identifier for the request
+ * <li>{@code catalog} - The name of the catalog vending credentials
+ * <li>{@code namespace} - The namespace being accessed (dot-separated)
+ * <li>{@code table} - The table name being accessed
+ * <li>{@code principal} - The principal name requesting credentials
+ * <li>{@code roles} - Comma-separated list of activated principal roles
+ * <li>{@code trace_id} - OpenTelemetry trace ID (WARNING: disables
credential caching)
+ * </ul>
+ */
+ public static final List<String> SUPPORTED_SESSION_TAG_FIELDS =
+ List.<String>of("realm", "catalog", "namespace", "table", "principal",
"roles", "trace_id");
- public static final FeatureConfiguration<Boolean>
INCLUDE_TRACE_ID_IN_SESSION_TAGS =
- PolarisConfiguration.<Boolean>builder()
- .key("INCLUDE_TRACE_ID_IN_SESSION_TAGS")
+ /**
+ * The config name for the trace ID session tag field. Included as a
constant because callers need
+ * to check for its presence specifically — enabling it populates the trace
ID into the credential
+ * vending context, which disables credential caching.
+ */
+ public static final String SESSION_TAG_FIELD_TRACE_ID = "trace_id";
+
+ public static final FeatureConfiguration<List<String>>
SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL =
+ PolarisConfiguration.<List<String>>builder()
+ .key("SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL")
.description(
- "If set to true (and
INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL is also true), the OpenTelemetry\n"
- + "trace ID will be included as a session tag in AWS STS
AssumeRole requests. This enables\n"
- + "end-to-end correlation between catalog operations
(Polaris events), credential vending (CloudTrail),\n"
- + "and metrics reports from compute engines.\n"
- + "WARNING: Enabling this feature completely disables
credential caching because every request\n"
+ "A comma-separated list of fields to include as session tags in
AWS STS AssumeRole requests\n"
+ + "for credential vending. These tags appear in CloudTrail
events, enabling correlation between\n"
+ + "catalog operations and S3 data access. An empty list
(default) disables session tags entirely.\n"
+ + "Requires the IAM role trust policy to allow
sts:TagSession action.\n"
+ + "\n"
+ + "Supported fields: "
+ + String.join(", ", SUPPORTED_SESSION_TAG_FIELDS)
+ + "\n"
+ + "\n"
+ + "Note: each additional field may contribute to AWS STS
packed policy size (max 2048 characters).\n"
+ + "Fields with long values (e.g. deeply nested namespaces)
can cause STS policy size limit errors.\n"
+ + "Choose only the fields needed for your CloudTrail
correlation requirements.\n"
+ + "\n"
+ + "WARNING: Including 'trace_id' completely disables
credential caching because every request\n"
+ "has a unique trace ID. This may significantly increase
latency and STS API costs.\n"
- + "Consider leaving this disabled unless end-to-end tracing
correlation is critical.")
- .defaultValue(false)
+ + "Fields that vary per-request (table, namespace, roles)
degrade caching but do not disable it.")
Review Comment:
i didn't get this statement, like we anways can't reuse the creds vended for
a role / namespace / table anyways right ?
##########
polaris-core/src/main/java/org/apache/polaris/core/config/FeatureConfiguration.java:
##########
@@ -91,31 +91,52 @@ public static void enforceFeatureEnabledOrThrow(
.defaultValue(false)
.buildFeatureConfiguration();
- public static final FeatureConfiguration<Boolean>
INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL =
- PolarisConfiguration.<Boolean>builder()
- .key("INCLUDE_SESSION_TAGS_IN_SUBSCOPED_CREDENTIAL")
- .description(
- "If set to true, session tags (catalog, namespace, table,
principal, roles) will be included\n"
- + "in AWS STS AssumeRole requests for credential vending.
These tags appear in CloudTrail events,\n"
- + "enabling correlation between catalog operations and S3
data access.\n"
- + "Requires the IAM role trust policy to allow
sts:TagSession action.\n"
- + "Note that enabling this feature may lead to degradation
in temporary credential caching as \n"
- + "catalog will no longer be able to reuse credentials for
different tables/namespaces/roles.")
- .defaultValue(false)
- .buildFeatureConfiguration();
+ /**
+ * The set of fields that are supported as AWS STS session tag labels in
credential vending.
+ *
+ * <p>Supported values:
+ *
+ * <ul>
+ * <li>{@code realm} - The realm identifier for the request
+ * <li>{@code catalog} - The name of the catalog vending credentials
+ * <li>{@code namespace} - The namespace being accessed (dot-separated)
+ * <li>{@code table} - The table name being accessed
+ * <li>{@code principal} - The principal name requesting credentials
+ * <li>{@code roles} - Comma-separated list of activated principal roles
+ * <li>{@code trace_id} - OpenTelemetry trace ID (WARNING: disables
credential caching)
+ * </ul>
+ */
+ public static final List<String> SUPPORTED_SESSION_TAG_FIELDS =
+ List.<String>of("realm", "catalog", "namespace", "table", "principal",
"roles", "trace_id");
- public static final FeatureConfiguration<Boolean>
INCLUDE_TRACE_ID_IN_SESSION_TAGS =
- PolarisConfiguration.<Boolean>builder()
- .key("INCLUDE_TRACE_ID_IN_SESSION_TAGS")
+ /**
+ * The config name for the trace ID session tag field. Included as a
constant because callers need
+ * to check for its presence specifically — enabling it populates the trace
ID into the credential
+ * vending context, which disables credential caching.
Review Comment:
it technically doesn't disables this, it just reduces the cache hit right, i
wonder if there is a better way to put it
##########
polaris-core/src/test/java/org/apache/polaris/service/storage/aws/AwsCredentialsStorageIntegrationTest.java:
##########
@@ -1475,6 +1492,93 @@ public void testSessionTagsWithEmptyContext() {
.noneMatch(tag -> tag.key().equals("polaris:trace_id"));
}
+ @Test
+ public void testRealmSessionTagIncludedWhenConfigured() {
+ StsClient stsClient = Mockito.mock(StsClient.class);
+ String roleARN = "arn:aws:iam::012345678901:role/jdoe";
+ String externalId = "externalId";
+ String bucket = "bucket";
+ String warehouseKeyPrefix = "path/to/warehouse";
+
+ ArgumentCaptor<AssumeRoleRequest> requestCaptor =
+ ArgumentCaptor.forClass(AssumeRoleRequest.class);
+
Mockito.when(stsClient.assumeRole(requestCaptor.capture())).thenReturn(ASSUME_ROLE_RESPONSE);
+
+ CredentialVendingContext context =
+ CredentialVendingContext.builder()
+ .realm(Optional.of("my-realm"))
+ .catalogName(Optional.of("test-catalog"))
+ .namespace(Optional.of("db.schema"))
+ .tableName(Optional.of("my_table"))
+ .activatedRoles(Optional.of("admin"))
+ .build();
+
+ new AwsCredentialsStorageIntegration(
+ AwsStorageConfigurationInfo.builder()
+ .addAllowedLocation(s3Path(bucket, warehouseKeyPrefix))
+ .roleARN(roleARN)
+ .externalId(externalId)
+ .build(),
+ stsClient)
+ .getSubscopedCreds(
+ SESSION_TAGS_WITH_REALM_CONFIG,
+ true,
+ Set.of(s3Path(bucket, warehouseKeyPrefix)),
+ Set.of(s3Path(bucket, warehouseKeyPrefix)),
+ POLARIS_PRINCIPAL,
+ Optional.empty(),
+ context);
+
+ AssumeRoleRequest capturedRequest = requestCaptor.getValue();
+ // 6 tags: realm + catalog + namespace + table + principal + roles
+ Assertions.assertThat(capturedRequest.tags()).hasSize(6);
+ Assertions.assertThat(capturedRequest.tags())
+ .anyMatch(tag -> tag.key().equals("polaris:realm") &&
tag.value().equals("my-realm"));
+ Assertions.assertThat(capturedRequest.tags())
+ .anyMatch(tag -> tag.key().equals("polaris:catalog") &&
tag.value().equals("test-catalog"));
+
Assertions.assertThat(capturedRequest.transitiveTagKeys()).contains("polaris:realm");
+ }
+
+ @Test
+ public void testRealmSessionTagNotIncludedWhenNotConfigured() {
+ StsClient stsClient = Mockito.mock(StsClient.class);
+ String roleARN = "arn:aws:iam::012345678901:role/jdoe";
+ String externalId = "externalId";
+ String bucket = "bucket";
+ String warehouseKeyPrefix = "path/to/warehouse";
+
+ ArgumentCaptor<AssumeRoleRequest> requestCaptor =
+ ArgumentCaptor.forClass(AssumeRoleRequest.class);
+
Mockito.when(stsClient.assumeRole(requestCaptor.capture())).thenReturn(ASSUME_ROLE_RESPONSE);
+
+ CredentialVendingContext context =
+ CredentialVendingContext.builder()
+ .realm(Optional.of("my-realm"))
+ .catalogName(Optional.of("test-catalog"))
+ .build();
+
+ // SESSION_TAGS_ENABLED_CONFIG does NOT include "realm" in the field list
+ new AwsCredentialsStorageIntegration(
+ AwsStorageConfigurationInfo.builder()
+ .addAllowedLocation(s3Path(bucket, warehouseKeyPrefix))
+ .roleARN(roleARN)
+ .externalId(externalId)
+ .build(),
+ stsClient)
+ .getSubscopedCreds(
+ SESSION_TAGS_ENABLED_CONFIG,
+ true,
+ Set.of(s3Path(bucket, warehouseKeyPrefix)),
+ Set.of(s3Path(bucket, warehouseKeyPrefix)),
+ POLARIS_PRINCIPAL,
+ Optional.empty(),
+ context);
Review Comment:
minor: can we reuse code from both the fuction i believe we just different
in credential vending context right and then we can just send
credentialVendingContext in the input and get back the requestCaptor so we can
do the assertions
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]