somandal commented on code in PR #16723: URL: https://github.com/apache/pinot/pull/16723#discussion_r2317089975
########## pinot-common/src/main/java/org/apache/pinot/common/audit/AuditUrlPathFilter.java: ########## @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.audit; + +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.Paths; +import java.util.Arrays; +import javax.inject.Singleton; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * URL path filter utility that uses Java's PathMatcher with glob patterns + * to determine if a URL path should be excluded from processing. + * + * This class provides powerful pattern matching capabilities including: + * - Wildcards: * (within path segment), ** (across path segments), ? (single character) + * - Character sets: [abc], [a-z], [!abc] + * - Grouping: {api,v1,v2} + * + * Examples: + * - health - exact match + * - api/* - matches api/users but not api/v1/users + * - api/** - matches api/users and api/v1/users + * - api/v[12]/users - matches api/v1/users and api/v2/users + * - api/{users,groups}/list - matches api/users/list and api/groups/list + */ +@Singleton +public class AuditUrlPathFilter { + private static final Logger LOG = LoggerFactory.getLogger(AuditUrlPathFilter.class); + private static final String PREFIX_GLOB = "glob:"; + private static final String PREFIX_REGEX = "regex:"; + + private static boolean testPattern(Path path, String pattern) { Review Comment: nit: let's rename this? maybe to something like `matches`? ########## pinot-common/src/main/java/org/apache/pinot/common/audit/AuditUrlPathFilter.java: ########## @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.audit; + +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.Paths; +import java.util.Arrays; +import javax.inject.Singleton; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * URL path filter utility that uses Java's PathMatcher with glob patterns + * to determine if a URL path should be excluded from processing. + * + * This class provides powerful pattern matching capabilities including: + * - Wildcards: * (within path segment), ** (across path segments), ? (single character) + * - Character sets: [abc], [a-z], [!abc] + * - Grouping: {api,v1,v2} + * + * Examples: + * - health - exact match + * - api/* - matches api/users but not api/v1/users + * - api/** - matches api/users and api/v1/users + * - api/v[12]/users - matches api/v1/users and api/v2/users + * - api/{users,groups}/list - matches api/users/list and api/groups/list + */ +@Singleton +public class AuditUrlPathFilter { + private static final Logger LOG = LoggerFactory.getLogger(AuditUrlPathFilter.class); + private static final String PREFIX_GLOB = "glob:"; + private static final String PREFIX_REGEX = "regex:"; + + private static boolean testPattern(Path path, String pattern) { + try { + String globPattern = pattern; + if (!globPattern.startsWith(PREFIX_GLOB) && !globPattern.startsWith(PREFIX_REGEX)) { + globPattern = PREFIX_GLOB + globPattern; + } + + PathMatcher matcher = FileSystems.getDefault().getPathMatcher(globPattern); + if (matcher.matches(path)) { + return true; + } + } catch (Exception e) { + LOG.error("Invalid pattern '{}', skipping", pattern, e); Review Comment: nit: make this `warn` instead of `error` ########## pinot-common/src/main/java/org/apache/pinot/common/audit/AuditUrlPathFilter.java: ########## @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.audit; + +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.Paths; +import java.util.Arrays; +import javax.inject.Singleton; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * URL path filter utility that uses Java's PathMatcher with glob patterns + * to determine if a URL path should be excluded from processing. + * + * This class provides powerful pattern matching capabilities including: + * - Wildcards: * (within path segment), ** (across path segments), ? (single character) + * - Character sets: [abc], [a-z], [!abc] + * - Grouping: {api,v1,v2} + * + * Examples: + * - health - exact match + * - api/* - matches api/users but not api/v1/users + * - api/** - matches api/users and api/v1/users + * - api/v[12]/users - matches api/v1/users and api/v2/users + * - api/{users,groups}/list - matches api/users/list and api/groups/list Review Comment: nit: update java docs to mention regex pattern is allowed provided regex prefix is set in the pattern? ########## pinot-common/src/main/java/org/apache/pinot/common/audit/AuditConfigManager.java: ########## @@ -121,40 +58,18 @@ private static <T> T mapPrefixedConfigToObject(Map<String, String> clusterConfig return AuditLogger.OBJECT_MAPPER.convertValue(subsetConfig.toMap(), configClass); } - /** Review Comment: was the removal of all the java docs for methods intentional? ########## pinot-common/src/main/java/org/apache/pinot/common/audit/AuditUrlPathFilter.java: ########## @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.audit; + +import java.nio.file.FileSystems; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.Paths; +import java.util.Arrays; +import javax.inject.Singleton; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * URL path filter utility that uses Java's PathMatcher with glob patterns + * to determine if a URL path should be excluded from processing. + * + * This class provides powerful pattern matching capabilities including: + * - Wildcards: * (within path segment), ** (across path segments), ? (single character) + * - Character sets: [abc], [a-z], [!abc] + * - Grouping: {api,v1,v2} + * + * Examples: + * - health - exact match + * - api/* - matches api/users but not api/v1/users + * - api/** - matches api/users and api/v1/users + * - api/v[12]/users - matches api/v1/users and api/v2/users + * - api/{users,groups}/list - matches api/users/list and api/groups/list + */ +@Singleton +public class AuditUrlPathFilter { + private static final Logger LOG = LoggerFactory.getLogger(AuditUrlPathFilter.class); + private static final String PREFIX_GLOB = "glob:"; + private static final String PREFIX_REGEX = "regex:"; + + private static boolean testPattern(Path path, String pattern) { + try { + String globPattern = pattern; + if (!globPattern.startsWith(PREFIX_GLOB) && !globPattern.startsWith(PREFIX_REGEX)) { Review Comment: i see that regex is also supported, should we take this as a config option rather than hardcoding to glob always if not specified? or is the intent to always use glob but just provide regex as an override option? ########## pinot-common/src/test/java/org/apache/pinot/common/audit/AuditUrlPathFilterTest.java: ########## @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.audit; + +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static org.assertj.core.api.Assertions.assertThat; + + +/** + * Unit tests for {@link AuditUrlPathFilter}. + * Tests the filter's delegation to PathMatcher and its input handling logic. + */ +public class AuditUrlPathFilterTest { + + private AuditUrlPathFilter _filter; + + @BeforeMethod + public void setUp() { + _filter = new AuditUrlPathFilter(); + } + + // ===== Input Validation Tests ===== + + @Test + public void testNullUrlPath() { + assertThat(_filter.isExcluded(null, "health")).isFalse(); + } + + @Test + public void testEmptyUrlPath() { + assertThat(_filter.isExcluded("", "health")).isFalse(); + assertThat(_filter.isExcluded(" ", "health")).isFalse(); + } + + @Test + public void testNullExcludePatterns() { + assertThat(_filter.isExcluded("api/users", null)).isFalse(); + } + + @Test + public void testEmptyExcludePatterns() { + assertThat(_filter.isExcluded("api/users", "")).isFalse(); + assertThat(_filter.isExcluded("api/users", " ")).isFalse(); + } + + @Test + public void testBothParametersBlank() { + assertThat(_filter.isExcluded(null, null)).isFalse(); + assertThat(_filter.isExcluded("", "")).isFalse(); + } + + // ===== Multiple Pattern Processing Tests ===== + + @Test + public void testMultiplePatternsCommaSeparated() { + String patterns = "health,api/users,admin"; + + assertThat(_filter.isExcluded("health", patterns)).isTrue(); + assertThat(_filter.isExcluded("api/users", patterns)).isTrue(); + assertThat(_filter.isExcluded("admin", patterns)).isTrue(); + assertThat(_filter.isExcluded("metrics", patterns)).isFalse(); + } + + @Test + public void testMultiplePatternsWithTrimmingAndEmptyElements() { + String patterns = " health , , api/users , , "; + + assertThat(_filter.isExcluded("health", patterns)).isTrue(); + assertThat(_filter.isExcluded("api/users", patterns)).isTrue(); + assertThat(_filter.isExcluded("metrics", patterns)).isFalse(); + } + + @Test + public void testAnyPatternMatchesReturnsTrue() { + String patterns = "nonexistent1,health,nonexistent2"; + + assertThat(_filter.isExcluded("health", patterns)).isTrue(); + assertThat(_filter.isExcluded("nonexistent1", patterns)).isTrue(); + assertThat(_filter.isExcluded("other", patterns)).isFalse(); + } + + // ===== Prefix Handling Tests ===== + + @Test + public void testAutomaticGlobPrefixAddition() { + assertThat(_filter.isExcluded("health", "health")).isTrue(); + assertThat(_filter.isExcluded("api/users", "api/*")).isTrue(); + } + + @Test + public void testExplicitGlobPrefix() { + assertThat(_filter.isExcluded("health", "glob:health")).isTrue(); + assertThat(_filter.isExcluded("api/users", "glob:api/*")).isTrue(); + } + + @Test + public void testExplicitRegexPrefix() { + String pattern = "regex:api/v[0-9]+/.*"; + assertThat(_filter.isExcluded("api/v1/users", pattern)).isTrue(); + assertThat(_filter.isExcluded("api/v123/anything", pattern)).isTrue(); + assertThat(_filter.isExcluded("api/va/users", pattern)).isFalse(); + } + + @Test + public void testMixedPrefixes() { + String patterns = "glob:health,regex:api/v[0-9]+/.*,admin"; + + assertThat(_filter.isExcluded("health", patterns)).isTrue(); + assertThat(_filter.isExcluded("api/v1/users", patterns)).isTrue(); + assertThat(_filter.isExcluded("admin", patterns)).isTrue(); + } + + // ===== Error Handling Tests ===== + + @Test + public void testInvalidPatternIsSkipped() { + String patterns = "api/v[123,health,{unclosed"; + + assertThat(_filter.isExcluded("health", patterns)).isTrue(); + assertThat(_filter.isExcluded("api/v1", patterns)).isFalse(); + } + + @Test + public void testInvalidPathHandling() { + String invalidPath = "path\0with\0nulls"; + assertThat(_filter.isExcluded(invalidPath, "health")).isFalse(); + } + + @Test + public void testAllInvalidPatternsReturnFalse() { + String patterns = "[unclosed,{unclosed,\\invalid"; + + assertThat(_filter.isExcluded("anything", patterns)).isFalse(); + } + + // ===== Integration Test ===== + + @Test + public void testBasicIntegrationScenario() { + String patterns = "health,api/*,admin/**"; Review Comment: nit: why is this marked as an integration test? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
