chenboat commented on code in PR #10897: URL: https://github.com/apache/pinot/pull/10897#discussion_r1235876975
########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,27 +70,54 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); + likePattern = likePattern.substring(start, end); + StringBuilder sb = new StringBuilder(); sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + // handling SQL wildcards by replacing them with corresponding regex equivalents Review Comment: Can you be more specific about which characters are wildcards in SQL? Please give a complete list. ########## pinot-common/src/test/java/org/apache/pinot/common/utils/RegexpPatternConverterUtilsTest.java: ########## @@ -125,4 +125,27 @@ public void testTrailingSize2() { String regexpLikePattern = RegexpPatternConverterUtils.likeToRegexpLike("z%"); assertEquals(regexpLikePattern, "^z"); } + + @Test + public void testEscapedWildcard1() { + String regexpLikePattern = RegexpPatternConverterUtils.likeToRegexpLike("a\\_b_\\"); + assertEquals(regexpLikePattern, "^a\\_b.\\\\$"); + String luceneRegExpPattern = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePattern); + assertEquals(luceneRegExpPattern, "a\\_b.\\\\"); Review Comment: Ditto as above. 
########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,27 +70,54 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); + likePattern = likePattern.substring(start, end); + StringBuilder sb = new StringBuilder(); sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + // handling SQL wildcards by replacing them with corresponding regex equivalents + // we ignore them if the SQL wildcards are escaped int i = 0; - while (i < sb.length()) { - char c = sb.charAt(i); + boolean isPrevCharBackSlash = false; Review Comment: Can we extract all of the code below into a function for escape-character handling? It is pretty long in its current form. ########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,27 +70,54 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); + likePattern = likePattern.substring(start, end); + StringBuilder sb = new StringBuilder(); sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + // handling SQL wildcards by replacing them with corresponding regex equivalents + // we ignore them if the SQL wildcards are escaped int i = 0; - while (i < sb.length()) { - char c = sb.charAt(i); + boolean isPrevCharBackSlash = false; + while (i < likePattern.length()) { + char c = likePattern.charAt(i); if (c == '_') { - sb.replace(i, i + 1, "."); + sb.append(isPrevCharBackSlash ? c : "."); } else if (c == '%') { - sb.replace(i, i + 1, ".*"); - i++; + sb.append(isPrevCharBackSlash ? 
c : ".*"); + } else if (indexOf(REGEXP_METACHARACTERS, c) >= 0) { + sb.append(BACK_SLASH).append(c); + } else { + if (isPrevCharBackSlash) { + // this means the previous character is a \ + // but it was not used for escaping SQL wildcards + // so let's escape this \ in the output + // this case is separately handled outside of the meta characters list + sb.append(BACK_SLASH); + } + sb.append(c); } - i++; + isPrevCharBackSlash = (c == BACK_SLASH); + ++i; + } + + // handle trailing \ + if (isPrevCharBackSlash) { + sb.append(BACK_SLASH); } + sb.append(suffix); return sb.toString(); } + private static int indexOf(char[] arr, char c) { Review Comment: Why re-implement indexOf rather than use an existing library? For example: https://commons.apache.org/proper/commons-collections/apidocs/org/apache/commons/collections4/IterableUtils.html#find-java.lang.Iterable-org.apache.commons.collections4.Predicate- -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org