atris commented on code in PR #10897: URL: https://github.com/apache/pinot/pull/10897#discussion_r1236485819
########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,24 +72,56 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); - sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + likePattern = likePattern.substring(start, end); + return escapeMetaCharsAndWildcards(likePattern, prefix, suffix); + } + /** + * Escapes the provided pattern by considering the following constraints: + * <ul> + * <li> SQL wildcards escaping is handled (_, %) </li> + * <li> Regex meta characters escaping is handled </li> + * </ul> + * @param input the provided input string + * @param prefix the prefix to be added to the output string + * @param suffix the suffix to be added to the output string + * @return the final output string + */ + private static String escapeMetaCharsAndWildcards(String input, String prefix, String suffix) { + StringBuilder sb = new StringBuilder(); + sb.append(prefix); + // handling SQL wildcards (_, %) by replacing them with corresponding regex equivalents + // we ignore them if the SQL wildcards are escaped int i = 0; - while (i < sb.length()) { - char c = sb.charAt(i); + boolean isPrevCharBackSlash = false; + while (i < input.length()) { Review Comment: Nit: Cache this length locally ########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,24 +72,56 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); - sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + likePattern = likePattern.substring(start, end); + return escapeMetaCharsAndWildcards(likePattern, prefix, suffix); + } + /** + * Escapes the provided pattern by 
considering the following constraints: + * <ul> + * <li> SQL wildcards escaping is handled (_, %) </li> + * <li> Regex meta characters escaping is handled </li> + * </ul> + * @param input the provided input string + * @param prefix the prefix to be added to the output string + * @param suffix the suffix to be added to the output string + * @return the final output string + */ + private static String escapeMetaCharsAndWildcards(String input, String prefix, String suffix) { + StringBuilder sb = new StringBuilder(); + sb.append(prefix); + // handling SQL wildcards (_, %) by replacing them with corresponding regex equivalents + // we ignore them if the SQL wildcards are escaped int i = 0; - while (i < sb.length()) { - char c = sb.charAt(i); + boolean isPrevCharBackSlash = false; + while (i < input.length()) { + char c = input.charAt(i); if (c == '_') { - sb.replace(i, i + 1, "."); + sb.append(isPrevCharBackSlash ? c : "."); } else if (c == '%') { - sb.replace(i, i + 1, ".*"); - i++; + sb.append(isPrevCharBackSlash ? c : ".*"); + } else if (Chars.indexOf(REGEXP_METACHARACTERS, c) >= 0) { + sb.append(BACK_SLASH).append(c); + } else { + if (isPrevCharBackSlash) { + // this means the previous character is a \ + // but it was not used for escaping SQL wildcards + // so let's escape this \ in the output + // this case is separately handled outside of the meta characters list + sb.append(BACK_SLASH); + } + sb.append(c); } - i++; + isPrevCharBackSlash = (c == BACK_SLASH); + ++i; Review Comment: Nit: Why the pre increment? 
########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,24 +72,56 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); - sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + likePattern = likePattern.substring(start, end); + return escapeMetaCharsAndWildcards(likePattern, prefix, suffix); + } + /** + * Escapes the provided pattern by considering the following constraints: + * <ul> + * <li> SQL wildcards escaping is handled (_, %) </li> + * <li> Regex meta characters escaping is handled </li> + * </ul> + * @param input the provided input string + * @param prefix the prefix to be added to the output string + * @param suffix the suffix to be added to the output string + * @return the final output string + */ + private static String escapeMetaCharsAndWildcards(String input, String prefix, String suffix) { + StringBuilder sb = new StringBuilder(); + sb.append(prefix); + // handling SQL wildcards (_, %) by replacing them with corresponding regex equivalents + // we ignore them if the SQL wildcards are escaped int i = 0; - while (i < sb.length()) { - char c = sb.charAt(i); + boolean isPrevCharBackSlash = false; + while (i < input.length()) { + char c = input.charAt(i); if (c == '_') { - sb.replace(i, i + 1, "."); + sb.append(isPrevCharBackSlash ? c : "."); } else if (c == '%') { - sb.replace(i, i + 1, ".*"); - i++; + sb.append(isPrevCharBackSlash ? c : ".*"); + } else if (Chars.indexOf(REGEXP_METACHARACTERS, c) >= 0) { Review Comment: Nit: I wonder if we can model this as a switch statement. 
The `if` branching hurts my eyes ########## pinot-common/src/main/java/org/apache/pinot/common/utils/RegexpPatternConverterUtils.java: ########## @@ -64,24 +71,42 @@ public static String likeToRegexpLike(String likePattern) { break; } - String escaped = escapeMetaCharacters(likePattern.substring(start, end)); - StringBuilder sb = new StringBuilder(escaped.length() + 2); + likePattern = likePattern.substring(start, end); + StringBuilder sb = new StringBuilder(); sb.append(prefix); - sb.append(escaped); - sb.append(suffix); + // handling SQL wildcards by replacing them with corresponding regex equivalents + // we ignore them if the SQL wildcards are escaped int i = 0; - while (i < sb.length()) { - char c = sb.charAt(i); + boolean isPrevCharBackSlash = false; + while (i < likePattern.length()) { + char c = likePattern.charAt(i); if (c == '_') { - sb.replace(i, i + 1, "."); + sb.append(isPrevCharBackSlash ? c : "."); } else if (c == '%') { - sb.replace(i, i + 1, ".*"); - i++; + sb.append(isPrevCharBackSlash ? c : ".*"); Review Comment: That should not prevent the processing from being done in a separate method. I would still advise moving it to a separate method to ensure a higher degree of readability. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org