morrySnow commented on code in PR #49087: URL: https://github.com/apache/doris/pull/49087#discussion_r2000177596
########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -303,17 +308,19 @@ public static Expression left(StringLikeLiteral first, IntegerLiteral second) { */ @ExecFunction(name = "right") public static Expression right(StringLikeLiteral first, IntegerLiteral second) { - if (second.getValue() < (- first.getValue().length()) || Math.abs(second.getValue()) == 0) { + int inputLength = first.getValue().offsetByCodePoints(0, first.getValue().length()); + if (second.getValue() < (- inputLength) || Math.abs(second.getValue()) == 0) { return castStringLikeLiteral(first, ""); - } else if (second.getValue() > first.getValue().length()) { + } else if (second.getValue() > inputLength) { return first; } else { + int index = first.getValue().offsetByCodePoints(0, second.getValue()); Review Comment: This throws an exception when `second.getValue < 0`. When `second.getValue < 0`, it should instead call `int index = first.getValue().offsetByCodePoints(inputLength, second.getValue());` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## Review Comment: all offsetByCodePoints should be replaced by codePointCount ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -303,17 +308,19 @@ public static Expression left(StringLikeLiteral first, IntegerLiteral second) { */ @ExecFunction(name = "right") public static Expression right(StringLikeLiteral first, IntegerLiteral second) { - if (second.getValue() < (- first.getValue().length()) || Math.abs(second.getValue()) == 0) { + int inputLength = first.getValue().offsetByCodePoints(0, first.getValue().length()); Review Comment: ```suggestion int inputLength = first.getValue().codePointCount(0, first.getValue().length()); ``` ########## 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## Review Comment: we'd better add FE UT too to ensure these functions work as expected rather than throw exception ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) { return new VarcharLiteral(sb.toString()); } + /** + * split by char by empty string considering emoji + * @param first input string to be split + * @return ArrayLiteral + */ + public static Expression splitByGrapheme(StringLikeLiteral first) { + List<String> result = new ArrayList<>(); + int length = first.getValue().length(); + for (int i = 0; i < length; ) { + int codePoint = first.getValue().codePointAt(i); + int charCount = Character.charCount(codePoint); + result.add(first.getValue().substring(i, i + charCount)); + i += charCount; + } + List<Literal> items = new ArrayList<>(); + for (String s : result) { + items.add((Literal) castStringLikeLiteral(first, s)); + } Review Comment: then remove this for loop ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -289,12 +292,14 @@ public static Expression replace(StringLikeLiteral first, StringLikeLiteral seco */ @ExecFunction(name = "left") public static Expression left(StringLikeLiteral first, IntegerLiteral second) { + int inputLength = first.getValue().offsetByCodePoints(0, first.getValue().length()); if (second.getValue() <= 0) { return castStringLikeLiteral(first, ""); - } else if (second.getValue() < first.getValue().length()) { - return castStringLikeLiteral(first, first.getValue().substring(0, second.getValue())); - } else { + } else if (second.getValue() > inputLength) { Review Comment: ```suggestion } else if (second.getValue() >= inputLength) { ``` ########## 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) { return new VarcharLiteral(sb.toString()); } + /** + * split by char by empty string considering emoji + * @param first input string to be split + * @return ArrayLiteral + */ + public static Expression splitByGrapheme(StringLikeLiteral first) { Review Comment: ```suggestion public static Expression splitByGrapheme(StringLikeLiteral str) { ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -303,17 +308,19 @@ public static Expression left(StringLikeLiteral first, IntegerLiteral second) { */ @ExecFunction(name = "right") public static Expression right(StringLikeLiteral first, IntegerLiteral second) { - if (second.getValue() < (- first.getValue().length()) || Math.abs(second.getValue()) == 0) { + int inputLength = first.getValue().offsetByCodePoints(0, first.getValue().length()); + if (second.getValue() < (- inputLength) || Math.abs(second.getValue()) == 0) { return castStringLikeLiteral(first, ""); - } else if (second.getValue() > first.getValue().length()) { + } else if (second.getValue() > inputLength) { Review Comment: ```suggestion } else if (second.getValue() >= inputLength) { ``` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -333,7 +340,7 @@ public static Expression locate(StringLikeLiteral first, StringLikeLiteral secon public static Expression locate(StringLikeLiteral first, StringLikeLiteral second, IntegerLiteral third) { int result = second.getValue().indexOf(first.getValue()) + 1; if (third.getValue() <= 0 || !substringImpl(second.getValue(), third.getValue(), - second.getValue().length()).contains(first.getValue())) { + second.getValue().offsetByCodePoints(0, 
second.getValue().length())).contains(first.getValue())) { Review Comment: Please add a comment explaining which situation is being handled by the call to `!substringImpl(second.getValue(), third.getValue(), second.getValue().offsetByCodePoints(0, second.getValue().length())).contains(first.getValue())` ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) { return new VarcharLiteral(sb.toString()); } + /** + * split by char by empty string considering emoji + * @param first input string to be split + * @return ArrayLiteral + */ + public static Expression splitByGrapheme(StringLikeLiteral first) { + List<String> result = new ArrayList<>(); Review Comment: List<Literal> result = Lists.newArrayListWithExpectedSize(first.getValue().length()); ########## fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java: ########## @@ -659,6 +666,27 @@ public static Expression space(IntegerLiteral first) { return new VarcharLiteral(sb.toString()); } + /** + * split by char by empty string considering emoji + * @param first input string to be split + * @return ArrayLiteral + */ + public static Expression splitByGrapheme(StringLikeLiteral first) { + List<String> result = new ArrayList<>(); + int length = first.getValue().length(); + for (int i = 0; i < length; ) { + int codePoint = first.getValue().codePointAt(i); + int charCount = Character.charCount(codePoint); + result.add(first.getValue().substring(i, i + charCount)); Review Comment: Maybe a better way: ```suggestion result.add(castStringLikeLiteral(first, new String(new int[]{codePoint}, 0, 1))); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org