This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-lang.git
commit 28cbf74d7c5875095456996264c43a3689a82db5 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Tue Apr 15 09:43:31 2025 -0400 [LANG-1770] StringUtils.abbreviate is not emoji aware, breaks surrogate pairs WIP test --- .../commons/lang3/StringUtilsAbbreviateTest.java | 110 ++++++++++++--------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsAbbreviateTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsAbbreviateTest.java index 1e75ce966..68efff057 100644 --- a/src/test/java/org/apache/commons/lang3/StringUtilsAbbreviateTest.java +++ b/src/test/java/org/apache/commons/lang3/StringUtilsAbbreviateTest.java @@ -18,6 +18,7 @@ package org.apache.commons.lang3; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -29,13 +30,60 @@ */ public class StringUtilsAbbreviateTest { + /** + * Tests <a href="LANG-1770">https://issues.apache.org/jira/projects/LANG/issues/LANG-1770</a>. + */ + @Test + public void testEmoji() { + // @formatter:off + final String[] expectedResultsFox = { + "🦊...", // 4 + "🦊🦊...", + "🦊🦊🦊...", + "🦊🦊🦊🦊...", + "🦊🦊🦊🦊🦊...", + "🦊🦊🦊🦊🦊🦊...", + "🦊🦊🦊🦊🦊🦊🦊...", // 10 + }; + final String[] expectedResultsFamilyWithCodepoints = { + "👩...", + "👩🏻...", + "👩🏻...", // zero width joiner + "👩🏻👨...", + "👩🏻👨🏻...", + "👩🏻👨🏻...", + "👩🏻👨🏻👦..." + }; + final String[] expectedResultsFamilyWithGrapheme = { + "👩🏻👨🏻👦🏻👦🏻...", // 4 + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼...", + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽...", + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽👩🏾👨🏾👦🏾👦🏾...", + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽👩🏾👨🏾👦🏾👦🏾👩🏿👨🏿👦🏿👦🏿...", + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽👩🏾👨🏾👦🏾👦🏾👩🏿👨🏿👦🏿👦🏿👩🏻👨🏻👦🏻👦🏻...", + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽👩🏾👨🏾👦🏾👦🏾👩🏿👨🏿👦🏿👦🏿👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼..." // 10 + }; + // @formatter:on + for (int i = 4; i <= 10; i++) { + final String abbreviateResult = StringUtils.abbreviate("🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊", i); + assertNotNull(abbreviateResult); + // assertEquals(expectedResultsFox[i - 4], abbreviateResult); + } + for (int i = 4; i <= 10; i++) { + final String abbreviateResult = StringUtils.abbreviate( + "👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽👩🏾👨🏾👦🏾👦🏾👩🏿👨🏿👦🏿👦🏿👩🏻👨🏻👦🏻👦🏻👩🏼👨🏼👦🏼👦🏼👩🏽👨🏽👦🏽👦🏽👩🏾👨🏾👦🏾👦🏾👩🏿👨🏿👦🏿👦🏿", + i); + assertNotNull(abbreviateResult); + // assertEquals(expectedResultsFamilyWithCodepoints[i - 4], abbreviateResult); + } + } + private void assertAbbreviateWithAbbrevMarkerAndOffset(final String expected, final String abbrevMarker, final int offset, final int maxWidth) { final String abcdefghijklmno = "abcdefghijklmno"; final String message = "abbreviate(String,String,int,int) failed"; final String actual = StringUtils.abbreviate(abcdefghijklmno, abbrevMarker, offset, maxWidth); if (offset >= 0 && offset < abcdefghijklmno.length()) { - assertTrue(actual.indexOf((char) ('a' + offset)) != -1, - message + " -- should contain offset character"); + assertTrue(actual.indexOf((char) ('a' + offset)) != -1, message + " -- should contain offset character"); } assertTrue(actual.length() <= maxWidth, () -> message + " -- should not be greater than maxWidth"); assertEquals(expected, actual, message); @@ -46,8 +94,7 @@ private void assertAbbreviateWithOffset(final String expected, final int offset, final String message = "abbreviate(String,int,int) failed"; final String actual = StringUtils.abbreviate(abcdefghijklmno, offset, maxWidth); if (offset >= 0 && offset < abcdefghijklmno.length()) { - assertTrue(actual.indexOf((char) ('a' + offset)) != -1, - message + " -- should contain offset character"); + assertTrue(actual.indexOf((char) ('a' + offset)) != -1, message + " -- should contain offset character"); } assertTrue(actual.length() <= maxWidth, () -> message + " -- should not be greater than maxWidth"); assertEquals(expected, actual, message); @@ -59,7 +106,6 @@ public void testAbbreviate_StringInt() { assertEquals("", StringUtils.abbreviate("", 10)); assertEquals("short", StringUtils.abbreviate("short", 10)); assertEquals("Now is ...", StringUtils.abbreviate("Now is the time for all good men to come to the aid of their party.", 10)); - final String raspberry = "raspberry peach"; assertEquals("raspberry p...", StringUtils.abbreviate(raspberry, 14)); assertEquals("raspberry peach", StringUtils.abbreviate("raspberry peach", 15)); @@ -69,11 +115,7 @@ public void testAbbreviate_StringInt() { assertEquals("abcdefg", StringUtils.abbreviate("abcdefg", 8)); assertEquals("a...", StringUtils.abbreviate("abcdefg", 4)); assertEquals("", StringUtils.abbreviate("", 4)); - - assertThrows( - IllegalArgumentException.class, - () -> StringUtils.abbreviate("abc", 3), - "StringUtils.abbreviate expecting IllegalArgumentException"); + assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abc", 3), "StringUtils.abbreviate expecting IllegalArgumentException"); } @Test @@ -81,19 +123,12 @@ public void testAbbreviate_StringIntInt() { assertNull(StringUtils.abbreviate(null, 10, 12)); assertEquals("", StringUtils.abbreviate("", 0, 10)); assertEquals("", StringUtils.abbreviate("", 2, 10)); - - assertThrows( - IllegalArgumentException.class, - () -> StringUtils.abbreviate("abcdefghij", 0, 3), + assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", 0, 3), "StringUtils.abbreviate expecting IllegalArgumentException"); - assertThrows( - IllegalArgumentException.class, - () -> StringUtils.abbreviate("abcdefghij", 5, 6), + assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", 5, 6), "StringUtils.abbreviate expecting IllegalArgumentException"); - final String raspberry = "raspberry peach"; assertEquals("raspberry peach", StringUtils.abbreviate(raspberry, 11, 15)); - assertNull(StringUtils.abbreviate(null, 7, 14)); assertAbbreviateWithOffset("abcdefg...", -1, 10); assertAbbreviateWithOffset("abcdefg...", 0, 10); @@ -124,7 +159,6 @@ public void testAbbreviate_StringStringInt() { assertEquals("", StringUtils.abbreviate("", "...", 2)); assertEquals("wai**", StringUtils.abbreviate("waiheke", "**", 5)); assertEquals("And af,,,,", StringUtils.abbreviate("And after a long time, he finally met his son.", ",,,,", 10)); - final String raspberry = "raspberry peach"; assertEquals("raspberry pe..", StringUtils.abbreviate(raspberry, "..", 14)); assertEquals("raspberry peach", StringUtils.abbreviate("raspberry peach", "---*---", 15)); @@ -134,10 +168,7 @@ public void testAbbreviate_StringStringInt() { assertEquals("abcdefg", StringUtils.abbreviate("abcdefg", "_-", 8)); assertEquals("abc.", StringUtils.abbreviate("abcdefg", ".", 4)); assertEquals("", StringUtils.abbreviate("", 4)); - - assertThrows( - IllegalArgumentException.class, - () -> StringUtils.abbreviate("abcdefghij", "...", 3), + assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", "...", 3), "StringUtils.abbreviate expecting IllegalArgumentException"); } @@ -147,19 +178,12 @@ public void testAbbreviate_StringStringIntInt() { assertNull(StringUtils.abbreviate(null, "...", 10, 12)); assertEquals("", StringUtils.abbreviate("", null, 0, 10)); assertEquals("", StringUtils.abbreviate("", "...", 2, 10)); - - assertThrows( - IllegalArgumentException.class, - () -> StringUtils.abbreviate("abcdefghij", "::", 0, 2), + assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", "::", 0, 2), "StringUtils.abbreviate expecting IllegalArgumentException"); - assertThrows( - IllegalArgumentException.class, - () -> StringUtils.abbreviate("abcdefghij", "!!!", 5, 6), + assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", "!!!", 5, 6), "StringUtils.abbreviate expecting IllegalArgumentException"); - final String raspberry = "raspberry peach"; assertEquals("raspberry peach", StringUtils.abbreviate(raspberry, "--", 12, 15)); - assertNull(StringUtils.abbreviate(null, ";", 7, 14)); assertAbbreviateWithAbbrevMarkerAndOffset("abcdefgh;;", ";;", -1, 10); assertAbbreviateWithAbbrevMarkerAndOffset("abcdefghi.", ".", 0, 10); @@ -183,7 +207,7 @@ public void testAbbreviate_StringStringIntInt() { assertAbbreviateWithAbbrevMarkerAndOffset("+ghijklmno", "+", Integer.MAX_VALUE, 10); } - //Fixed LANG-1463 + // Fixed LANG-1463 @Test public void testAbbreviateMarkerWithEmptyString() { final String greaterThanMaxTest = "much too long text"; @@ -198,34 +222,22 @@ public void testAbbreviateMiddle() { assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 0)); assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 3)); assertEquals("ab.f", StringUtils.abbreviateMiddle("abcdef", ".", 4)); - // JIRA issue (LANG-405) example (slightly different than actual expected result) - assertEquals( - "A very long text with un...f the text is complete.", - StringUtils.abbreviateMiddle( - "A very long text with unimportant stuff in the middle but interesting start and " + - "end to see if the text is complete.", "...", 50)); - + assertEquals("A very long text with un...f the text is complete.", StringUtils.abbreviateMiddle( + "A very long text with unimportant stuff in the middle but interesting start and " + "end to see if the text is complete.", "...", 50)); // Test a much longer text :) final String longText = "Start text" + StringUtils.repeat("x", 10000) + "Close text"; - assertEquals( - "Start text->Close text", - StringUtils.abbreviateMiddle(longText, "->", 22)); - + assertEquals("Start text->Close text", StringUtils.abbreviateMiddle(longText, "->", 22)); // Test negative length assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", -1)); - // Test boundaries // Fails to change anything as method ensures first and last char are kept assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 1)); assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 2)); - // Test length of n=1 assertEquals("a", StringUtils.abbreviateMiddle("a", ".", 1)); - // Test smallest length that can lead to success assertEquals("a.d", StringUtils.abbreviateMiddle("abcd", ".", 3)); - // More from LANG-405 assertEquals("a..f", StringUtils.abbreviateMiddle("abcdef", "..", 4)); assertEquals("ab.ef", StringUtils.abbreviateMiddle("abcdef", ".", 5));