This is an automated email from the ASF dual-hosted git repository. nmalin pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/ofbiz-framework.git
The following commit(s) were added to refs/heads/trunk by this push: new 511ca1830d Improved: Truncate encoded string (OFBIZ-13167) (#844) 511ca1830d is described below commit 511ca1830d00c2002d4dc2f9f24a8c27264bedde Author: Nicolas Malin <nicolas.ma...@nereide.fr> AuthorDate: Tue Nov 12 10:15:40 2024 +0100 Improved: Truncate encoded string (OFBIZ-13167) (#844) * Improve the string truncation function On a model form, when you use a field description with a text size such as: <form...> <field name="myfield" ...><display size="15"/></field> </form> and the field myfield contains a string with a special character, like "my char é, so bad truncate", the widget rendering displays "my char &ea...ate". This happens because the string is already encoded when the truncation is executed, so some characters have been replaced by entities: "my char &eacut;, so bad truncate". The encoded string and the displayed string therefore have different lengths. To fix this, we implemented a new method to truncate the content. This method counts each encoded special character as a single character when truncating. 
"my char é, so bad truncate", encoded to "my char &eacut;, so bad truncate", becomes "my char &eacut;, ...ate" and not "my char &eac...ate". Thanks: Charles Steltzlen, who helped to solve it. --- .../org/apache/ofbiz/base/util/StringUtil.java | 97 ++++++++++++++++++++++ .../apache/ofbiz/base/util/StringUtilTests.java | 30 +++++++ .../java/org/apache/ofbiz/widget/WidgetWorker.java | 3 +- .../widget/renderer/macro/MacroFormRenderer.java | 2 +- .../macro/RenderableFtlFormElementsBuilder.java | 6 +- 5 files changed, 133 insertions(+), 5 deletions(-) diff --git a/framework/base/src/main/java/org/apache/ofbiz/base/util/StringUtil.java b/framework/base/src/main/java/org/apache/ofbiz/base/util/StringUtil.java index 8179fdcced..3df81e5c1c 100644 --- a/framework/base/src/main/java/org/apache/ofbiz/base/util/StringUtil.java +++ b/framework/base/src/main/java/org/apache/ofbiz/base/util/StringUtil.java @@ -22,6 +22,7 @@ import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -30,6 +31,8 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringTokenizer; +import java.util.regex.MatchResult; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -46,6 +49,7 @@ public final class StringUtil { public static final StringUtil INSTANCE = new StringUtil(); private static final String MODULE = StringUtil.class.getName(); private static final Map<String, Pattern> SUBSTITUTION_PATTERN_MAP = createSubstitutionPatternMap(); + private static final Pattern SPECIAL_CHAR = Pattern.compile("\\&[\\#\\S&&[^\\&]]+\\;"); private static Map<String, Pattern> createSubstitutionPatternMap() { Map<String, Pattern> substitutionPatternMap = new LinkedHashMap<>(); // Preserve insertion order @@ -407,6 +411,99 @@ public final class 
StringUtil { return new StringWrapper(theString); } + /** + * For a content if the size large thant the textLength given, truncate it with to textSize and replace + * removed characters by '…' + * minimum size for truncate is 5 + * each 5 steps length the truncate add a character at the end + * @param content + * @param textLength + * @return truncate string prepare for correct displaying. + */ + public static String truncateEncodedStringToLength(String content, Integer textLength) { + if (UtilValidate.isEmpty(content) + || content.length() < textLength) { + return content; + } + int keepEndingChar = Double.valueOf(textLength / 5).intValue(); + int start = textLength <= 5 ? textLength : textLength - 1 - keepEndingChar; + int end = content.length() - keepEndingChar; + if (SPECIAL_CHAR.matcher(content).find()) { + start = getTruncateStartLimit(content, start); + end = getTruncateEndLimit(content, keepEndingChar); + if (end <= start) { + return content; + } + } + return String.format("%s…%s", + content.substring(0, start), + textLength <= 5 ? 
"" : content.substring(end)); + } + + /** + * Find the end of potential special char to scrap correctly with a special char present on the start scrap limit + * @param content + * @param startTruncateIndex + * @return index on content to start the truncate + */ + private static int getTruncateStartLimit(String content, int startTruncateIndex) { + if (startTruncateIndex < 0) { + return 0; + } + // convert any special char as one char + // we need to count each special char and the encoded char corresponding to + Matcher matcher = SPECIAL_CHAR.matcher(content); + int nbSpeCharFound = 0; + int nbCharToEscape = 0; + for (MatchResult matchResult : matcher.results() + .sorted(Comparator.comparingInt(MatchResult::end)) + .toList()) { + if (matchResult.start() < startTruncateIndex + nbCharToEscape - nbSpeCharFound) { + nbCharToEscape += matchResult.end() - matchResult.start(); + // we control that the special char isn't cut, if it's the case, return the end + if (matchResult.end() >= startTruncateIndex + nbCharToEscape - nbSpeCharFound) { + return matchResult.end(); + } + nbSpeCharFound++; + } + } + return startTruncateIndex + nbCharToEscape - nbSpeCharFound; + } + + /** + * Find the start of potential special char to truncate correctly with a special char present on the limit + * @param content + * @param keepEndingChar + * @return index on content to stop the truncate + */ + private static int getTruncateEndLimit(String content, int keepEndingChar) { + if (content.length() <= 5) { + return content.length(); + } + // convert any special char as one char + // we need to count each special char and the encoded char corresponding to + Matcher matcher = SPECIAL_CHAR.matcher(content); + int endCursor = content.length(); + int nbSpeCharFound = 0; + int nbCharToEscape = 0; + for (MatchResult matchResult : matcher.results() + .sorted(Comparator.comparingInt(MatchResult::end)) + .collect(Collectors.collectingAndThen(Collectors.toList(), list -> { + Collections.reverse(list); + 
return list; + }))) { + if (matchResult.end() > endCursor - nbCharToEscape + nbSpeCharFound - keepEndingChar) { + nbCharToEscape += matchResult.end() - matchResult.start(); + // we control that the special char isn't cut, if it's the case, return the start + if (matchResult.start() <= endCursor - nbCharToEscape + nbSpeCharFound - keepEndingChar) { + return matchResult.start(); + } + nbSpeCharFound++; + } + } + return endCursor - nbCharToEscape + nbSpeCharFound - keepEndingChar; + } + /** * A super-lightweight object to wrap a String object. Mainly used with FTL * templates to avoid the general HTML auto-encoding that is now done through diff --git a/framework/base/src/test/java/org/apache/ofbiz/base/util/StringUtilTests.java b/framework/base/src/test/java/org/apache/ofbiz/base/util/StringUtilTests.java index 48c1d2c861..67269c7b23 100644 --- a/framework/base/src/test/java/org/apache/ofbiz/base/util/StringUtilTests.java +++ b/framework/base/src/test/java/org/apache/ofbiz/base/util/StringUtilTests.java @@ -219,4 +219,34 @@ public class StringUtilTests { StringUtil.convertOperatorSubstitutions( "one @and two @or three @gt four @gteq five @lt six @lteq seven")); } + + @Test + public void testTruncateString() { + assertEquals("no truncate", "this is a truncated long string", + StringUtil.truncateEncodedStringToLength("this is a truncated long string", 40)); + assertEquals("no truncate to short", "this", + StringUtil.truncateEncodedStringToLength("this", 5)); + assertEquals("normal", "this is a t…ing", + StringUtil.truncateEncodedStringToLength("this is a truncated long string", 15)); + assertEquals("normal short", "this …", + StringUtil.truncateEncodedStringToLength("this is a truncated long string", 5)); + assertEquals("with parenthesis", "this ( are … ok", + StringUtil.truncateEncodedStringToLength("this ( are managed correctly ) ok", 15)); + assertEquals("with parenthesis at end", "this ( are …d )", + StringUtil.truncateEncodedStringToLength("this ( are managed 
correctly, with the end )", 15)); + assertEquals("with parenthesis and semicolon ignored", "this ( are …d )", + StringUtil.truncateEncodedStringToLength("this ( are a semicolon far ; managed correctly, with the end )", 15)); + assertEquals("with parenthesis and semicolon closer", "this ( are;…d )", + StringUtil.truncateEncodedStringToLength("this ( are; a semicolon closer managed correctly, with the end )", 15)); + assertEquals("with parenthesis and é managed", "this ( are&eacut;…end", + StringUtil.truncateEncodedStringToLength("this ( are&eacut; managed correctly, with the ) end", 15)); + assertEquals("with parenthesis and é é managed", "this ( a&eacut;e&eacut;…end", + StringUtil.truncateEncodedStringToLength("this ( a&eacut;e&eacut; managed correctly, with the ) end", 15)); + assertEquals("with parenthesis and é closer", "this ( are …n&eacut;d", + StringUtil.truncateEncodedStringToLength("this ( are & closer managed correctly, with th&e ) en&eacut;d", 15)); + assertEquals("with parenthesis and é é closer", "this ( are …&eacut;&eacut;d", + StringUtil.truncateEncodedStringToLength("this ( are & closer managed correctly, with th&e )en&eacut;&eacut;d", 15)); + assertEquals("with parenthesis and # # closer", "this ( are …ëëd", + StringUtil.truncateEncodedStringToLength("this ( are & closer managed correctly, with th&e )enëëd", 15)); + } } diff --git a/framework/widget/src/main/java/org/apache/ofbiz/widget/WidgetWorker.java b/framework/widget/src/main/java/org/apache/ofbiz/widget/WidgetWorker.java index d13529a272..e5ef25e3e0 100644 --- a/framework/widget/src/main/java/org/apache/ofbiz/widget/WidgetWorker.java +++ b/framework/widget/src/main/java/org/apache/ofbiz/widget/WidgetWorker.java @@ -26,6 +26,7 @@ import javax.servlet.http.HttpServletResponse; import org.apache.http.client.utils.URIBuilder; import org.apache.ofbiz.base.util.Debug; +import org.apache.ofbiz.base.util.StringUtil; import org.apache.ofbiz.base.util.UtilHttp; import 
org.apache.ofbiz.security.CsrfUtil; import org.apache.ofbiz.base.util.UtilGenerics; @@ -159,7 +160,7 @@ public final class WidgetWorker { // if description is truncated, always use description as title if (UtilValidate.isNotEmpty(description) && size > 0 && description.length() > size) { title = description; - description = description.substring(0, size) + "…"; + description = StringUtil.truncateEncodedStringToLength(description, size); } if (isNotEmpty(title)) { diff --git a/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/MacroFormRenderer.java b/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/MacroFormRenderer.java index d087fbbe09..b732b86c45 100644 --- a/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/MacroFormRenderer.java +++ b/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/MacroFormRenderer.java @@ -2319,7 +2319,7 @@ public final class MacroFormRenderer implements FormStringRenderer { // if description is truncated, always use description as title if (UtilValidate.isNotEmpty(description) && size > 0 && description.length() > size) { title = description; - description = description.substring(0, size) + "…"; + description = StringUtil.truncateEncodedStringToLength(description, size); } else if (UtilValidate.isNotEmpty(request.getAttribute("title"))) { title = request.getAttribute("title").toString(); } diff --git a/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/RenderableFtlFormElementsBuilder.java b/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/RenderableFtlFormElementsBuilder.java index beee77fa04..92e336131d 100644 --- a/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/RenderableFtlFormElementsBuilder.java +++ b/framework/widget/src/main/java/org/apache/ofbiz/widget/renderer/macro/RenderableFtlFormElementsBuilder.java @@ -156,7 +156,7 @@ public final class RenderableFtlFormElementsBuilder { boolean 
ajaxEnabled = inPlaceEditor != null && javaScriptEnabled; if (UtilValidate.isNotEmpty(description) && size > 0 && description.length() > size) { title = description; - description = description.substring(0, size - 8) + "..." + description.substring(description.length() - 5); + description = StringUtil.truncateEncodedStringToLength(description, size); } final RenderableFtlMacroCallBuilder builder = RenderableFtlMacroCall.builder() @@ -798,7 +798,7 @@ public final class RenderableFtlFormElementsBuilder { } if (UtilValidate.isNotEmpty(description) && size > 0 && description.length() > size) { title = description; - description = description.substring(0, size) + "…"; + description = StringUtil.truncateEncodedStringToLength(description, size); } else if (UtilValidate.isNotEmpty(request.getAttribute("title"))) { title = request.getAttribute("title").toString(); } @@ -1162,7 +1162,7 @@ public final class RenderableFtlFormElementsBuilder { private String truncate(String value, int maxCharacterLength) { if (maxCharacterLength > 8 && value.length() > maxCharacterLength) { - return value.substring(0, maxCharacterLength - 8) + "..." + value.substring(value.length() - 5); + return StringUtil.truncateEncodedStringToLength(value, maxCharacterLength); } return value; }