This is an automated email from the ASF dual-hosted git repository. elharo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/maven.git
The following commit(s) were added to refs/heads/master by this push: new 688c9c8d01 [MNG-8241] Handle non-BMP characters when comparing versions (#2071) 688c9c8d01 is described below commit 688c9c8d011f07ae8cbb2285959a319922e98219 Author: Elliotte Rusty Harold <elh...@users.noreply.github.com> AuthorDate: Sun Feb 9 14:48:53 2025 +0000 [MNG-8241] Handle non-BMP characters when comparing versions (#2071) * Handle non-BMP characters * Treat non-ASCII digits as strings --- .../artifact/versioning/ComparableVersion.java | 23 ++++++++--- .../artifact/versioning/ComparableVersionTest.java | 44 ++++++++++++++++++---- 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java b/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java index 2ed439d375..f76a4d15ea 100644 --- a/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java +++ b/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java @@ -42,7 +42,7 @@ * <li>unlimited number of version components,</li> * <li>version components in the text can be digits or strings,</li> * <li>strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering. - * Well-known qualifiers (case insensitive) are:<ul> + * Well-known qualifiers (case-insensitive) are:<ul> * <li><code>alpha</code> or <code>a</code></li> * <li><code>beta</code> or <code>b</code></li> * <li><code>milestone</code> or <code>m</code></li> @@ -51,9 +51,9 @@ * <li><code>(the empty string)</code> or <code>ga</code> or <code>final</code></li> * <li><code>sp</code></li> * </ul> - * Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive), + * Unknown qualifiers are considered after known qualifiers, with lexical order (always case-insensitive), * </li> - * <li>a hyphen usually precedes a qualifier, and is always less important than digits/number, for example + * <li>A hyphen usually precedes a qualifier, and is always less important than digits/number. For example * {@code 1.0.RC2 < 1.0-RC3 < 1.0.1}; but prefer {@code 1.0.0-RC1} over {@code 1.0.0.RC1}, and more * generally: {@code 1.0.X2 < 1.0-X3 < 1.0.1} for any string {@code X}; but prefer {@code 1.0.0-X1} * over {@code 1.0.0.X1}.</li> @@ -656,7 +656,20 @@ public final void parseVersion(String version) { int startIndex = 0; for (int i = 0; i < version.length(); i++) { - char c = version.charAt(i); + char character = version.charAt(i); + int c = character; + if (Character.isHighSurrogate(character)) { + // read the next character as a low surrogate and combine into a single int + try { + char low = version.charAt(i + 1); + char[] both = {character, low}; + c = Character.codePointAt(both, 0); + i++; + } catch (IndexOutOfBoundsException ex) { + // high surrogate without low surrogate. Not a lot we can do here except treat it as a regular + // character + } + } if (c == '.') { if (i == startIndex) { @@ -687,7 +700,7 @@ public final void parseVersion(String version) { stack.push(list); } isCombination = false; - } else if (Character.isDigit(c)) { + } else if (c >= '0' && c <= '9') { // Check for ASCII digits only if (!isDigit && i > startIndex) { // X1 isCombination = true; diff --git a/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java b/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java index d7616405bd..219d760bab 100644 --- a/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java +++ b/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java @@ -27,7 +27,6 @@ /** * Test ComparableVersion. - * */ @SuppressWarnings("unchecked") class ComparableVersionTest { @@ -222,6 +221,23 @@ void testLeadingZeroes() { checkVersionsOrder("0.2", "1.0.7"); } + @Test + void testDigitGreaterThanNonAscii() { + ComparableVersion c1 = new ComparableVersion("1"); + ComparableVersion c2 = new ComparableVersion("é"); + assertTrue(c1.compareTo(c2) > 0, "expected " + "1" + " > " + "\uD835\uDFE4"); + assertTrue(c2.compareTo(c1) < 0, "expected " + "\uD835\uDFE4" + " < " + "1"); + } + + @Test + void testDigitGreaterThanNonBmpCharacters() { + ComparableVersion c1 = new ComparableVersion("1"); + // MATHEMATICAL SANS-SERIF DIGIT TWO + ComparableVersion c2 = new ComparableVersion("\uD835\uDFE4"); + assertTrue(c1.compareTo(c2) > 0, "expected " + "1" + " > " + "\uD835\uDFE4"); + assertTrue(c2.compareTo(c1) < 0, "expected " + "\uD835\uDFE4" + " < " + "1"); + } + @Test void testGetCanonical() { // MNG-7700 @@ -238,13 +254,25 @@ void testGetCanonical() { @Test void testCompareDigitToLetter() { - ComparableVersion c1 = new ComparableVersion("7"); - ComparableVersion c2 = new ComparableVersion("J"); - ComparableVersion c3 = new ComparableVersion("c"); - assertTrue(c1.compareTo(c2) > 0, "expected 7 > J"); - assertTrue(c2.compareTo(c1) < 0, "expected J < 1"); - assertTrue(c1.compareTo(c3) > 0, "expected 7 > c"); - assertTrue(c3.compareTo(c1) < 0, "expected c < 7"); + ComparableVersion seven = new ComparableVersion("7"); + ComparableVersion capitalJ = new ComparableVersion("J"); + ComparableVersion lowerCaseC = new ComparableVersion("c"); + // Digits are greater than letters + assertTrue(seven.compareTo(capitalJ) > 0, "expected 7 > J"); + assertTrue(capitalJ.compareTo(seven) < 0, "expected J < 1"); + assertTrue(seven.compareTo(lowerCaseC) > 0, "expected 7 > c"); + assertTrue(lowerCaseC.compareTo(seven) < 0, "expected c < 7"); + } + + @Test + void testNonAsciiDigits() { // These should not be treated as digits. + ComparableVersion asciiOne = new ComparableVersion("1"); + ComparableVersion arabicEight = new ComparableVersion("\u0668"); + ComparableVersion asciiNine = new ComparableVersion("9"); + assertTrue(asciiOne.compareTo(arabicEight) > 0, "expected " + "1" + " > " + "\u0668"); + assertTrue(arabicEight.compareTo(asciiOne) < 0, "expected " + "\u0668" + " < " + "1"); + assertTrue(asciiNine.compareTo(arabicEight) > 0, "expected " + "9" + " > " + "\u0668"); + assertTrue(arabicEight.compareTo(asciiNine) < 0, "expected " + "\u0668" + " < " + "9"); } @Test