This is an automated email from the ASF dual-hosted git repository.

elharo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/maven.git


The following commit(s) were added to refs/heads/master by this push:
     new 688c9c8d01 [MNG-8241] Handle non-BMP characters when comparing 
versions (#2071)
688c9c8d01 is described below

commit 688c9c8d011f07ae8cbb2285959a319922e98219
Author: Elliotte Rusty Harold <elh...@users.noreply.github.com>
AuthorDate: Sun Feb 9 14:48:53 2025 +0000

    [MNG-8241] Handle non-BMP characters when comparing versions (#2071)
    
    * Handle non-BMP characters
    * Treat non-ASCII digits as strings
---
 .../artifact/versioning/ComparableVersion.java     | 23 ++++++++---
 .../artifact/versioning/ComparableVersionTest.java | 44 ++++++++++++++++++----
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git 
a/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java
 
b/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java
index 2ed439d375..f76a4d15ea 100644
--- 
a/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java
+++ 
b/compat/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java
@@ -42,7 +42,7 @@
  * <li>unlimited number of version components,</li>
  * <li>version components in the text can be digits or strings,</li>
  * <li>strings are checked for well-known qualifiers and the qualifier 
ordering is used for version ordering.
- *     Well-known qualifiers (case insensitive) are:<ul>
+ *     Well-known qualifiers (case-insensitive) are:<ul>
  *     <li><code>alpha</code> or <code>a</code></li>
  *     <li><code>beta</code> or <code>b</code></li>
  *     <li><code>milestone</code> or <code>m</code></li>
@@ -51,9 +51,9 @@
  *     <li><code>(the empty string)</code> or <code>ga</code> or 
<code>final</code></li>
  *     <li><code>sp</code></li>
  *     </ul>
- *     Unknown qualifiers are considered after known qualifiers, with lexical 
order (always case insensitive),
+ *     Unknown qualifiers are considered after known qualifiers, with lexical 
order (always case-insensitive),
  *   </li>
- * <li>a hyphen usually precedes a qualifier, and is always less important 
than digits/number, for example
+ * <li>A hyphen usually precedes a qualifier, and is always less important 
than digits/number. For example
  *   {@code 1.0.RC2 < 1.0-RC3 < 1.0.1}; but prefer {@code 1.0.0-RC1} over 
{@code 1.0.0.RC1}, and more
  *   generally: {@code 1.0.X2 < 1.0-X3 < 1.0.1} for any string {@code X}; but 
prefer {@code 1.0.0-X1}
  *   over {@code 1.0.0.X1}.</li>
@@ -656,7 +656,20 @@ public final void parseVersion(String version) {
         int startIndex = 0;
 
         for (int i = 0; i < version.length(); i++) {
-            char c = version.charAt(i);
+            char character = version.charAt(i);
+            int c = character;
+            if (Character.isHighSurrogate(character)) {
+                // read the next character as a low surrogate and combine into 
a single int
+                try {
+                    char low = version.charAt(i + 1);
+                    char[] both = {character, low};
+                    c = Character.codePointAt(both, 0);
+                    i++;
+                } catch (IndexOutOfBoundsException ex) {
+                    // high surrogate without low surrogate. Not a lot we can 
do here except treat it as a regular
+                    // character
+                }
+            }
 
             if (c == '.') {
                 if (i == startIndex) {
@@ -687,7 +700,7 @@ public final void parseVersion(String version) {
                     stack.push(list);
                 }
                 isCombination = false;
-            } else if (Character.isDigit(c)) {
+            } else if (c >= '0' && c <= '9') { // Check for ASCII digits only
                 if (!isDigit && i > startIndex) {
                     // X1
                     isCombination = true;
diff --git 
a/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java
 
b/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java
index d7616405bd..219d760bab 100644
--- 
a/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java
+++ 
b/compat/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java
@@ -27,7 +27,6 @@
 
 /**
  * Test ComparableVersion.
- *
  */
 @SuppressWarnings("unchecked")
 class ComparableVersionTest {
@@ -222,6 +221,23 @@ void testLeadingZeroes() {
         checkVersionsOrder("0.2", "1.0.7");
     }
 
+    @Test
+    void testDigitGreaterThanNonAscii() { // An ASCII digit must compare greater than a non-ASCII letter.
+        ComparableVersion c1 = new ComparableVersion("1");
+        ComparableVersion c2 = new ComparableVersion("é");
+        assertTrue(c1.compareTo(c2) > 0, "expected " + "1" + " > " + "é");
+        assertTrue(c2.compareTo(c1) < 0, "expected " + "é" + " < " + "1");
+    }
+
+    @Test
+    void testDigitGreaterThanNonBmpCharacters() {
+        // U+1D7E4 MATHEMATICAL SANS-SERIF DIGIT TWO, written as a surrogate pair
+        ComparableVersion nonBmpDigit = new ComparableVersion("\uD835\uDFE4");
+        ComparableVersion asciiOne = new ComparableVersion("1");
+        assertTrue(asciiOne.compareTo(nonBmpDigit) > 0, "expected " + "1" + " > " + "\uD835\uDFE4");
+        assertTrue(nonBmpDigit.compareTo(asciiOne) < 0, "expected " + "\uD835\uDFE4" + " < " + "1");
+    }
+
     @Test
     void testGetCanonical() {
         // MNG-7700
@@ -238,13 +254,25 @@ void testGetCanonical() {
 
     @Test
     void testCompareDigitToLetter() {
-        ComparableVersion c1 = new ComparableVersion("7");
-        ComparableVersion c2 = new ComparableVersion("J");
-        ComparableVersion c3 = new ComparableVersion("c");
-        assertTrue(c1.compareTo(c2) > 0, "expected 7 > J");
-        assertTrue(c2.compareTo(c1) < 0, "expected J < 1");
-        assertTrue(c1.compareTo(c3) > 0, "expected 7 > c");
-        assertTrue(c3.compareTo(c1) < 0, "expected c < 7");
+        ComparableVersion seven = new ComparableVersion("7");
+        ComparableVersion capitalJ = new ComparableVersion("J");
+        ComparableVersion lowerCaseC = new ComparableVersion("c");
+        // Digits are greater than letters, regardless of letter case; each pair is checked in both directions
+        assertTrue(seven.compareTo(capitalJ) > 0, "expected 7 > J");
+        assertTrue(capitalJ.compareTo(seven) < 0, "expected J < 7");
+        assertTrue(seven.compareTo(lowerCaseC) > 0, "expected 7 > c");
+        assertTrue(lowerCaseC.compareTo(seven) < 0, "expected c < 7");
+    }
+
+    @Test
+    void testNonAsciiDigits() { // U+0668 (ARABIC-INDIC DIGIT EIGHT) must sort below ASCII digits.
+        ComparableVersion arabicIndicEight = new ComparableVersion("\u0668");
+        ComparableVersion one = new ComparableVersion("1");
+        ComparableVersion nine = new ComparableVersion("9");
+        assertTrue(one.compareTo(arabicIndicEight) > 0, "expected " + "1" + " > " + "\u0668");
+        assertTrue(arabicIndicEight.compareTo(one) < 0, "expected " + "\u0668" + " < " + "1");
+        assertTrue(nine.compareTo(arabicIndicEight) > 0, "expected " + "9" + " > " + "\u0668");
+        assertTrue(arabicIndicEight.compareTo(nine) < 0, "expected " + "\u0668" + " < " + "9");
     }
 
     @Test

Reply via email to