Repository: commons-lang
Updated Branches:
  refs/heads/master 1e5c2b877 -> a40b2a907


LANG-1269: Wrong name or result of StringUtils#getJaroWinklerDistance (closes 
#198)

deprecate StringUtils#getJaroWinklerDistance and add 
StringUtils#getJaroWinklerSimilarity instead


Project: http://git-wip-us.apache.org/repos/asf/commons-lang/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-lang/commit/a40b2a90
Tree: http://git-wip-us.apache.org/repos/asf/commons-lang/tree/a40b2a90
Diff: http://git-wip-us.apache.org/repos/asf/commons-lang/diff/a40b2a90

Branch: refs/heads/master
Commit: a40b2a907a69e51675d7d0502b2608833c4da343
Parents: 1e5c2b8
Author: pascalschumacher <pascalschumac...@gmx.net>
Authored: Sat Oct 22 11:55:32 2016 +0200
Committer: pascalschumacher <pascalschumac...@gmx.net>
Committed: Fri Oct 28 19:19:22 2016 +0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |  1 +
 .../org/apache/commons/lang3/StringUtils.java   | 51 ++++++++++++++++++++
 .../apache/commons/lang3/StringUtilsTest.java   | 28 +++++++++++
 3 files changed, 80 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-lang/blob/a40b2a90/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b73f6bc..437d59b 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -46,6 +46,7 @@ The <action> type attribute can be add,update,fix,remove.
   <body>
 
   <release version="3.6" date="2016-MM-DD" description="TBD">
+    <action issue="LANG-1269" type="fix" dev="paschuma">Wrong name or result 
of StringUtils#getJaroWinklerDistance</action>
     <action issue="LANG-1188" type="fix" 
dev="paschuma">StringUtils#join(T...): warning: [unchecked] Possible heap 
pollution from parameterized vararg type T</action>
     <action issue="LANG-1144" type="fix" dev="ggregory" due-to="Waldemar 
Maier, Gary Gregory">Multiple calls of 
org.apache.commons.lang3.concurrent.LazyInitializer.initialize() are 
possible.</action>
     <action issue="LANG-1276" type="fix" dev="pschumacher" due-to="Andy 
Klimczak">StrBuilder#replaceAll ArrayIndexOutOfBoundsException</action>

http://git-wip-us.apache.org/repos/asf/commons-lang/blob/a40b2a90/src/main/java/org/apache/commons/lang3/StringUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java 
b/src/main/java/org/apache/commons/lang3/StringUtils.java
index f143177..a6c7c56 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -8009,7 +8009,9 @@ public class StringUtils {
      * @return result distance
      * @throws IllegalArgumentException if either String input {@code null}
      * @since 3.3
+     * @deprecated as of 3.6, due to a misleading name, use {@link 
#getJaroWinklerSimilarity(CharSequence, CharSequence)} instead
      */
+    @Deprecated
     public static double getJaroWinklerDistance(final CharSequence first, 
final CharSequence second) {
         final double DEFAULT_SCALING_FACTOR = 0.1;
 
@@ -8027,6 +8029,55 @@ public class StringUtils {
         return Math.round(jw * 100.0D) / 100.0D;
     }
 
+    /**
+     * <p>Find the Jaro Winkler Similarity which indicates the similarity 
score between two Strings.</p>
+     *
+     * <p>The Jaro measure is the weighted sum of the percentage of matched 
characters from each string and the transposed characters. 
+     * Winkler increased this measure for matching initial characters.</p>
+     *
+     * <p>This implementation is based on the Jaro Winkler similarity algorithm
+     * from <a 
href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance">http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>.</p>
+     * 
+     * <pre>
+     * StringUtils.getJaroWinklerSimilarity(null, null)          = 
IllegalArgumentException
+     * StringUtils.getJaroWinklerSimilarity("","")               = 0.0
+     * StringUtils.getJaroWinklerSimilarity("","a")              = 0.0
+     * StringUtils.getJaroWinklerSimilarity("aaapppp", "")       = 0.0
+     * StringUtils.getJaroWinklerSimilarity("frog", "fog")       = 0.93
+     * StringUtils.getJaroWinklerSimilarity("fly", "ant")        = 0.0
+     * StringUtils.getJaroWinklerSimilarity("elephant", "hippo") = 0.44
+     * StringUtils.getJaroWinklerSimilarity("hippo", "elephant") = 0.44
+     * StringUtils.getJaroWinklerSimilarity("hippo", "zzzzzzzz") = 0.0
+     * StringUtils.getJaroWinklerSimilarity("hello", "hallo")    = 0.88
+     * StringUtils.getJaroWinklerSimilarity("ABC Corporation", "ABC Corp") = 
0.93
+     * StringUtils.getJaroWinklerSimilarity("D N H Enterprises Inc", "D &amp; 
H Enterprises, Inc.") = 0.95
+     * StringUtils.getJaroWinklerSimilarity("My Gym Children's Fitness 
Center", "My Gym. Childrens Fitness") = 0.92
+     * StringUtils.getJaroWinklerSimilarity("PENNSYLVANIA", "PENNCISYLVNIA") = 
0.88
+     * </pre>
+     *
+     * @param first the first String, must not be null
+     * @param second the second String, must not be null
+     * @return the similarity score, rounded to two decimal places; 0 if the Strings have no matching characters
+     * @throws IllegalArgumentException if either String input is {@code null}
+     * @since 3.6
+     */
+    public static double getJaroWinklerSimilarity(final CharSequence first, 
final CharSequence second) {
+        final double DEFAULT_SCALING_FACTOR = 0.1;
+
+        if (first == null || second == null) {
+            throw new IllegalArgumentException("Strings must not be null");
+        }
+
+        int[] mtp = matches(first, second);
+        double m = mtp[0];
+        if (m == 0) {
+            return 0D;
+        }
+        double j = ((m / first.length() + m / second.length() + (m - mtp[1]) / 
m)) / 3;
+        double jw = j < 0.7D ? j : j + Math.min(DEFAULT_SCALING_FACTOR, 1D / 
mtp[3]) * mtp[2] * (1D - j);
+        return Math.round(jw * 100.0D) / 100.0D;
+    }
+
     private static int[] matches(final CharSequence first, final CharSequence 
second) {
         CharSequence max, min;
         if (first.length() > second.length()) {

http://git-wip-us.apache.org/repos/asf/commons-lang/blob/a40b2a90/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java 
b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
index 82fee92..524bd8d 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
@@ -2385,6 +2385,34 @@ public class StringUtilsTest {
     }
 
     @Test
+    public void testGetJaroWinklerSimilarity_StringString() {
+        assertEquals(0.93d, StringUtils.getJaroWinklerSimilarity("frog", 
"fog"), 0.0d);
+        assertEquals(0.0d, StringUtils.getJaroWinklerSimilarity("fly", "ant"), 
0.0d);
+        assertEquals(0.44d, StringUtils.getJaroWinklerSimilarity("elephant", 
"hippo"), 0.0d);
+        assertEquals(0.84d, StringUtils.getJaroWinklerSimilarity("dwayne", 
"duane"), 0.0d);
+        assertEquals(0.93d, StringUtils.getJaroWinklerSimilarity("ABC 
Corporation", "ABC Corp"), 0.0d);
+        assertEquals(0.95d, StringUtils.getJaroWinklerSimilarity("D N H 
Enterprises Inc", "D & H Enterprises, Inc."), 0.0d);
+        assertEquals(0.92d, StringUtils.getJaroWinklerSimilarity("My Gym 
Children's Fitness Center", "My Gym. Childrens Fitness"), 0.0d);
+        assertEquals(0.88d, 
StringUtils.getJaroWinklerSimilarity("PENNSYLVANIA", "PENNCISYLVNIA"), 0.0d);
+        assertEquals(0.63d, StringUtils.getJaroWinklerSimilarity("Haus 
Ingeborg", "Ingeborg Esser"), 0.0d);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetJaroWinklerSimilarity_NullNull() throws Exception {
+        StringUtils.getJaroWinklerSimilarity(null, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetJaroWinklerSimilarity_StringNull() throws Exception {
+        StringUtils.getJaroWinklerSimilarity(" ", null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testGetJaroWinklerSimilarity_NullString() throws Exception {
+        StringUtils.getJaroWinklerSimilarity(null, "clear");
+    }
+
+    @Test
     public void testGetFuzzyDistance() throws Exception {
         assertEquals(0, StringUtils.getFuzzyDistance("", "", Locale.ENGLISH));
         assertEquals(0, StringUtils.getFuzzyDistance("Workshop", "b", 
Locale.ENGLISH));

Reply via email to