Repository: commons-text Updated Branches: refs/heads/master b11451b26 -> fe20a173e
TEXT-76: Remove Math.round from Jaro Winkler distance, returning complete jw value Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/fe20a173 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/fe20a173 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/fe20a173 Branch: refs/heads/master Commit: fe20a173e01a0d73e75d0907fed3a371d34d0f8b Parents: b11451b Author: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Authored: Wed Apr 5 22:05:06 2017 +1200 Committer: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Committed: Wed Apr 5 22:05:06 2017 +1200 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../commons/text/similarity/JaroWinklerDistance.java | 3 +-- .../text/similarity/JaroWinklerDistanceTest.java | 13 +++++++------ .../ParameterizedSimilarityScoreFromTest.java | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/fe20a173/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 45b639c..cd59174 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -46,6 +46,7 @@ The <action> type attribute can be add,update,fix,remove. <body> <release version="1.1" date="tbd" description="tbd"> + <action issue="TEXT-76" type="fix" dev="kinow">Correct round issue in Jaro Winkler implementation</action> <action issue="TEXT-72" type="fix" dev="chtompki">Similar to LANG-1025, clirr fails site build.</action> </release> http://git-wip-us.apache.org/repos/asf/commons-text/blob/fe20a173/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java index 0190c7b..f1c4d01 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java @@ -75,7 +75,6 @@ public class JaroWinklerDistance implements SimilarityScore<Double> { @Override public Double apply(final CharSequence left, final CharSequence right) { final double defaultScalingFactor = 0.1; - final double percentageRoundValue = 100.0; if (left == null || right == null) { throw new IllegalArgumentException("Strings must not be null"); @@ -88,7 +87,7 @@ public class JaroWinklerDistance implements SimilarityScore<Double> { } double j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3; double jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j); - return Math.round(jw * percentageRoundValue) / percentageRoundValue; + return jw; } /** http://git-wip-us.apache.org/repos/asf/commons-text/blob/fe20a173/src/test/java/org/apache/commons/text/similarity/JaroWinklerDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/JaroWinklerDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/JaroWinklerDistanceTest.java index 84276f1..ad23099 100644 --- a/src/test/java/org/apache/commons/text/similarity/JaroWinklerDistanceTest.java +++ b/src/test/java/org/apache/commons/text/similarity/JaroWinklerDistanceTest.java @@ -35,13 +35,14 @@ public class JaroWinklerDistanceTest { @Test public void testGetJaroWinklerDistance_StringString() { - assertEquals(0.93d, (double) distance.apply("frog", "fog"), 0.0d); + assertEquals(0.92499d, (double) distance.apply("frog", "fog"), 0.00001d); assertEquals(0.0d, (double) distance.apply("fly", "ant"), 0.0d); - assertEquals(0.44d, (double) distance.apply("elephant", "hippo"), 0.0d); - assertEquals(0.93d, (double) distance.apply("ABC Corporation", "ABC Corp"), 0.0d); - assertEquals(0.95d, (double) distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."), 0.0d); - assertEquals(0.92d, (double) distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"), 0.0d); - assertEquals(0.88d, (double) distance.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.0d); + assertEquals(0.44166d, (double) distance.apply("elephant", "hippo"), 0.00001d); + assertEquals(0.92740d, (double) distance.apply("ABC Corporation", "ABC Corp"), 0.00001d); + assertEquals(0.94580d, (double) distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."), 0.00001d); + assertEquals(0.921458d, (double) distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"), 0.00001d); + assertEquals(0.882329d, (double) distance.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00001d); + assertEquals(0.996598d, (double) distance.apply("/opt/software1", "/opt/software2"), 0.00001d); } @Test(expected = IllegalArgumentException.class) http://git-wip-us.apache.org/repos/asf/commons-text/blob/fe20a173/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java index 654ae4e..13dcea0 100644 --- a/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java +++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedSimilarityScoreFromTest.java @@ -54,9 +54,9 @@ public class ParameterizedSimilarityScoreFromTest<R> { public static Iterable<Object[]> parameters() { return Arrays.asList( new Object[][] { - { new JaroWinklerDistance(), "elephant", "hippo", 0.44 }, - { new JaroWinklerDistance(), "hippo", "elephant", 0.44 }, - { new JaroWinklerDistance(), "hippo", "zzzzzzzz", 0.0 }, + { new LevenshteinDistance(), "elephant", "hippo", 7 }, + { new LevenshteinDistance(), "hippo", "elephant", 7 }, + { new LevenshteinDistance(), "hippo", "zzzzzzzz", 8 }, { new SimilarityScore<Boolean>() {