Fix [TEXT-131] JaroWinklerDistance: Calculation deviates from definition
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/5d148549 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/5d148549 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/5d148549 Branch: refs/heads/master Commit: 5d148549bc6ea8501016856547e27aed58b116c3 Parents: 4546f45 Author: Jan Martin Keil <jan-martin.k...@uni-jena.de> Authored: Thu Aug 2 23:20:21 2018 +0200 Committer: Jan Martin Keil <jan-martin.k...@uni-jena.de> Committed: Thu Aug 2 23:22:33 2018 +0200 ---------------------------------------------------------------------- .../commons/text/similarity/JaroWinklerDistance.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/5d148549/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java index 915cd5c..74ea4f7 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java @@ -86,16 +86,16 @@ public class JaroWinklerDistance implements SimilarityScore<Double> { return 0D; } final double j = ((m / left.length() + m / right.length() + (m - (double) mtp[1] / 2) / m)) / 3; - final double jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j); + final double jw = j < 0.7D ? j : j + defaultScalingFactor * mtp[2] * (1D - j); return jw; } /** - * This method returns the Jaro-Winkler string matches, half transpositions, prefix, max array. + * This method returns the Jaro-Winkler string matches, half transpositions, prefix array. * * @param first the first string to be matched * @param second the second string to be matched - * @return mtp array containing: matches, half transpositions, prefix, and max length + * @return mtp array containing: matches, half transpositions, and prefix */ protected static int[] matches(final CharSequence first, final CharSequence second) { CharSequence max, min; @@ -143,14 +143,14 @@ public class JaroWinklerDistance implements SimilarityScore<Double> { } } int prefix = 0; - for (int mi = 0; mi < min.length(); mi++) { + for (int mi = 0; mi < Math.min(4, min.length()); mi++) { if (first.charAt(mi) == second.charAt(mi)) { prefix++; } else { break; } } - return new int[] {matches, halfTranspositions, prefix, max.length()}; + return new int[] {matches, halfTranspositions, prefix}; } }