Repository: commons-text Updated Branches: refs/heads/master 9dd58bce9 -> 1e7d2aa50
SANDBOX-488 rename FuzzyDistance to FuzzyScore Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/1e7d2aa5 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/1e7d2aa5 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/1e7d2aa5 Branch: refs/heads/master Commit: 1e7d2aa5057ad5e067ec9cd762ab8772546bc777 Parents: 9dd58bc Author: Bruno P. Kinoshita <[email protected]> Authored: Sat Feb 14 13:56:55 2015 -0200 Committer: Bruno P. Kinoshita <[email protected]> Committed: Sat Feb 14 13:56:55 2015 -0200 ---------------------------------------------------------------------- .../commons/text/similarity/FuzzyDistance.java | 133 ------------------- .../commons/text/similarity/FuzzyScore.java | 133 +++++++++++++++++++ .../text/similarity/FuzzyDistanceTest.java | 75 ----------- .../commons/text/similarity/FuzzyScoreTest.java | 75 +++++++++++ 4 files changed, 208 insertions(+), 208 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java b/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java deleted file mode 100644 index 4d175a0..0000000 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyDistance.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.text.similarity; - -import java.util.Locale; - -/** - * A matching algorithm that is similar to the searching algorithms implemented in editors such - * as Sublime Text, TextMate, Atom and others. - * - * <p> - * One point is given for every matched character. Subsequent matches yield two bonus points. A higher score - * indicates a higher similarity. - * </p> - * - * <p> - * This code has been adapted from Apache Commons Lang 3.3. - * </p> - */ -public class FuzzyDistance implements StringMetric<Integer> { - - /** - * <p> - * Find the Fuzzy Distance which indicates the similarity score between two - * Strings. This method uses the default locale. - * </p> - * - * @param term a full term that should be matched against, must not be null - * @param query the query that will be matched against a term, must not be - * null - * @return result score - * @throws IllegalArgumentException if either String input {@code null} - */ - @Override - public Integer compare(CharSequence term, CharSequence query) { - return compare(term, query, Locale.getDefault()); - } - - /** - * <p> - * Find the Fuzzy Distance which indicates the similarity score between two - * Strings. 
- * </p> - * - * <pre> - * distance.compare(null, null, null) = IllegalArgumentException - * distance.compare("", "", Locale.ENGLISH) = 0 - * distance.compare("Workshop", "b", Locale.ENGLISH) = 0 - * distance.compare("Room", "o", Locale.ENGLISH) = 1 - * distance.compare("Workshop", "w", Locale.ENGLISH) = 1 - * distance.compare("Workshop", "ws", Locale.ENGLISH) = 2 - * distance.compare("Workshop", "wo", Locale.ENGLISH) = 4 - * distance.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 - * </pre> - * - * @param term a full term that should be matched against, must not be null - * @param query the query that will be matched against a term, must not be - * null - * @param locale This string matching logic is case insensitive. A locale is - * necessary to normalize both Strings to lower case. - * @return result score - * @throws IllegalArgumentException if either String input {@code null} or - * Locale input {@code null} - */ - public Integer compare(CharSequence term, CharSequence query, Locale locale) { - if (term == null || query == null) { - throw new IllegalArgumentException("Strings must not be null"); - } else if (locale == null) { - throw new IllegalArgumentException("Locale must not be null"); - } - - // fuzzy logic is case insensitive. We normalize the Strings to lower - // case right from the start. Turning characters to lower case - // via Character.toLowerCase(char) is unfortunately insufficient - // as it does not accept a locale. 
- final String termLowerCase = term.toString().toLowerCase(locale); - final String queryLowerCase = query.toString().toLowerCase(locale); - - // the resulting score - int score = 0; - - // the position in the term which will be scanned next for potential - // query character matches - int termIndex = 0; - - // index of the previously matched character in the term - int previousMatchingCharacterIndex = Integer.MIN_VALUE; - - for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) { - final char queryChar = queryLowerCase.charAt(queryIndex); - - boolean termCharacterMatchFound = false; - for (; termIndex < termLowerCase.length() - && !termCharacterMatchFound; termIndex++) { - final char termChar = termLowerCase.charAt(termIndex); - - if (queryChar == termChar) { - // simple character matches result in one point - score++; - - // subsequent character matches further improve - // the score. - if (previousMatchingCharacterIndex + 1 == termIndex) { - score += 2; - } - - previousMatchingCharacterIndex = termIndex; - - // we can leave the nested loop. Every character in the - // query can match at most one character in the term. - termCharacterMatchFound = true; - } - } - } - - return score; - } - -} http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java new file mode 100644 index 0000000..3e72d05 --- /dev/null +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +import java.util.Locale; + +/** + * A matching algorithm that is similar to the searching algorithms implemented in editors such + * as Sublime Text, TextMate, Atom and others. + * + * <p> + * One point is given for every matched character. Subsequent matches yield two bonus points. A higher score + * indicates a higher similarity. + * </p> + * + * <p> + * This code has been adapted from Apache Commons Lang 3.3. + * </p> + */ +public class FuzzyScore implements StringMetric<Integer> { + + /** + * <p> + * Find the Fuzzy Score which indicates the similarity score between two + * Strings. This method uses the default locale. + * </p> + * + * @param term a full term that should be matched against, must not be null + * @param query the query that will be matched against a term, must not be + * null + * @return result score + * @throws IllegalArgumentException if either String input is {@code null} + */ + @Override + public Integer compare(CharSequence term, CharSequence query) { + return compare(term, query, Locale.getDefault()); + } + + /** + * <p> + * Find the Fuzzy Score which indicates the similarity score between two + * Strings. 
+ * </p> + * + * <pre> + * score.compare(null, null, null) = IllegalArgumentException + * score.compare("", "", Locale.ENGLISH) = 0 + * score.compare("Workshop", "b", Locale.ENGLISH) = 0 + * score.compare("Room", "o", Locale.ENGLISH) = 1 + * score.compare("Workshop", "w", Locale.ENGLISH) = 1 + * score.compare("Workshop", "ws", Locale.ENGLISH) = 2 + * score.compare("Workshop", "wo", Locale.ENGLISH) = 4 + * score.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 + * </pre> + * + * @param term a full term that should be matched against, must not be null + * @param query the query that will be matched against a term, must not be + * null + * @param locale This string matching logic is case insensitive. A locale is + * necessary to normalize both Strings to lower case. + * @return result score + * @throws IllegalArgumentException if either String input is {@code null} or + * the Locale input is {@code null} + */ + public Integer compare(CharSequence term, CharSequence query, Locale locale) { + if (term == null || query == null) { + throw new IllegalArgumentException("Strings must not be null"); + } else if (locale == null) { + throw new IllegalArgumentException("Locale must not be null"); + } + + // fuzzy logic is case insensitive. We normalize the Strings to lower + // case right from the start. Turning characters to lower case + // via Character.toLowerCase(char) is unfortunately insufficient + // as it does not accept a locale. 
+ final String termLowerCase = term.toString().toLowerCase(locale); + final String queryLowerCase = query.toString().toLowerCase(locale); + + // the resulting score + int score = 0; + + // the position in the term which will be scanned next for potential + // query character matches + int termIndex = 0; + + // index of the previously matched character in the term + int previousMatchingCharacterIndex = Integer.MIN_VALUE; + + for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) { + final char queryChar = queryLowerCase.charAt(queryIndex); + + boolean termCharacterMatchFound = false; + for (; termIndex < termLowerCase.length() + && !termCharacterMatchFound; termIndex++) { + final char termChar = termLowerCase.charAt(termIndex); + + if (queryChar == termChar) { + // simple character matches result in one point + score++; + + // subsequent character matches further improve + // the score. + if (previousMatchingCharacterIndex + 1 == termIndex) { + score += 2; + } + + previousMatchingCharacterIndex = termIndex; + + // we can leave the nested loop. Every character in the + // query can match at most one character in the term. + termCharacterMatchFound = true; + } + } + } + + return score; + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java deleted file mode 100644 index 49e51ba..0000000 --- a/src/test/java/org/apache/commons/text/similarity/FuzzyDistanceTest.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.text.similarity; - -import static org.junit.Assert.assertEquals; - -import java.util.Locale; - -import org.junit.BeforeClass; -import org.junit.Test; - -/** - * Unit tests for {@link org.apache.commons.text.FuzzyDistance}. - */ -public class FuzzyDistanceTest { - - private static FuzzyDistance distance; - - @BeforeClass - public static void setUp() { - distance = new FuzzyDistance(); - } - - @Test - public void testGetFuzzyDistance() throws Exception { - assertEquals(0, (int) distance.compare("", "", Locale.ENGLISH)); - assertEquals(0, - (int) distance.compare("Workshop", "b", Locale.ENGLISH)); - assertEquals(1, - (int) distance.compare("Room", "o", Locale.ENGLISH)); - assertEquals(1, - (int) distance.compare("Workshop", "w", Locale.ENGLISH)); - assertEquals(2, - (int) distance.compare("Workshop", "ws", Locale.ENGLISH)); - assertEquals(4, - (int) distance.compare("Workshop", "wo", Locale.ENGLISH)); - assertEquals(3, (int) distance.compare( - "Apache Software Foundation", "asf", Locale.ENGLISH)); - } - - @Test(expected = IllegalArgumentException.class) - public void testGetFuzzyDistance_NullNullNull() throws Exception { - distance.compare(null, null, null); - } - - @Test(expected = IllegalArgumentException.class) - public void testGetFuzzyDistance_StringNullLoclae() throws Exception { - distance.compare(" ", null, Locale.ENGLISH); - } - - @Test(expected = 
IllegalArgumentException.class) - public void testGetFuzzyDistance_NullStringLocale() throws Exception { - distance.compare(null, "clear", Locale.ENGLISH); - } - - @Test(expected = IllegalArgumentException.class) - public void testGetFuzzyDistance_StringStringNull() throws Exception { - distance.compare(" ", "clear", null); - } - -} http://git-wip-us.apache.org/repos/asf/commons-text/blob/1e7d2aa5/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java new file mode 100644 index 0000000..b2fab14 --- /dev/null +++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.similarity; + +import static org.junit.Assert.assertEquals; + +import java.util.Locale; + +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for {@link org.apache.commons.text.similarity.FuzzyScore}. 
+ */ +public class FuzzyScoreTest { + + private static FuzzyScore score; + + @BeforeClass + public static void setUp() { + score = new FuzzyScore(); + } + + @Test + public void testGetFuzzyScore() throws Exception { + assertEquals(0, (int) score.compare("", "", Locale.ENGLISH)); + assertEquals(0, + (int) score.compare("Workshop", "b", Locale.ENGLISH)); + assertEquals(1, + (int) score.compare("Room", "o", Locale.ENGLISH)); + assertEquals(1, + (int) score.compare("Workshop", "w", Locale.ENGLISH)); + assertEquals(2, + (int) score.compare("Workshop", "ws", Locale.ENGLISH)); + assertEquals(4, + (int) score.compare("Workshop", "wo", Locale.ENGLISH)); + assertEquals(3, (int) score.compare( + "Apache Software Foundation", "asf", Locale.ENGLISH)); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetFuzzyScore_NullNullNull() throws Exception { + score.compare(null, null, null); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetFuzzyScore_StringNullLoclae() throws Exception { + score.compare(" ", null, Locale.ENGLISH); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetFuzzyScore_NullStringLocale() throws Exception { + score.compare(null, "clear", Locale.ENGLISH); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetFuzzyScore_StringStringNull() throws Exception { + score.compare(" ", "clear", null); + } + +}
