Author: sebb
Date: Fri Mar 31 23:57:48 2017
New Revision: 1789764

URL: http://svn.apache.org/viewvc?rev=1789764&view=rev
Log:
CODEC-199 Bug in HW rule in Soundex
Revert to a fix which does not entail change to public API
Modified: commons/proper/codec/trunk/src/changes/changes.xml commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java Modified: commons/proper/codec/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1789764&r1=1789763&r2=1789764&view=diff ============================================================================== --- commons/proper/codec/trunk/src/changes/changes.xml (original) +++ commons/proper/codec/trunk/src/changes/changes.xml Fri Mar 31 23:57:48 2017 @@ -59,7 +59,7 @@ The <action> type attribute can be add,u <action issue="CODEC-221" dev="sebb" type="update">HmacUtils.updateHmac calls reset() unnecessarily</action> <action issue="CODEC-200" dev="sebb" type="fix" due-to="Luciano Vernaschi">Base32.HEX_DECODE_TABLE contains the wrong value 32</action> <action issue="CODEC-207" dev="ggregory" type="fix" due-to="Gary Gregory">Charsets Javadoc breaks build when using Java 8</action> - <action issue="CODEC-199" dev="ggregory" type="fix" due-to="Yossi Tamari">Bug in HW rule in Soundex</action> + <action issue="CODEC-199" dev="ggregory/sebb" type="fix" due-to="Yossi Tamari">Bug in HW rule in Soundex</action> <action issue="CODEC-209" dev="ggregory" type="fix" due-to="Gary Gregory">Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0 restriction instead of 1.4.0.</action> <action issue="CODEC-219" dev="ggregory" type="fix" due-to="Gary Gregory, Sebb">Don't deprecate Charsets Charset constants in favor of Java 7's java.nio.charset.StandardCharsets</action> <action issue="CODEC-217" dev="ggregory" type="add" due-to="Gary Gregory">Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only)</action> Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java?rev=1789764&r1=1789763&r2=1789764&view=diff 
============================================================================== --- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java (original) +++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java Fri Mar 31 23:57:48 2017 @@ -41,7 +41,7 @@ public class Soundex implements StringEn * * @see #US_ENGLISH_MAPPING */ - public static final String US_ENGLISH_MAPPING_STRING = "0123012#02245501262301#202"; + public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202"; /** * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position @@ -179,15 +179,6 @@ public class Soundex implements StringEn } /** - * Returns the soundex mapping. - * - * @return soundexMapping. - */ - private char[] getSoundexMapping() { - return this.soundexMapping; - } - - /** * Maps the given upper-case character to its Soundex code. * * @param ch @@ -198,10 +189,10 @@ public class Soundex implements StringEn */ private char map(final char ch) { final int index = ch - 'A'; - if (index < 0 || index >= this.getSoundexMapping().length) { + if (index < 0 || index >= this.soundexMapping.length) { throw new IllegalArgumentException("The character is not mapped: " + ch); } - return this.getSoundexMapping()[index]; + return this.soundexMapping[index]; } /** @@ -234,19 +225,20 @@ public class Soundex implements StringEn return str; } final char out[] = {'0', '0', '0', '0'}; - char last, mapped; - int incount = 1, count = 1; - out[0] = str.charAt(0); - // map() throws IllegalArgumentException - last = this.map(str.charAt(0)); - while (incount < str.length() && count < out.length) { - mapped = this.map(str.charAt(incount++)); - if (mapped == '0') { - last = mapped; - } else if (mapped != '#' && mapped != last) { - out[count++] = mapped; - last = mapped; + int count = 0; + final char first = str.charAt(0); + out[count++] = first; + char lastDigit = map(first); // 
previous digit + for(int i = 1; i < str.length() && count < out.length ; i++) { + char ch = str.charAt(i); + if (ch == 'H' || ch == 'W') { // these are ignored completely + continue; + } + char digit = map(ch); + if (digit != '0' && digit != lastDigit) { // don't store vowels or repeats + out[count++] = digit; } + lastDigit = digit; } return new String(out); }