Merge branch 'pr/4' TEXT-20: Add salutations like Mr, Mrs, etc. This closes #4 from GitHub. Thanks to Tom Mackenzie.
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6fd10f89 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6fd10f89 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6fd10f89 Branch: refs/heads/master Commit: 6fd10f89aaa3870d91368979e4f8a32ebcfc1049 Parents: ebb2a92 9e84145 Author: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Authored: Sun Nov 20 19:52:56 2016 +1300 Committer: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Committed: Sun Nov 20 19:52:56 2016 +1300 ---------------------------------------------------------------------- .../commons/text/names/HumanNameParser.java | 17 ++++- .../org/apache/commons/text/names/Name.java | 13 +++- .../commons/text/names/HumanNameParserTest.java | 6 +- .../org/apache/commons/text/names/testNames.txt | 65 ++++++++++---------- 4 files changed, 64 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/6fd10f89/src/main/java/org/apache/commons/text/names/HumanNameParser.java ---------------------------------------------------------------------- diff --cc src/main/java/org/apache/commons/text/names/HumanNameParser.java index 5718ba9,fd1608a..1348313 --- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java +++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java @@@ -100,13 -100,8 +100,17 @@@ import org.apache.commons.lang3.StringU */ public final class HumanNameParser { - private final List<String> salutations; + /** + * List of suffixes. Not exposed to users or children classes. + */ private final List<String> suffixes; + /** ++ * List of salutations. Not exposed to users or children classes. ++ */ ++ private final List<String> salutations; ++ /** + * List of prefixes. Not exposed to users or children classes. + */ private final List<String> prefixes; /** @@@ -132,30 -130,34 +139,34 @@@ * @throws NullPointerException if name is null. * @return The name object */ - public Name parse(String name) { + public Name parse(final String name) { Objects.requireNonNull(name, "Parameter 'name' must not be null."); - NameString nameString = new NameString(name); + final NameString nameString = new NameString(name); // TODO compile regexes only once when the parser is created - String salutations = StringUtils.join(this.salutations, " |") + ""; - String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*"; - String prefixes = StringUtils.join(this.prefixes, " |") + " "; + final String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*"; + final String prefixes = StringUtils.join(this.prefixes, " |") + " "; ++ final String salutations = StringUtils.join(this.salutations, " |") + " "; // The regex use is a bit tricky. *Everything* matched by the regex will be replaced, // but you can select a particular parenthesized submatch to be returned. // Also, note that each regex requres that the preceding ones have been run, and matches chopped out. // names that starts or end w/ an apostrophe break this - String salutationRegex = "^(?i)(("+salutations+")\\.)"; - String nicknamesRegex = "(?i) ('|\\\"|\\(\\\"*'*)(.+?)('|\\\"|\\\"*'*\\)) "; - String suffixRegex = "(?i),* *((" + suffixes + ")$)"; - String lastRegex = "(?i)(?!^)\\b([^ ]+ y |" + prefixes + ")*[^ ]+$"; + final String nicknamesRegex = "(?i) ('|\\\"|\\(\\\"*'*)(.+?)('|\\\"|\\\"*'*\\)) "; + final String suffixRegex = "(?i),* *((" + suffixes + ")$)"; + final String lastRegex = "(?i)(?!^)\\b([^ ]+ y |" + prefixes + ")*[^ ]+$"; ++ final String salutationRegex = "^(?i)(("+salutations+")\\.)"; // note the lookahead, which isn't returned or replaced - String leadingInitRegex = "(?i)(^(.\\.*)(?= \\p{L}{2}))"; - String firstRegex = "(?i)^([^ ]+)"; + final String leadingInitRegex = "(?i)(^(.\\.*)(?= \\p{L}{2}))"; + final String firstRegex = "(?i)^([^ ]+)"; + String salutation = nameString.chopWithRegex(salutationRegex, 1); + // get nickname, if there is one - String nickname = nameString.chopWithRegex(nicknamesRegex, 2); + final String nickname = nameString.chopWithRegex(nicknamesRegex, 2); // get suffix, if there is one - String suffix = nameString.chopWithRegex(suffixRegex, 1); + final String suffix = nameString.chopWithRegex(suffixRegex, 1); // flip the before-comma and after-comma parts of the name nameString.flip(","); http://git-wip-us.apache.org/repos/asf/commons-text/blob/6fd10f89/src/main/java/org/apache/commons/text/names/Name.java ---------------------------------------------------------------------- diff --cc src/main/java/org/apache/commons/text/names/Name.java index 6545b84,ef3d36a..8b6f267 --- a/src/main/java/org/apache/commons/text/names/Name.java +++ b/src/main/java/org/apache/commons/text/names/Name.java @@@ -25,43 -25,17 +25,49 @@@ import java.util.Objects */ public final class Name { + /** + * Leading initial. e.g. <em>F.</em>, as in <em>Francisco ('Chico') Silva Zhao II</em>. + */ private final String leadingInitial; + /** ++ * Salutation. e.g. <em>Dr.</em>, as in <em>Dr. Jekyll</em>, or <em>Mr.</em>, as in <em>Mr. Hyde</em/>. ++ */ + private final String salutation; ++ /** + * The first name, e.g. <em>Francisco</em>, as in <em>Francisco ('Chico') Silva Zhao II</em>. + */ private final String firstName; + /** + * The nickname, e.g. <em>Chico</em>, as in <em>Francisco ('Chico') Silva Zhao II</em>. + */ private final String nickName; + /** + * The middle name, e.g. <em>Silva</em>, as in <em>Francisco ('Chico') Silva Zhao II</em>. + */ private final String middleName; + /** + * The last name, e.g. <em>Zhao</em>, as in <em>Francisco ('Chico') Silva Zhao II</em>. + */ private final String lastName; + /** + * The suffix, e.g. <em>II</em>, as in <em>Francisco ('Chico') Silva Zhao II</em>. + */ private final String suffix; - Name(String leadingInitial, String salutation, String firstName, String nickName, String middleName, String lastName, String suffix) { + /** + * Create a Name. + * + * @param leadingInitial the leading initial ++ * @param salutation the salutation + * @param firstName the first name + * @param nickName the nickname + * @param middleName the middle name + * @param lastName the last name + * @param suffix a suffix + */ - Name(final String leadingInitial, final String firstName, final String nickName, final String middleName, final String lastName, final String suffix) { ++ Name(final String leadingInitial, final String salutation, final String firstName, final String nickName, final String middleName, final String lastName, final String suffix) { this.leadingInitial = leadingInitial; + this.salutation = salutation; this.firstName = firstName; this.nickName = nickName; this.middleName = middleName; http://git-wip-us.apache.org/repos/asf/commons-text/blob/6fd10f89/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java ---------------------------------------------------------------------- diff --cc src/test/java/org/apache/commons/text/names/HumanNameParserTest.java index f473206,22c96cc..1dd6085 --- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java +++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java @@@ -70,10 -70,14 +70,14 @@@ public class HumanNameParserTest * * @param record a CSVRecord representing one record in the input file. */ - private void validateRecord(CSVRecord record) { - Name result = nameParser.parse(record.get(Columns.Name)); + private void validateRecord(final CSVRecord record) { + final Name result = nameParser.parse(record.get(Columns.Name)); - long recordNum = record.getRecordNumber(); + final long recordNum = record.getRecordNumber(); + - assertThat("Wrong LeadingInit in record " + recordNum, ++ assertThat("Wrong Salutation in record " + recordNum, + result.getSalutation(), equalTo(record.get(Columns.Salutation))); + assertThat("Wrong LeadingInit in record " + recordNum, result.getLeadingInitial(), equalTo(record.get(Columns.LeadingInit)));