[
https://issues.apache.org/jira/browse/OPENNLP-1416?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17649737#comment-17649737
]
ASF GitHub Bot commented on OPENNLP-1416:
-----------------------------------------
mawiesne commented on code in PR #461:
URL: https://github.com/apache/opennlp/pull/461#discussion_r1053282816
##########
opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java:
##########
@@ -154,61 +153,49 @@ public class ADNameSampleStream implements ObjectStream<NameSample> {
private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
- /**
+ /*
* To keep the last left contraction part
*/
private String leftContractionPart = null;
private final boolean splitHyphenatedTokens;
/**
- * Creates a new {@link NameSample} stream from a line stream, i.e.
- * {@link ObjectStream}<{@link String}>, that could be a
- * {@link PlainTextByLineStream} object.
+ * Initializes a new {@link ADNameSampleStream} stream from a {@link ObjectStream<String>},
+ * that could be a {@link PlainTextByLineStream} object.
*
- * @param lineStream
- * a stream of lines as {@link String}
- * @param splitHyphenatedTokens
- * if true hyphenated tokens will be separated: "carros-monstro" >
- * "carros" "-" "monstro"
+ * @param lineStream An {@link ObjectStream<String>} as input.
+ * @param splitHyphenatedTokens If {@code true} hyphenated tokens will be separated:
+ * "carros-monstro" > "carros" "-" "monstro".
*/
public ADNameSampleStream(ObjectStream<String> lineStream, boolean
splitHyphenatedTokens) {
this.adSentenceStream = new ADSentenceStream(lineStream);
this.splitHyphenatedTokens = splitHyphenatedTokens;
}
/**
- * Creates a new {@link NameSample} stream from a {@link InputStream}
+ * Initializes a new {@link ADNameSampleStream} from an {@link InputStreamFactory}
*
- * @param in
- * the Corpus {@link InputStream}
- * @param charsetName
- * the charset of the Arvores Deitadas Corpus
- * @param splitHyphenatedTokens
- * if true hyphenated tokens will be separated: "carros-monstro" >
- * "carros" "-" "monstro"
+ * @param in The Corpus {@link InputStreamFactory}.
+ * @param charsetName The {@link java.nio.charset.Charset charset} to use
+ * for reading of the corpus.
+ * @param splitHyphenatedTokens If {@code true} hyphenated tokens will be separated:
+ * "carros-monstro" > "carros" "-" "monstro".
*/
@Deprecated
public ADNameSampleStream(InputStreamFactory in, String charsetName,
boolean splitHyphenatedTokens) throws IOException {
-
- try {
- this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
- in, charsetName));
- this.splitHyphenatedTokens = splitHyphenatedTokens;
- } catch (UnsupportedEncodingException e) {
- // UTF-8 is available on all JVMs, will never happen
- throw new IllegalStateException(e);
- }
+ this(new PlainTextByLineStream(in, charsetName), splitHyphenatedTokens);
}
private int textID = -1;
+ @Override
public NameSample read() throws IOException {
Sentence paragraph;
// we should look for text here.
- while ((paragraph = this.adSentenceStream.read()) != null) {
+ if ((paragraph = this.adSentenceStream.read()) != null) {
Review Comment:
The first element read is being directly returned. Therefore `while` made no
sense here.
> Enhance JavaDoc in opennlp.tools.formats.ad package
> ---------------------------------------------------
>
> Key: OPENNLP-1416
> URL: https://issues.apache.org/jira/browse/OPENNLP-1416
> Project: OpenNLP
> Issue Type: Improvement
> Components: Formats
> Affects Versions: 2.1.0
> Reporter: Martin Wiesner
> Assignee: Martin Wiesner
> Priority: Minor
> Fix For: 2.1.1
>
>
> The JavaDoc of the _opennlp.tools.formats.ad_ package suffers from several
> inconsistencies and missing descriptions. Moreover, several typos are present
> that need sanitizing.
> It needs enhancements and/or additions to provide more clarity for readers.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)