ACCUMULO-375 reordered input split generation (group loop outermost) and made the XMLInputFactory a static field, created once, for performance
git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1241957 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/fa359318 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/fa359318 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/fa359318 Branch: refs/heads/master Commit: fa35931803c83151406739014446cb954382d50f Parents: dfe26ba Author: Adam Fuchs <afu...@apache.org> Authored: Wed Feb 8 16:10:29 2012 +0000 Committer: Adam Fuchs <afu...@apache.org> Committed: Wed Feb 8 16:10:29 2012 +0000 ---------------------------------------------------------------------- .../examples/wikisearch/ingest/ArticleExtractor.java | 9 +++++++-- .../examples/wikisearch/ingest/WikipediaInputFormat.java | 6 +++--- 2 files changed, 10 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/fa359318/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java index a3dcf8d..0699cfa 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java @@ -124,9 +124,14 @@ public class ArticleExtractor { public ArticleExtractor() {} - public Article extract(Reader reader) { - XMLInputFactory xmlif = XMLInputFactory.newInstance(); + private static XMLInputFactory xmlif = XMLInputFactory.newInstance(); + + static + { 
xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE); + } + + public Article extract(Reader reader) { XMLStreamReader xmlr = null; http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/fa359318/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java index dd2eeb9..731d02c 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java @@ -116,11 +116,11 @@ public class WikipediaInputFormat extends TextInputFormat { int numGroups = WikipediaConfiguration.getNumGroups(job.getConfiguration()); - for(InputSplit split:superSplits) + for(int group = 0; group < numGroups; group++) { - FileSplit fileSplit = (FileSplit)split; - for(int group = 0; group < numGroups; group++) + for(InputSplit split:superSplits) { + FileSplit fileSplit = (FileSplit)split; splits.add(new WikipediaInputSplit(fileSplit,group)); } }