Updated Branches: refs/heads/1.4.5-SNAPSHOT [created] e15859054
ACCUMULO-354 added boolean instead of null to detect presence of next value git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1238696 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/b9cf2945 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/b9cf2945 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/b9cf2945 Branch: refs/heads/1.4.5-SNAPSHOT Commit: b9cf2945ee33ace0726298601462d02b0e226190 Parents: 72fbb54 Author: Billie Rinaldi <bil...@apache.org> Authored: Tue Jan 31 16:53:40 2012 +0000 Committer: Billie Rinaldi <bil...@apache.org> Committed: Tue Jan 31 16:53:40 2012 +0000 ---------------------------------------------------------------------- .../wikisearch/ingest/WikipediaIngester.java | 3 ++ .../wikisearch/iterator/TextIndexTest.java | 43 ++++++++++++++++++++ 2 files changed, 46 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b9cf2945/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java index 31c8472..50415a7 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java @@ -88,6 +88,7 @@ public class WikipediaIngester extends Configured implements Tool { columns.add(new Column("fi\0" + family)); } TextIndexCombiner.setColumns(setting, columns); + TextIndexCombiner.setLossyness(setting, true); tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class)); } @@ -102,6 +103,7 @@ public class WikipediaIngester extends Configured implements Tool { // Add the UID combiner IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); GlobalIndexUidCombiner.setCombineAllColumns(setting, true); + GlobalIndexUidCombiner.setLossyness(setting, true); tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class)); } @@ -110,6 +112,7 @@ public class WikipediaIngester extends Configured implements Tool { // Add the UID combiner IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); GlobalIndexUidCombiner.setCombineAllColumns(setting, true); + GlobalIndexUidCombiner.setLossyness(setting, true); tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class)); } http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b9cf2945/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java ---------------------------------------------------------------------- diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java index 22ef9aa..7297b5a 100644 --- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java +++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java @@ -139,4 +139,47 @@ public class TextIndexTest { Assert.assertTrue(offsets.get(4) == 15); Assert.assertTrue(offsets.get(5) == 19); } + + @Test + public void testEmptyValue() throws InvalidProtocolBufferException { + Builder builder = createBuilder(); + builder.addWordOffset(13); + builder.addWordOffset(15); + builder.addWordOffset(19); + builder.setNormalizedTermFrequency(0.12f); + + values.add(new Value("".getBytes())); + values.add(new Value(builder.build().toByteArray())); + values.add(new Value("".getBytes())); + + builder = createBuilder(); + builder.addWordOffset(1); + builder.addWordOffset(5); + builder.setNormalizedTermFrequency(0.1f); + + values.add(new Value(builder.build().toByteArray())); + values.add(new Value("".getBytes())); + + builder = createBuilder(); + builder.addWordOffset(3); + builder.setNormalizedTermFrequency(0.05f); + + values.add(new Value(builder.build().toByteArray())); + values.add(new Value("".getBytes())); + + Value result = combiner.reduce(new Key(), values.iterator()); + + TermWeight.Info info = TermWeight.Info.parseFrom(result.get()); + + Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f); + + List<Integer> offsets = info.getWordOffsetList(); + Assert.assertTrue(offsets.size() == 6); + Assert.assertTrue(offsets.get(0) == 1); + Assert.assertTrue(offsets.get(1) == 3); + Assert.assertTrue(offsets.get(2) == 5); + Assert.assertTrue(offsets.get(3) == 13); + Assert.assertTrue(offsets.get(4) == 15); + Assert.assertTrue(offsets.get(5) == 19); + } }