Updated Branches:
  refs/heads/1.4.5-SNAPSHOT [created] e15859054

ACCUMULO-354 added boolean instead of null to detect presence of next value

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1238696 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/b9cf2945
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/b9cf2945
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/b9cf2945

Branch: refs/heads/1.4.5-SNAPSHOT
Commit: b9cf2945ee33ace0726298601462d02b0e226190
Parents: 72fbb54
Author: Billie Rinaldi <bil...@apache.org>
Authored: Tue Jan 31 16:53:40 2012 +0000
Committer: Billie Rinaldi <bil...@apache.org>
Committed: Tue Jan 31 16:53:40 2012 +0000

----------------------------------------------------------------------
 .../wikisearch/ingest/WikipediaIngester.java    |  3 ++
 .../wikisearch/iterator/TextIndexTest.java      | 43 ++++++++++++++++++++
 2 files changed, 46 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b9cf2945/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
index 31c8472..50415a7 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
@@ -88,6 +88,7 @@ public class WikipediaIngester extends Configured implements Tool {
           columns.add(new Column("fi\0" + family));
         }
         TextIndexCombiner.setColumns(setting, columns);
+        TextIndexCombiner.setLossyness(setting, true);
         
         tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
       }
@@ -102,6 +103,7 @@ public class WikipediaIngester extends Configured implements Tool {
       // Add the UID combiner
       IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
       GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+      GlobalIndexUidCombiner.setLossyness(setting, true);
       tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
     
@@ -110,6 +112,7 @@ public class WikipediaIngester extends Configured implements Tool {
       // Add the UID combiner
       IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
       GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+      GlobalIndexUidCombiner.setLossyness(setting, true);
       tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
     

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/b9cf2945/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
index 22ef9aa..7297b5a 100644
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
+++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
@@ -139,4 +139,47 @@ public class TextIndexTest {
     Assert.assertTrue(offsets.get(4) == 15);
     Assert.assertTrue(offsets.get(5) == 19);
   }
+  
+  @Test
+  public void testEmptyValue() throws InvalidProtocolBufferException {
+    Builder builder = createBuilder();
+    builder.addWordOffset(13);
+    builder.addWordOffset(15);
+    builder.addWordOffset(19);
+    builder.setNormalizedTermFrequency(0.12f);
+    
+    values.add(new Value("".getBytes()));
+    values.add(new Value(builder.build().toByteArray()));
+    values.add(new Value("".getBytes()));
+    
+    builder = createBuilder();
+    builder.addWordOffset(1);
+    builder.addWordOffset(5);
+    builder.setNormalizedTermFrequency(0.1f);
+    
+    values.add(new Value(builder.build().toByteArray()));
+    values.add(new Value("".getBytes()));
+    
+    builder = createBuilder();
+    builder.addWordOffset(3);
+    builder.setNormalizedTermFrequency(0.05f);
+    
+    values.add(new Value(builder.build().toByteArray()));
+    values.add(new Value("".getBytes()));
+    
+    Value result = combiner.reduce(new Key(), values.iterator());
+    
+    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
+    
+    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
+    
+    List<Integer> offsets = info.getWordOffsetList();
+    Assert.assertTrue(offsets.size() == 6);
+    Assert.assertTrue(offsets.get(0) == 1);
+    Assert.assertTrue(offsets.get(1) == 3);
+    Assert.assertTrue(offsets.get(2) == 5);
+    Assert.assertTrue(offsets.get(3) == 13);
+    Assert.assertTrue(offsets.get(4) == 15);
+    Assert.assertTrue(offsets.get(5) == 19);
+  }
 }

Reply via email to