KYLIN-1636 fix compile

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/dc1c2f66
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/dc1c2f66
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/dc1c2f66

Branch: refs/heads/1.5.x-HBase1.x
Commit: dc1c2f66ec5fa9bdfd965ff023b26a357c9d1cd9
Parents: 5c3990e
Author: Yang Li <liy...@apache.org>
Authored: Sun May 1 19:38:34 2016 +0800
Committer: Yang Li <liy...@apache.org>
Committed: Sun May 1 19:38:34 2016 +0800

----------------------------------------------------------------------
 .../gridtable/benchmark/GTScannerBenchmark.java | 217 ++++++++++++++++
 .../benchmark/GTScannerBenchmark2.java          | 241 ++++++++++++++++++
 .../benchmark/SortedGTRecordGenerator.java      | 245 +++++++++++++++++++
 3 files changed, 703 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/dc1c2f66/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark.java b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark.java
new file mode 100644
index 0000000..302258b
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.gridtable.benchmark;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.ImmutableBitSet;
+import org.apache.kylin.gridtable.GTInfo;
+import org.apache.kylin.gridtable.GTRecord;
+import org.apache.kylin.gridtable.GTSampleCodeSystem;
+import org.apache.kylin.gridtable.GTScanRequest;
+import org.apache.kylin.gridtable.IGTScanner;
+import org.apache.kylin.gridtable.GTInfo.Builder;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Benchmark of processing 10 million GTRecords. 5 dimensions of type int4, and 2 measures of type long8.
+ * <p>
+ * The constructor runs one pass of plain record generation and stores its duration in
+ * {@code genTime}; every test subtracts that baseline from its own elapsed time before
+ * computing the reported speed.
+ */
+public class GTScannerBenchmark {
+
+    final GTInfo info;
+    final SortedGTRecordGenerator gen;
+
+    final ImmutableBitSet dimensions = ImmutableBitSet.valueOf(0, 1, 2, 3, 4);
+    final ImmutableBitSet metrics = ImmutableBitSet.valueOf(5, 6);
+    final String[] aggrFuncs = new String[] { "SUM", "SUM" };
+
+    final long N = 10000000; // 10M
+    final long genTime;
+
+    /**
+     * Builds the table layout (5 int4 dimensions as primary key, 2 long8 measures), configures
+     * the record generator accordingly, then times one generation pass as the baseline.
+     */
+    public GTScannerBenchmark() {
+        Builder builder = GTInfo.builder();
+        builder.setCodeSystem(new GTSampleCodeSystem());
+        DataType tint = DataType.getType("int4");
+        DataType tlong = DataType.getType("long8");
+        builder.setColumns(tint, tint, tint, tint, tint, tlong, tlong);
+        // the primary key is exactly the dimension columns
+        builder.setPrimaryKey(dimensions);
+        info = builder.build();
+
+        gen = new SortedGTRecordGenerator(info);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(100, 4, null);
+        gen.addMeasure(8);
+        gen.addMeasure(8);
+
+        // warm up
+        long t = System.currentTimeMillis();
+        testGenerate();
+        genTime = System.currentTimeMillis() - t;
+    }
+
+    /** Iterates over N generated records without any further processing. */
+    @SuppressWarnings("unused")
+    public void testGenerate() {
+        long count = 0;
+        for (GTRecord rec : gen.generate(N)) {
+            count++;
+        }
+    }
+
+    @Test
+    public void testAggregate2() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 1));
+    }
+
+    @Test
+    public void testAggregate2_() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 2));
+    }
+
+    @Test
+    public void testAggregate4() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 1, 2, 3));
+    }
+
+    @Test
+    public void testAggregate5() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 1, 2, 3, 4));
+    }
+
+    /**
+     * Scans N generated records through an aggregating scan request and prints the number of
+     * resulting records together with the measured speed.
+     *
+     * @param groupBy the dimension columns to group by
+     * @throws IOException if closing the scanner fails
+     */
+    @SuppressWarnings("unused")
+    private void testAggregate(ImmutableBitSet groupBy) throws IOException {
+        long t = System.currentTimeMillis();
+        GTScanRequest req = new GTScanRequest(info, null, dimensions, groupBy, metrics, aggrFuncs, null, true, 10);
+        IGTScanner scanner = req.decorateScanner(gen.generate(N));
+
+        long count = 0;
+        try {
+            for (GTRecord rec : scanner) {
+                count++;
+            }
+        } finally {
+            // the scanner is closeable; release it even if iteration fails
+            scanner.close();
+        }
+
+        t = System.currentTimeMillis() - t;
+        System.out.println(N + " records aggregated to " + count + ", " + calcSpeed(t) + "K rec/sec");
+    }
+
+    /**
+     * Converts an elapsed time into thousands of records per second, after subtracting the
+     * generation baseline.
+     *
+     * @param t total elapsed milliseconds of a test, generation included
+     * @return speed in K records per second
+     */
+    private int calcSpeed(long t) {
+        // the clock has millisecond resolution and the baseline is a separate measurement, so the
+        // net time can come out zero or negative; clamp to 1 ms to keep the division meaningful
+        long netMillis = Math.max(t - genTime, 1);
+        double sec = (double) netMillis / 1000;
+        return (int) (N / sec / 1000);
+    }
+
+    @Test
+    public void testFilter1() throws IOException {
+        testFilter(eq(col(1), 1, 5, 7));
+    }
+
+    @Test
+    public void testFilter2() throws IOException {
+        testFilter( //
+                and( //
+                        gt(col(0), 5), //
+                        eq(col(2), 2, 4)));
+    }
+
+    @Test
+    public void testFilter3() throws IOException {
+        testFilter( //
+                and( //
+                        gt(col(0), 2), //
+                        eq(col(4), 1, 3, 5, 9, 12, 14, 23, 43, 52, 78, 92), //
+                        or( //
+                                eq(col(1), 2, 4), //
+                                eq(col(2), 2, 4, 5, 9))));
+    }
+
+    /**
+     * Scans N generated records through a filtering scan request and prints the number of
+     * records that come out together with the measured speed.
+     *
+     * @param filter the filter to apply
+     * @throws IOException if closing the scanner fails
+     */
+    @SuppressWarnings("unused")
+    private void testFilter(TupleFilter filter) throws IOException {
+        long t = System.currentTimeMillis();
+        GTScanRequest req = new GTScanRequest(info, null, info.getAllColumns(), filter);
+        IGTScanner scanner = req.decorateScanner(gen.generate(N));
+
+        long count = 0;
+        try {
+            for (GTRecord rec : scanner) {
+                count++;
+            }
+        } finally {
+            // the scanner is closeable; release it even if iteration fails
+            scanner.close();
+        }
+
+        t = System.currentTimeMillis() - t;
+        System.out.println(N + " records filtered to " + count + ", " + calcSpeed(t) + "K rec/sec");
+    }
+
+    /** Combines the given filters with AND. */
+    private LogicalTupleFilter and(TupleFilter... filters) {
+        return logical(FilterOperatorEnum.AND, filters);
+    }
+
+    /** Combines the given filters with OR. */
+    private LogicalTupleFilter or(TupleFilter... filters) {
+        return logical(FilterOperatorEnum.OR, filters);
+    }
+
+    /** Creates a logical filter of the given operator with the given filters as children. */
+    private LogicalTupleFilter logical(FilterOperatorEnum op, TupleFilter[] filters) {
+        LogicalTupleFilter r = new LogicalTupleFilter(op);
+        for (TupleFilter f : filters) {
+            r.addChild(f);
+        }
+        return r;
+    }
+
+    /** Creates a "column &gt; v" filter, with v encoded for the column. */
+    private CompareTupleFilter gt(ColumnTupleFilter col, int v) {
+        CompareTupleFilter r = new CompareTupleFilter(FilterOperatorEnum.GT);
+        r.addChild(col);
+        r.addChild(new ConstantTupleFilter(encode(col, v)));
+        return r;
+    }
+
+    /** Creates a "column IN (values)" filter, with every value encoded for the column. */
+    private CompareTupleFilter eq(ColumnTupleFilter col, int... values) {
+        CompareTupleFilter r = new CompareTupleFilter(FilterOperatorEnum.IN);
+        r.addChild(col);
+
+        List<ByteArray> list = Lists.newArrayList();
+        for (int v : values) {
+            list.add(encode(col, v));
+        }
+        r.addChild(new ConstantTupleFilter(list));
+        return r;
+    }
+
+    /**
+     * Writes a value into a new byte array sized to the max code length the code system reports
+     * for the filter's column.
+     */
+    private ByteArray encode(ColumnTupleFilter col, int v) {
+        int c = col.getColumn().getColumnDesc().getZeroBasedIndex();
+        int len = info.getCodeSystem().maxCodeLength(c);
+        ByteArray bytes = new ByteArray(len);
+        BytesUtil.writeLong(v, bytes.array(), bytes.offset(), len);
+        return bytes;
+    }
+
+    /** Creates a column filter on the i-th column of the table. */
+    private ColumnTupleFilter col(int i) {
+        return new ColumnTupleFilter(info.colRef(i));
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/dc1c2f66/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
new file mode 100644
index 0000000..1d4a2af
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/GTScannerBenchmark2.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.gridtable.benchmark;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.common.util.ImmutableBitSet;
+import org.apache.kylin.gridtable.GTInfo;
+import org.apache.kylin.gridtable.GTRecord;
+import org.apache.kylin.gridtable.GTSampleCodeSystem;
+import org.apache.kylin.gridtable.GTScanRequest;
+import org.apache.kylin.gridtable.IGTScanner;
+import org.apache.kylin.gridtable.GTInfo.Builder;
+import org.apache.kylin.gridtable.benchmark.SortedGTRecordGenerator.Randomizer;
+import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.apache.kylin.metadata.filter.ColumnTupleFilter;
+import org.apache.kylin.metadata.filter.CompareTupleFilter;
+import org.apache.kylin.metadata.filter.ConstantTupleFilter;
+import org.apache.kylin.metadata.filter.LogicalTupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter;
+import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Benchmark of processing 10 million GTRecords. 5 dimensions of type int4, 1 measure of type long8
+ * and 1 measure of type hllc(12).
+ * <p>
+ * Same as GTScannerBenchmark except that the last measure is a single-value HLLC.
+ * <p>
+ * The constructor runs one pass of plain record generation and stores its duration in
+ * {@code genTime}; every test subtracts that baseline from its own elapsed time before
+ * computing the reported speed.
+ */
+@Ignore
+public class GTScannerBenchmark2 {
+
+    final GTInfo info;
+    final SortedGTRecordGenerator gen;
+
+    final ImmutableBitSet dimensions = ImmutableBitSet.valueOf(0, 1, 2, 3, 4);
+    final ImmutableBitSet metrics = ImmutableBitSet.valueOf(5, 6);
+    final String[] aggrFuncs = new String[] { "SUM", "COUNT_DISTINCT" };
+
+    final long N = 10000000; // 10M
+    final long genTime;
+
+    /**
+     * Builds the table layout (5 int4 dimensions as primary key, one long8 and one hllc(12)
+     * measure), configures the record generator accordingly, then times one generation pass as
+     * the baseline.
+     */
+    public GTScannerBenchmark2() {
+        Builder builder = GTInfo.builder();
+        builder.setCodeSystem(new GTSampleCodeSystem());
+        DataType tint = DataType.getType("int4");
+        DataType tlong = DataType.getType("long8");
+        DataType thllc = DataType.getType("hllc(12)");
+        builder.setColumns(tint, tint, tint, tint, tint, tlong, thllc);
+        // the primary key is exactly the dimension columns
+        builder.setPrimaryKey(dimensions);
+        info = builder.build();
+
+        gen = new SortedGTRecordGenerator(info);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(10, 4, null);
+        gen.addDimension(100, 4, null);
+        gen.addMeasure(8);
+        gen.addMeasure(8, new Randomizer() {
+            // one counter reused for every row; it is cleared before each use
+            final HyperLogLogPlusCounter hllc = new HyperLogLogPlusCounter(12);
+
+            @Override
+            public int fillRandom(Random rand, byte[] array, int offset) {
+                try {
+                    hllc.clear();
+                    hllc.add(rand.nextInt());
+                    ByteBuffer wrap = ByteBuffer.wrap(array, offset, array.length - offset);
+                    hllc.writeRegisters(wrap);
+                    // number of bytes written = how far the buffer advanced from its start
+                    return wrap.position() - offset;
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        });
+
+        // warm up
+        long t = System.currentTimeMillis();
+        testGenerate();
+        genTime = System.currentTimeMillis() - t;
+    }
+
+    /** Iterates over N generated records without any further processing. */
+    @SuppressWarnings("unused")
+    public void testGenerate() {
+        long count = 0;
+        for (GTRecord rec : gen.generate(N)) {
+            count++;
+        }
+    }
+
+    @Test
+    public void testAggregate2() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 1));
+    }
+
+    @Test
+    public void testAggregate2_() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 2));
+    }
+
+    @Test
+    public void testAggregate4() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 1, 2, 3));
+    }
+
+    @Test
+    public void testAggregate5() throws IOException {
+        testAggregate(ImmutableBitSet.valueOf(0, 1, 2, 3, 4));
+    }
+
+    /**
+     * Scans N generated records through an aggregating scan request and prints the number of
+     * resulting records together with the measured speed.
+     *
+     * @param groupBy the dimension columns to group by
+     * @throws IOException if closing the scanner fails
+     */
+    @SuppressWarnings("unused")
+    private void testAggregate(ImmutableBitSet groupBy) throws IOException {
+        long t = System.currentTimeMillis();
+        GTScanRequest req = new GTScanRequest(info, null, dimensions, groupBy, metrics, aggrFuncs, null, true, 10);
+        IGTScanner scanner = req.decorateScanner(gen.generate(N));
+
+        long count = 0;
+        try {
+            for (GTRecord rec : scanner) {
+                count++;
+            }
+        } finally {
+            // the scanner is closeable; release it even if iteration fails
+            scanner.close();
+        }
+
+        t = System.currentTimeMillis() - t;
+        System.out.println(N + " records aggregated to " + count + ", " + calcSpeed(t) + "K rec/sec");
+    }
+
+    /**
+     * Converts an elapsed time into thousands of records per second, after subtracting the
+     * generation baseline.
+     *
+     * @param t total elapsed milliseconds of a test, generation included
+     * @return speed in K records per second
+     */
+    private int calcSpeed(long t) {
+        // the clock has millisecond resolution and the baseline is a separate measurement, so the
+        // net time can come out zero or negative; clamp to 1 ms to keep the division meaningful
+        long netMillis = Math.max(t - genTime, 1);
+        double sec = (double) netMillis / 1000;
+        return (int) (N / sec / 1000);
+    }
+
+    @Test
+    public void testFilter1() throws IOException {
+        testFilter(eq(col(1), 1, 5, 7));
+    }
+
+    @Test
+    public void testFilter2() throws IOException {
+        testFilter( //
+                and( //
+                        gt(col(0), 5), //
+                        eq(col(2), 2, 4)));
+    }
+
+    @Test
+    public void testFilter3() throws IOException {
+        testFilter( //
+                and( //
+                        gt(col(0), 2), //
+                        eq(col(4), 1, 3, 5, 9, 12, 14, 23, 43, 52, 78, 92), //
+                        or( //
+                                eq(col(1), 2, 4), //
+                                eq(col(2), 2, 4, 5, 9))));
+    }
+
+    /**
+     * Scans N generated records through a filtering scan request and prints the number of
+     * records that come out together with the measured speed.
+     *
+     * @param filter the filter to apply
+     * @throws IOException if closing the scanner fails
+     */
+    @SuppressWarnings("unused")
+    private void testFilter(TupleFilter filter) throws IOException {
+        long t = System.currentTimeMillis();
+        GTScanRequest req = new GTScanRequest(info, null, info.getAllColumns(), filter);
+        IGTScanner scanner = req.decorateScanner(gen.generate(N));
+
+        long count = 0;
+        try {
+            for (GTRecord rec : scanner) {
+                count++;
+            }
+        } finally {
+            // the scanner is closeable; release it even if iteration fails
+            scanner.close();
+        }
+
+        t = System.currentTimeMillis() - t;
+        System.out.println(N + " records filtered to " + count + ", " + calcSpeed(t) + "K rec/sec");
+    }
+
+    /** Combines the given filters with AND. */
+    private LogicalTupleFilter and(TupleFilter... filters) {
+        return logical(FilterOperatorEnum.AND, filters);
+    }
+
+    /** Combines the given filters with OR. */
+    private LogicalTupleFilter or(TupleFilter... filters) {
+        return logical(FilterOperatorEnum.OR, filters);
+    }
+
+    /** Creates a logical filter of the given operator with the given filters as children. */
+    private LogicalTupleFilter logical(FilterOperatorEnum op, TupleFilter[] filters) {
+        LogicalTupleFilter r = new LogicalTupleFilter(op);
+        for (TupleFilter f : filters) {
+            r.addChild(f);
+        }
+        return r;
+    }
+
+    /** Creates a "column &gt; v" filter, with v encoded for the column. */
+    private CompareTupleFilter gt(ColumnTupleFilter col, int v) {
+        CompareTupleFilter r = new CompareTupleFilter(FilterOperatorEnum.GT);
+        r.addChild(col);
+        r.addChild(new ConstantTupleFilter(encode(col, v)));
+        return r;
+    }
+
+    /** Creates a "column IN (values)" filter, with every value encoded for the column. */
+    private CompareTupleFilter eq(ColumnTupleFilter col, int... values) {
+        CompareTupleFilter r = new CompareTupleFilter(FilterOperatorEnum.IN);
+        r.addChild(col);
+
+        List<ByteArray> list = Lists.newArrayList();
+        for (int v : values) {
+            list.add(encode(col, v));
+        }
+        r.addChild(new ConstantTupleFilter(list));
+        return r;
+    }
+
+    /**
+     * Writes a value into a new byte array sized to the max code length the code system reports
+     * for the filter's column.
+     */
+    private ByteArray encode(ColumnTupleFilter col, int v) {
+        int c = col.getColumn().getColumnDesc().getZeroBasedIndex();
+        int len = info.getCodeSystem().maxCodeLength(c);
+        ByteArray bytes = new ByteArray(len);
+        BytesUtil.writeLong(v, bytes.array(), bytes.offset(), len);
+        return bytes;
+    }
+
+    /** Creates a column filter on the i-th column of the table. */
+    private ColumnTupleFilter col(int i) {
+        return new ColumnTupleFilter(info.colRef(i));
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/dc1c2f66/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/SortedGTRecordGenerator.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/SortedGTRecordGenerator.java b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/SortedGTRecordGenerator.java
new file mode 100644
index 0000000..882267e
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/benchmark/SortedGTRecordGenerator.java
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.gridtable.benchmark;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+
+import org.apache.kylin.common.util.ByteArray;
+import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.gridtable.GTInfo;
+import org.apache.kylin.gridtable.GTRecord;
+import org.apache.kylin.gridtable.IGTScanner;
+
+import com.google.common.collect.Lists;
+
+public class SortedGTRecordGenerator {
+
+    private GTInfo info;
+    private ArrayList<ColSpec> colSpecs = Lists.newArrayList();
+
+    public SortedGTRecordGenerator(GTInfo info) {
+        this.info = info;
+    }
+
+    public void addDimension(long cardinality, int length, Map<Integer, 
Integer> weights) {
+        assert cardinality > 0;
+        ColSpec spec = new ColSpec();
+        spec.cardinality = cardinality;
+        spec.length = length;
+        spec.weights = weights;
+        colSpecs.add(spec);
+    }
+
+    public void addMeasure(int length) {
+        addMeasure(length, new BytesRandomizer(length));
+    }
+
+    public void addMeasure(int length, Randomizer randomizer) {
+        assert length > 0;
+        ColSpec spec = new ColSpec();
+        spec.length = length;
+        spec.measureRandomizer = randomizer;
+        colSpecs.add(spec);
+    }
+    
+    public IGTScanner generate(long nRows) {
+        validate();
+        return new Generator(nRows);
+    }
+
+    private void validate() {
+        if (info.getColumnCount() != colSpecs.size())
+            throw new IllegalArgumentException();
+        for (int i = 0; i < colSpecs.size(); i++) {
+            ColSpec spec = colSpecs.get(i);
+            if (info.getCodeSystem().maxCodeLength(i) < spec.length)
+                throw new IllegalArgumentException();
+        }
+    }
+
+    private class ColSpec {
+        int length;
+        long cardinality;
+        Map<Integer, Integer> weights;
+        long weightSum;
+        Randomizer measureRandomizer;
+    }
+    
+    public interface Randomizer {
+        int fillRandom(Random rand, byte[] array, int offset);
+    }
+    
+    public static class BytesRandomizer implements Randomizer {
+        final private byte bytes[];
+
+        public BytesRandomizer(int len) {
+            this.bytes = new byte[len];
+        }
+        
+        @Override
+        public int fillRandom(Random rand, byte[] array, int offset) {
+            rand.nextBytes(bytes);
+            System.arraycopy(bytes, 0, array, offset, bytes.length);
+            return bytes.length;
+        }
+    }
+
+    private class Generator implements IGTScanner {
+        final long nRows;
+        final Random rand;
+
+        int counter;
+        Distribution[] dist;
+        GTRecord rec;
+
+        public Generator(long nRows) {
+            this.nRows = nRows;
+            this.rand = new Random();
+
+            rec = new GTRecord(info);
+            dist = new Distribution[colSpecs.size()];
+            for (int i = 0; i < colSpecs.size(); i++) {
+                ColSpec spec = colSpecs.get(i);
+                rec.set(i, new ByteArray(spec.length));
+                dist[i] = new Distribution(spec, 0);
+            }
+
+        }
+
+        @Override
+        public Iterator<GTRecord> iterator() {
+            return new Iterator<GTRecord>() {
+
+                @Override
+                public boolean hasNext() {
+                    return counter < nRows;
+                }
+
+                @Override
+                public GTRecord next() {
+                    for (int i = 0; i < colSpecs.size(); i++) {
+                        ColSpec spec = colSpecs.get(i);
+                        // dimension case
+                        if (spec.cardinality > 0) {
+                            long v = dist[i].next();
+                            if (v < 0) {
+                                dist[i] = new Distribution(spec, 
parentLevelCount(i));
+                                v = dist[i].next();
+                            }
+                            ByteArray bytes = rec.get(i);
+                            assert bytes.length() == spec.length;
+                            BytesUtil.writeLong(v, bytes.array(), 
bytes.offset(), bytes.length());
+                        }
+                        // measure case
+                        else {
+                            int len = spec.measureRandomizer.fillRandom(rand, 
rec.get(i).array(), 0);
+                            rec.get(i).setLength(len);
+                        }
+                    }
+                    counter++;
+                    return rec;
+                }
+
+                private long parentLevelCount(int i) {
+                    if (i == 0)
+                        return nRows;
+                    else
+                        return dist[i - 1].leftRowsForCurValue + 1;
+                }
+
+                @Override
+                public void remove() {
+                    throw new UnsupportedOperationException();
+                }
+            };
+        }
+
+        @Override
+        public void close() throws IOException {
+        }
+
+        @Override
+        public GTInfo getInfo() {
+            return info;
+        }
+
+        @Override
+        public int getScannedRowCount() {
+            return counter;
+        }
+
+    }
+
+    private class Distribution {
+        ColSpec spec;
+        long nRows;
+        long leftRows;
+        long leftRowsForCurValue;
+        int curValue;
+
+        public Distribution(ColSpec spec, long nRows) {
+            assert spec.cardinality > 0;
+
+            this.spec = spec;
+            this.nRows = nRows;
+            this.leftRows = nRows;
+            this.leftRowsForCurValue = 0;
+            this.curValue = -1;
+
+            if (spec.weightSum == 0) {
+                spec.weightSum = spec.cardinality; // all value is weight 1 by 
default
+                if (spec.weights != null) {
+                    for (Entry<Integer, Integer> entry : 
spec.weights.entrySet()) {
+                        spec.weightSum += entry.getValue() - 1;
+                    }
+                }
+            }
+        }
+
+        private long weight(int v) {
+            if (spec.weights != null && spec.weights.containsKey(v))
+                return spec.weights.get(v);
+            else
+                return 1;
+        }
+
+        public long next() {
+            if (leftRows == 0)
+                return -1;
+
+            if (leftRowsForCurValue == 0 && curValue < spec.cardinality - 1) {
+                curValue++;
+                if (curValue == spec.cardinality - 1)
+                    leftRowsForCurValue = leftRows;
+                else
+                    leftRowsForCurValue = (long) (nRows * (double) 
weight(curValue) / (double) spec.weightSum);
+            }
+
+            leftRowsForCurValue = Math.max(leftRowsForCurValue - 1, 0);
+            leftRows--;
+            return curValue;
+        }
+    }
+
+}

Reply via email to