This is an automated email from the ASF dual-hosted git repository. jihao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 69f29c3 [TE] Cube algorithm for ratio metrics (#4246) 69f29c3 is described below commit 69f29c3c7baff354613ddccc16859ff5c6f9d368 Author: Yen-Jung Chang <cyenj...@utexas.edu> AuthorDate: Mon Jun 3 12:09:06 2019 -0700 [TE] Cube algorithm for ratio metrics (#4246) - Implement ratio cube related classes. - Add new unit tests for ratio cube. - Tests --- .../thirdeye/cube/additive/AdditiveCubeNode.java | 53 ++-- .../thirdeye/cube/additive/AdditiveDBClient.java | 12 +- .../pinot/thirdeye/cube/additive/AdditiveRow.java | 7 +- .../pinot/thirdeye/cube/cost/CostFunction.java | 1 + .../thirdeye/cube/cost/RatioCostFunction.java | 136 ++++++++++ .../apache/pinot/thirdeye/cube/data/cube/Cube.java | 3 +- .../pinot/thirdeye/cube/data/cube/CubeUtils.java | 25 ++ .../cube/data/cube/DimNameValueCostEntry.java | 54 ++-- .../cube/data/dbclient/BaseCubePinotClient.java | 12 + .../pinot/thirdeye/cube/data/dbrow/BaseRow.java | 25 ++ .../thirdeye/cube/data/node/BaseCubeNode.java | 49 +++- .../pinot/thirdeye/cube/data/node/CubeNode.java | 12 +- .../thirdeye/cube/data/node/CubeNodeUtils.java | 7 +- .../MultiDimensionalRatioSummary.java} | 76 ++---- .../MultiDimensionalSummary.java | 38 ++- .../MultiDimensionalSummaryCLITool.java | 6 +- .../pinot/thirdeye/cube/entry/SummaryUtils.java | 43 ++++ .../pinot/thirdeye/cube/ratio/RatioCubeNode.java | 285 +++++++++++++++++++++ .../pinot/thirdeye/cube/ratio/RatioDBClient.java | 113 ++++++++ .../apache/pinot/thirdeye/cube/ratio/RatioRow.java | 190 ++++++++++++++ .../thirdeye/cube/summary/BaseResponseRow.java | 7 +- .../pinot/thirdeye/cube/summary/Summary.java | 43 ++-- .../thirdeye/cube/summary/SummaryResponse.java | 33 +-- .../dashboard/resources/SummaryResource.java | 78 +++++- .../MultiDimensionalSummaryCLIToolTest.java | 1 + .../pinot/thirdeye/cube/data/cube/CubeTest.java | 14 +- .../cube/data/cube/DimNameValueCostEntryTest.java | 7 +- 
.../cube/data/dbrow/DimensionValuesTest.java | 1 - .../thirdeye/cube/data/dbrow/DimensionsTest.java | 1 - .../cube/data/node/AdditiveCubeNodeTest.java | 61 +++++ .../thirdeye/cube/data/node/CubeNodeTest.java | 49 ++-- .../thirdeye/cube/data/node/RatioCubeNodeTest.java | 112 ++++++++ 32 files changed, 1317 insertions(+), 237 deletions(-) diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveCubeNode.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveCubeNode.java index 1b9f85f..7becff1 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveCubeNode.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveCubeNode.java @@ -21,7 +21,7 @@ package org.apache.pinot.thirdeye.cube.additive; import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.base.MoreObjects; -import java.util.Objects; +import com.google.common.base.Objects; import org.apache.pinot.thirdeye.cube.data.node.BaseCubeNode; @@ -132,52 +132,55 @@ public class AdditiveCubeNode extends BaseCubeNode<AdditiveCubeNode, AdditiveRow if (this == o) { return true; } - if (o == null || getClass() != o.getClass()) { + if (!(o instanceof AdditiveCubeNode)) { + return false; + } + if (!super.equals(o)) { return false; } AdditiveCubeNode that = (AdditiveCubeNode) o; - return getLevel() == that.getLevel() && index == that.index - && Double.compare(that.getBaselineValue(), getBaselineValue()) == 0 - && Double.compare(that.getCurrentValue(), getCurrentValue()) == 0 - && Double.compare(that.getCost(), getCost()) == 0 && Objects.equals(data, that.data); + return Double.compare(that.baselineValue, baselineValue) == 0 + && Double.compare(that.currentValue, currentValue) == 0; } @Override public int hashCode() { - return Objects - .hash(getLevel(), index, getBaselineValue(), getCurrentValue(), getCost(), data); + return 
Objects.hashCode(super.hashCode(), baselineValue, currentValue); } /** - * The toString method for parent node. We don't invoke parent's toString() to prevent multiple calls of toString to - * their parents. - * - * @return a simple string representation of a parent cube node, which does not toString its parent node recursively. - */ - private String toStringAsParent() { - return MoreObjects.toStringHelper(this).add("level", level).add("index", index).add("baselineValue", baselineValue) - .add("currentValue", currentValue).add("cost", cost).add("data", data).toString(); - } - - /** - * ToString that handles if the given cube node is null, i.e., a root cube node. + * ToString that handles if the given cube node is null, i.e., a root cube node. Moreover, it does not invoke + * parent's toString() to prevent multiple calls of toString to their parents. * * @param node the node to be converted to string. * - * @return a string representation of this node. + * @return a simple string representation of a parent cube node, which does not toString its parent node recursively. 
*/ - private static String toStringAsParent(AdditiveCubeNode node) { + private String toStringAsParent(AdditiveCubeNode node) { if (node == null) { return "null"; } else { - return node.toStringAsParent(); + return MoreObjects.toStringHelper(this) + .add("level", level) + .add("index", index) + .add("baselineValue", baselineValue) + .add("currentValue", currentValue) + .add("cost", cost) + .add("data", data) + .toString(); } } @Override public String toString() { - return MoreObjects.toStringHelper(this).add("level", level).add("index", index).add("baselineValue", baselineValue) - .add("currentValue", currentValue).add("cost", cost).add("data", data).add("parent", toStringAsParent(parent)) + return MoreObjects.toStringHelper(this) + .add("level", level) + .add("index", index) + .add("baselineValue", baselineValue) + .add("currentValue", currentValue) + .add("cost", cost) + .add("data", data) + .add("parent", toStringAsParent(parent)) .toString(); } } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveDBClient.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveDBClient.java index 2047801..ba7db58 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveDBClient.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveDBClient.java @@ -30,17 +30,11 @@ import org.apache.pinot.thirdeye.cube.data.dbclient.BaseCubePinotClient; import org.apache.pinot.thirdeye.cube.data.dbclient.CubeSpec; import org.apache.pinot.thirdeye.datasource.cache.QueryCache; + /** - * This class generates query requests to the backend database and retrieve the data for summary algorithm. + * This class generates query requests to the backend database and retrieve the additive metric for summary algorithm. * - * The generated requests are organized the following tree structure: - * Root level by GroupBy dimensions. 
- * Mid level by "baseline" or "current"; The "baseline" request is ordered before the "current" request. - * Leaf level by metric functions; This level is handled by the request itself, i.e., a request can gather multiple - * metric functions at the same time. - * The generated requests are store in a List. Because of the tree structure, the requests belong to the same - * timeline (baseline or current) are located together. Then, the requests belong to the same GroupBy dimension are - * located together. + * @see org.apache.pinot.thirdeye.cube.data.dbclient.BaseCubePinotClient */ public class AdditiveDBClient extends BaseCubePinotClient<AdditiveRow> { private String metric; diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveRow.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveRow.java index 0172d61..fdac289 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveRow.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/AdditiveRow.java @@ -20,7 +20,6 @@ package org.apache.pinot.thirdeye.cube.additive; import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; import java.util.Objects; import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; @@ -42,8 +41,7 @@ public class AdditiveRow extends BaseRow { * @param dimensionValues the dimension values of this row. */ public AdditiveRow(Dimensions dimensions, DimensionValues dimensionValues) { - this.dimensions = Preconditions.checkNotNull(dimensions); - this.dimensionValues = Preconditions.checkNotNull(dimensionValues); + super(dimensions, dimensionValues); } /** @@ -55,8 +53,7 @@ public class AdditiveRow extends BaseRow { * @param currentValue the current value of this additive metric. 
*/ public AdditiveRow(Dimensions dimensions, DimensionValues dimensionValues, double baselineValue, double currentValue) { - this.dimensions = Preconditions.checkNotNull(dimensions); - this.dimensionValues = Preconditions.checkNotNull(dimensionValues); + super(dimensions, dimensionValues); this.baselineValue = baselineValue; this.currentValue = currentValue; } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/CostFunction.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/CostFunction.java index 75e6392..88df1bd 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/CostFunction.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/CostFunction.java @@ -35,6 +35,7 @@ public interface CostFunction { * * @return the error cost of the current node. */ + // TODO: Change to take as input nodes instead of values double computeCost(double parentChangeRatio, double baselineValue, double currentValue, double baselineSize, double currentSize, double topBaselineValue, double topCurrentValue, double topBaselineSize, double topCurrentSize); diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/RatioCostFunction.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/RatioCostFunction.java new file mode 100644 index 0000000..440956f --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/cost/RatioCostFunction.java @@ -0,0 +1,136 @@ +package org.apache.pinot.thirdeye.cube.cost; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.math.DoubleMath; +import java.util.Map; +import org.apache.pinot.thirdeye.cube.data.cube.CubeUtils; + + +/** + * Calculates the cost for ratio metrics such as O/E ratio, mean, etc. + * The calculation of cost considers change difference, change changeRatio, and node size. 
+ */ + public class RatioCostFunction implements CostFunction { + public static final String SIZE_FACTOR_THRESHOLD_PARAM = "min_size_factor"; + + // The threshold to the contribution to overall changes in percentage + private static double epsilon = 0.00001; + private double minSizeFactor = 0.01d; // 1% + + /** + * Constructs a ratio cost function with default parameters. + */ + public RatioCostFunction() { + } + + /** + * Constructs a ratio cost function with customized parameters. + * + * Available parameters: + * SIZE_FACTOR_THRESHOLD_PARAM -> Double. Any node whose size factor is smaller than this threshold, its cost = 0. + * + * @param params the parameters for this cost function. + */ + public RatioCostFunction(Map<String, String> params) { + if (params.containsKey(SIZE_FACTOR_THRESHOLD_PARAM)) { + String pctThresholdString = params.get(SIZE_FACTOR_THRESHOLD_PARAM); + Preconditions.checkArgument(!Strings.isNullOrEmpty(pctThresholdString)); + this.minSizeFactor = Double.parseDouble(pctThresholdString); + } + } + + /** + * Returns the cost that consider change difference, change changeRatio, and node size. + * + * In brief, this function uses this formula to compute the cost: + * change difference * log(contribution percentage * change changeRatio) + * + * In addition, if a node size to overall data is smaller than 1%, then the cost is always zero. + * + * @param parentChangeRatio the changeRatio between baseline and current value of parent node. + * @param baselineValue the baseline value of the current node. + * @param currentValue the current value of the current node. + * @param baselineSize the size of baseline node. + * @param currentSize the size of current node. + * @param topBaselineValue the baseline value of the top node. + * @param topCurrentValue the current value of the top node. + * @param topBaselineSize the size of baseline data cube . + * @param topCurrentSize the size of current data cube . 
+ * + * @return the cost that consider change difference, change changeRatio, and node size. + */ + @Override + public double computeCost(double parentChangeRatio, double baselineValue, double currentValue, double baselineSize, + double currentSize, double topBaselineValue, double topCurrentValue, double topBaselineSize, + double topCurrentSize) { + + // Contribution is the size of the node + double sizeFactor = (baselineSize + currentSize) / (topBaselineSize + topCurrentSize); + // Ignore <1% nodes + if (DoubleMath.fuzzyCompare(sizeFactor, minSizeFactor, epsilon) < 0) { + return 0d; + } + Preconditions.checkState(DoubleMath.fuzzyCompare(sizeFactor,0, epsilon) >= 0, "Contribution {} is smaller than 0.", sizeFactor); + Preconditions.checkState(DoubleMath.fuzzyCompare(sizeFactor,1, epsilon) <= 0, "Contribution {} is larger than 1", sizeFactor); + // The cost function considers change difference, change changeRatio, and node size (i.e., sizeFactor) + return fillEmptyValuesAndGetError(baselineValue, currentValue, parentChangeRatio, sizeFactor); + } + + /** + * The basic calculation of cost. + * + * @param baselineValue the baseline value of the current node. + * @param currentValue the current value of the current node. + * @param parentRatio parent's change ratio, which is used to produce a virtual change ratio for the node. + * @param sizeFactor the size factor of the node w.r.t. the entire data. + * + * @return the error cost of the given baseline and current value. 
+ */ + private static double error(double baselineValue, double currentValue, double parentRatio, double sizeFactor) { + double expectedBaselineValue = parentRatio * baselineValue; + double expectedRatio = currentValue / expectedBaselineValue; + double weightedExpectedRatio = (expectedRatio - 1) * sizeFactor + 1; + double logExpRatio = Math.log(weightedExpectedRatio); + return (currentValue - expectedBaselineValue) * logExpRatio; + } + + /** + * Calculates the error if either baseline or current value is missing (i.e., value is zero). + * + * @param baseline the baseline value. + * @param currentValue the current value. + * @param parentRatio parent's change ratio, which is used to produce a virtual change ratio for the node. + * @param sizeFactor the size factor of the node w.r.t. the entire data. + * + * @return the error of the given baseline and current value. + */ + private static double errorWithMissingBaselineOrCurrent(double baseline, double currentValue, double parentRatio, + double sizeFactor) { + parentRatio = CubeUtils.ensureChangeRatioDirection(baseline, currentValue, parentRatio); + double logExpRatio = Math.log((parentRatio - 1) * sizeFactor + 1); + return (currentValue - baseline) * logExpRatio; + } + + /** + * Auto fill in baselineValue and currentValue using parentRatio when one of them is zero. + * If baselineValue and currentValue both are zero or parentRatio is not finite, this function returns 0. 
+ */ + private static double fillEmptyValuesAndGetError(double baselineValue, double currentValue, double parentRatio, + double sizeFactor) { + if (Double.compare(0., parentRatio) == 0 || Double.isNaN(parentRatio)) { + parentRatio = 1d; + } + if (Double.compare(0., baselineValue) != 0 && Double.compare(0., currentValue) != 0) { + return error(baselineValue, currentValue, parentRatio, sizeFactor); + } else if (Double.compare(baselineValue, 0d) == 0 || Double.compare(currentValue, 0d) == 0) { + if (Double.compare(0., baselineValue) == 0) { + return errorWithMissingBaselineOrCurrent(0d, currentValue, parentRatio, sizeFactor); + } else { + return errorWithMissingBaselineOrCurrent(baselineValue, 0d, parentRatio, sizeFactor); + } + } else { // baselineValue and currentValue are zeros. Set cost to zero so the node will be naturally aggregated to its parent. + return 0.; + } + } +} diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/Cube.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/Cube.java index dad61b8..2c7fb93 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/Cube.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/Cube.java @@ -323,7 +323,8 @@ public class Cube { // the cube (Ca|Cb) topCurrentSize); costSet.add(new DimNameValueCostEntry(dimensionName, dimensionValue, wowNode.getBaselineValue(), - wowNode.getCurrentValue(), wowNode.getBaselineSize(), wowNode.getCurrentSize(), contributionFactor, cost)); + wowNode.getCurrentValue(), wowNode.changeRatio(), wowNode.getCurrentValue() - wowNode.getBaselineValue(), + wowNode.getBaselineSize(), wowNode.getCurrentSize(), contributionFactor, cost)); } } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/CubeUtils.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/CubeUtils.java index 
99be81a..9d67d53 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/CubeUtils.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/CubeUtils.java @@ -82,4 +82,29 @@ public class CubeUtils { return ret; } } + + /** + * Flips parent's change ratio if the change ratios of current node and its parent are different. + * + * @param baselineValue the baseline value of a node. + * @param currentValue the current value of a node. + * @param ratio the (parent) ratio to be flipped. + * + * @return the ratio that has the same direction as the change direction of baseline and current value. + */ + public static double ensureChangeRatioDirection(double baselineValue, double currentValue, double ratio) { + // case: value goes down but parent's value goes up + if (DoubleMath.fuzzyCompare(baselineValue, currentValue, epsilon) > 0 && DoubleMath.fuzzyCompare(ratio, 1, epsilon) > 0) { + if (Double.compare(ratio, 2) >= 0) { + ratio = 2d - (ratio - ((long) ratio - 1)); + } else { + ratio = 2d - ratio; + } + // case: value goes up but parent's value goes down + } else if (DoubleMath.fuzzyCompare(baselineValue, currentValue, epsilon) < 0 && DoubleMath.fuzzyCompare(ratio, 1, epsilon) < 0) { + ratio = 2d - ratio; + } + // return the original ratio for other cases. 
+ return ratio; + } } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntry.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntry.java index 3cbd24e..ece5d33 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntry.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntry.java @@ -25,15 +25,17 @@ import com.google.common.base.Preconditions; public class DimNameValueCostEntry implements Comparable<DimNameValueCostEntry>{ private String dimName; private String dimValue; - private double cost; - private double contributionFactor; - private double currentValue; private double baselineValue; + private double currentValue; + private double changeRatio; + private double changeDiff; private double baselineSize; private double currentSize; + private double sizeFactor; + private double cost; public DimNameValueCostEntry(String dimensionName, String dimensionValue, double baselineValue, double currentValue, - double baselineSize, double currentSize, double contributionFactor, double cost) { + double changeRatio, double changeDiff, double baselineSize, double currentSize, double sizeFactor, double cost) { Preconditions.checkNotNull(dimensionName, "dimension name cannot be null."); Preconditions.checkNotNull(dimensionValue, "dimension value cannot be null."); @@ -41,18 +43,20 @@ public class DimNameValueCostEntry implements Comparable<DimNameValueCostEntry>{ this.dimValue = dimensionValue; this.baselineValue = baselineValue; this.currentValue = currentValue; + this.changeRatio = changeRatio; + this.changeDiff = changeDiff; this.baselineSize = baselineSize; this.currentSize = currentSize; - this.contributionFactor = contributionFactor; + this.sizeFactor = sizeFactor; this.cost = cost; } - public double getContributionFactor() { - return contributionFactor; + 
public double getSizeFactor() { + return sizeFactor; } - public void setContributionFactor(double contributionFactor) { - this.contributionFactor = contributionFactor; + public void setSizeFactor(double sizeFactor) { + this.sizeFactor = sizeFactor; } public String getDimName() { @@ -111,6 +115,22 @@ public class DimNameValueCostEntry implements Comparable<DimNameValueCostEntry>{ this.currentSize = currentSize; } + public double getChangeRatio() { + return changeRatio; + } + + public void setChangeRatio(double changeRatio) { + this.changeRatio = changeRatio; + } + + public double getChangeDiff() { + return changeDiff; + } + + public void setChangeDiff(double changeDiff) { + this.changeDiff = changeDiff; + } + @Override public int compareTo(DimNameValueCostEntry that) { return Double.compare(this.cost, that.cost); @@ -120,14 +140,14 @@ public class DimNameValueCostEntry implements Comparable<DimNameValueCostEntry>{ public String toString() { return MoreObjects.toStringHelper("Entry") .add("dim", String.format("%s:%s", dimName, dimValue)) - .add("baselineVal", baselineValue) - .add("currentVal", currentValue) - .add("delta", currentValue - baselineValue) - .add("changeRatio", String.format("%.2f", currentValue / baselineValue)) - .add("baselineSize", baselineSize) - .add("currentSize", currentSize) - .add("sizeFactor", String.format("%.2f", contributionFactor)) - .add("cost", String.format("%.4f", cost)) + .add("baseVal", baselineValue) + .add("curVal", currentValue) + .add("ratio", String.format("%.4f", changeRatio)) + .add("delta", changeDiff) + .add("baseSize", baselineSize) + .add("curSize", currentSize) + .add("sizeFactor", String.format("%.4f", sizeFactor)) + .add("cost", String.format("%.6f", cost)) .toString(); } } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbclient/BaseCubePinotClient.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbclient/BaseCubePinotClient.java index 
ae5327d..ea7dbc9 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbclient/BaseCubePinotClient.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbclient/BaseCubePinotClient.java @@ -45,6 +45,18 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * This class generates query requests to the backend database and retrieve the data for summary algorithm. + * + * The generated requests are organized the following tree structure: + * Root level by GroupBy dimensions. + * Mid level by "baseline" or "current"; The "baseline" request is ordered before the "current" request. + * Leaf level by metric functions; This level is handled by the request itself, i.e., a request can gather multiple + * metric functions at the same time. + * The generated requests are store in a List. Because of the tree structure, the requests belong to the same + * timeline (baseline or current) are located together. Then, the requests belong to the same GroupBy dimension are + * located together. 
+ */ public abstract class BaseCubePinotClient<R extends Row> implements CubePinotClient<R> { protected static final Logger LOG = LoggerFactory.getLogger(BaseCubePinotClient.class); diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbrow/BaseRow.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbrow/BaseRow.java index bf02f69..889968c 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbrow/BaseRow.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/dbrow/BaseRow.java @@ -19,6 +19,7 @@ package org.apache.pinot.thirdeye.cube.data.dbrow; +import com.google.common.base.Objects; import com.google.common.base.Preconditions; @@ -26,6 +27,13 @@ public abstract class BaseRow implements Row { protected Dimensions dimensions; protected DimensionValues dimensionValues; + public BaseRow() { } + + public BaseRow(Dimensions dimensions, DimensionValues dimensionValues) { + this.dimensions = Preconditions.checkNotNull(dimensions); + this.dimensionValues = Preconditions.checkNotNull(dimensionValues); + } + @Override public Dimensions getDimensions() { return dimensions; @@ -45,4 +53,21 @@ public abstract class BaseRow implements Row { public void setDimensionValues(DimensionValues dimensionValues) { this.dimensionValues = Preconditions.checkNotNull(dimensionValues); } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof BaseRow)) { + return false; + } + BaseRow baseRow = (BaseRow) o; + return Objects.equal(dimensions, baseRow.dimensions) && Objects.equal(dimensionValues, baseRow.dimensionValues); + } + + @Override + public int hashCode() { + return Objects.hashCode(dimensions, dimensionValues); + } } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/BaseCubeNode.java 
b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/BaseCubeNode.java index 9a3558e..679517a 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/BaseCubeNode.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/BaseCubeNode.java @@ -20,15 +20,23 @@ package org.apache.pinot.thirdeye.cube.data.node; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.common.base.Objects; import com.google.common.base.Preconditions; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import org.apache.pinot.thirdeye.cube.data.cube.CubeUtils; import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; import org.apache.pinot.thirdeye.cube.data.dbrow.Row; +/** + * Provides basic implementation for hierarchical cube nodes. + * + * @param <N> the class of the inherited cube node. + * @param <R> the Row class of the inherited cube node. + */ public abstract class BaseCubeNode<N extends BaseCubeNode, R extends Row> implements CubeNode<N> { protected int level; protected int index; @@ -68,6 +76,13 @@ public abstract class BaseCubeNode<N extends BaseCubeNode, R extends Row> implem "Current node is not a child node of the given parent node. Current and parent dimensions: ", data.getDimensions(), parent.getDimensions()); parent.children.add(this); + // Sort node from large to small to increase stability of this algorithm. + // The reason is that the parent values will dynamically be updated whenever a child is extracted. In addition, + // large children are unlikely to be interfered by small children. Therefore, evaluating large children before + // small children can increase the stability of this algorithm. 
+ parent.children.sort( (Object o1, Object o2) -> + (int) ((((CubeNode)o2).getBaselineSize() + ((CubeNode)o2).getCurrentSize()) - (((CubeNode)o1).getBaselineSize() + ((CubeNode)o1).getCurrentSize())) + ); } } @@ -113,18 +128,24 @@ public abstract class BaseCubeNode<N extends BaseCubeNode, R extends Row> implem return Collections.unmodifiableList(children); } + /** + * Returns the change ratio of the node if it is a finite number; otherwise, returns an alternative ratio as follows: + * 1. If originalChangeRatio is a finite number, return it; + * 2. otherwise, get the ratio from its parent. + * 3. If none is available, return 1.0. + */ @Override - public double targetChangeRatio() { + public double bootStrapChangeRatio() { double ratio = changeRatio(); - if (!Double.isInfinite(ratio) && Double.compare(ratio, 0d) != 0) { + if (Double.isFinite(ratio) && Double.compare(ratio, 0d) != 0) { return ratio; } else { ratio = originalChangeRatio(); - if (!Double.isInfinite(ratio) && Double.compare(ratio, 0d) != 0) { - return ratio; + if (Double.isFinite(ratio) && Double.compare(ratio, 0d) != 0) { + return CubeUtils.ensureChangeRatioDirection(getBaselineValue(), getCurrentValue(), ratio); } else { if (parent != null) { - return parent.targetChangeRatio(); + return CubeUtils.ensureChangeRatioDirection(getBaselineValue(), getCurrentValue(), parent.bootStrapChangeRatio()); } else { return 1.; } @@ -141,4 +162,22 @@ public abstract class BaseCubeNode<N extends BaseCubeNode, R extends Row> implem return Double.compare(1., originalChangeRatio()) <= 0; } } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof BaseCubeNode)) { + return false; + } + BaseCubeNode<?, ?> that = (BaseCubeNode<?, ?>) o; + return level == that.level && index == that.index && Double.compare(that.cost, cost) == 0 && Objects.equal(data, + that.data); + } + + @Override + public int hashCode() { + return Objects.hashCode(level, index, cost, data); + } } diff 
--git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNode.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNode.java index ff37eb7..cca4000 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNode.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNode.java @@ -25,6 +25,11 @@ import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +/** + * Defines the operations that are used by the cube algorithm. + * + * @param <N> the class of the actual cube node. For example, ratio cube algorithm will use RatioCubeNode. + */ public interface CubeNode<N extends CubeNode> { /** @@ -176,11 +181,10 @@ public interface CubeNode<N extends CubeNode> { double changeRatio(); /** - * Return the changeRatio of the node. If the changeRatio is not a finite number, then it returns the originalChangeRatio. - * If the originalChangeRatio is not a finite number, then it bootstraps to the parents until it finds a finite - * changeRatio. If no finite changeRatio available, then it returns 1. + * Returns the change ratio of the node if it is a finite number; otherwise, provide an alternative change ratio. + * @see BaseCubeNode for the basic implementation. */ - double targetChangeRatio(); + double bootStrapChangeRatio(); /** * Returns the current changeRatio of this node is increased or decreased, i.e., returns true if changeRatio of the node >= 1.0. 
diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeUtils.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeUtils.java index 3c37797..16a798f 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeUtils.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeUtils.java @@ -38,8 +38,9 @@ public class CubeNodeUtils { } public static boolean equalHierarchy(CubeNode node1, CubeNode node1Parent, CubeNode node2, CubeNode node2Parent) { - boolean sameData = ObjectUtils.equals(node1, node2); - if (sameData) { + if (!ObjectUtils.equals(node1, node2)) { // Return false if data of the nodes are different. + return false; + } else { // Check hierarchy if the two given nodes have the same data value. // Check parent reference if (node1Parent != null && node1.getParent() != node1Parent) { return false; @@ -64,8 +65,6 @@ public class CubeNodeUtils { } } return true; - } else { - return false; } } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummary.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalRatioSummary.java similarity index 56% copy from thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummary.java copy to thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalRatioSummary.java index 3591c83..a0d0066 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummary.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalRatioSummary.java @@ -1,32 +1,13 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.pinot.thirdeye.cube.additive; +package org.apache.pinot.thirdeye.cube.entry; import com.google.common.base.Preconditions; -import com.google.common.base.Strings; import com.google.common.collect.Multimap; +import java.util.ArrayList; import java.util.List; -import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; import org.apache.pinot.thirdeye.cube.cost.CostFunction; import org.apache.pinot.thirdeye.cube.data.cube.Cube; -import org.apache.pinot.thirdeye.cube.data.dbclient.CubePinotClient; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.apache.pinot.thirdeye.cube.ratio.RatioDBClient; import org.apache.pinot.thirdeye.cube.summary.Summary; import org.apache.pinot.thirdeye.cube.summary.SummaryResponse; import org.joda.time.DateTime; @@ -34,19 +15,20 @@ import org.joda.time.DateTimeZone; /** - * A portal class that is used to trigger the multi-dimensional summary algorithm and to get the summary response. + * A portal class that is used to trigger the multi-dimensional summary algorithm and to get the summary response on a + * ratio metric. 
*/ -public class MultiDimensionalSummary { - private CubePinotClient dbClient; +public class MultiDimensionalRatioSummary { + private RatioDBClient dbClient; private CostFunction costFunction; private DateTimeZone dateTimeZone; - public MultiDimensionalSummary(CubePinotClient olapClient, CostFunction costFunction, - DateTimeZone dateTimeZone) { - Preconditions.checkNotNull(olapClient); + public MultiDimensionalRatioSummary(RatioDBClient dbClient, CostFunction costFunction, DateTimeZone dateTimeZone) { + Preconditions.checkNotNull(dbClient); Preconditions.checkNotNull(dateTimeZone); Preconditions.checkNotNull(costFunction); - this.dbClient = olapClient; + + this.dbClient = dbClient; this.costFunction = costFunction; this.dateTimeZone = dateTimeZone; } @@ -55,7 +37,8 @@ public class MultiDimensionalSummary { * Builds the summary given the given metric information. * * @param dataset the dataset of the metric. - * @param metric the name of the metric. + * @param numeratorMetric the name of the numerator metric. + * @param denominatorMetric the name of the denominator metric. * @param currentStartInclusive the start time of current data cube, inclusive. * @param currentEndExclusive the end time of the current data cube, exclusive. * @param baselineStartInclusive the start of the baseline data cube, inclusive. @@ -71,25 +54,22 @@ public class MultiDimensionalSummary { * of dimensions. * @param doOneSideError if the summary should only consider one side error. * - * @return the multi-dimensional summary. + * @return the multi-dimensional summary of a ratio metric. 
*/ - public SummaryResponse buildSummary(String dataset, String metric, long currentStartInclusive, - long currentEndExclusive, long baselineStartInclusive, long baselineEndExclusive, Dimensions dimensions, - Multimap<String, String> dataFilters, int summarySize, int depth, List<List<String>> hierarchies, - boolean doOneSideError) throws Exception { - Preconditions.checkArgument(!Strings.isNullOrEmpty(dataset)); - Preconditions.checkArgument(!Strings.isNullOrEmpty(metric)); - Preconditions.checkArgument(currentStartInclusive < currentEndExclusive); - Preconditions.checkArgument(baselineStartInclusive < baselineEndExclusive); - Preconditions.checkNotNull(dimensions); - Preconditions.checkArgument(dimensions.size() > 0); - Preconditions.checkNotNull(dataFilters); - Preconditions.checkArgument(summarySize > 1); - Preconditions.checkNotNull(hierarchies); - Preconditions.checkArgument(depth >= 0); + public SummaryResponse buildRatioSummary(String dataset, String numeratorMetric, String denominatorMetric, + long currentStartInclusive, long currentEndExclusive, long baselineStartInclusive, long baselineEndExclusive, + Dimensions dimensions, Multimap<String, String> dataFilters, int summarySize, int depth, + List<List<String>> hierarchies, boolean doOneSideError) throws Exception { + // Check arguments + List<String> metrics = new ArrayList<>(); + metrics.add(numeratorMetric); + metrics.add(denominatorMetric); + SummaryUtils.checkArguments(dataset, metrics, currentStartInclusive, currentEndExclusive, baselineStartInclusive, + baselineEndExclusive, dimensions, dataFilters, summarySize, depth, hierarchies); dbClient.setDataset(dataset); - ((AdditiveDBClient) dbClient).setMetric(metric); + dbClient.setNumeratorMetric(numeratorMetric); + dbClient.setDenominatorMetric(denominatorMetric); dbClient.setCurrentStartInclusive(new DateTime(currentStartInclusive, dateTimeZone)); dbClient.setCurrentEndExclusive(new DateTime(currentEndExclusive, dateTimeZone)); 
dbClient.setBaselineStartInclusive(new DateTime(baselineStartInclusive, dateTimeZone)); @@ -107,7 +87,7 @@ public class MultiDimensionalSummary { response = summary.computeSummary(summarySize, doOneSideError); } response.setDataset(dataset); - response.setMetricName(metric); + response.setMetricName(numeratorMetric + "/" + denominatorMetric); return response; } diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummary.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalSummary.java similarity index 80% rename from thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummary.java rename to thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalSummary.java index 3591c83..6042d13 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummary.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalSummary.java @@ -17,16 +17,16 @@ * under the License. 
*/ -package org.apache.pinot.thirdeye.cube.additive; +package org.apache.pinot.thirdeye.cube.entry; import com.google.common.base.Preconditions; -import com.google.common.base.Strings; import com.google.common.collect.Multimap; +import java.util.ArrayList; import java.util.List; -import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.apache.pinot.thirdeye.cube.additive.AdditiveDBClient; import org.apache.pinot.thirdeye.cube.cost.CostFunction; import org.apache.pinot.thirdeye.cube.data.cube.Cube; -import org.apache.pinot.thirdeye.cube.data.dbclient.CubePinotClient; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; import org.apache.pinot.thirdeye.cube.summary.Summary; import org.apache.pinot.thirdeye.cube.summary.SummaryResponse; import org.joda.time.DateTime; @@ -34,19 +34,20 @@ import org.joda.time.DateTimeZone; /** - * A portal class that is used to trigger the multi-dimensional summary algorithm and to get the summary response. + * A portal class that is used to trigger the multi-dimensional summary algorithm and to get the summary response on + * an additive metric. */ public class MultiDimensionalSummary { - private CubePinotClient dbClient; + private AdditiveDBClient dbClient; private CostFunction costFunction; private DateTimeZone dateTimeZone; - public MultiDimensionalSummary(CubePinotClient olapClient, CostFunction costFunction, + public MultiDimensionalSummary(AdditiveDBClient dbClient, CostFunction costFunction, DateTimeZone dateTimeZone) { - Preconditions.checkNotNull(olapClient); + Preconditions.checkNotNull(dbClient); Preconditions.checkNotNull(dateTimeZone); Preconditions.checkNotNull(costFunction); - this.dbClient = olapClient; + this.dbClient = dbClient; this.costFunction = costFunction; this.dateTimeZone = dateTimeZone; } @@ -71,25 +72,20 @@ public class MultiDimensionalSummary { * of dimensions. * @param doOneSideError if the summary should only consider one side error. * - * @return the multi-dimensional summary. 
+ * @return the multi-dimensional summary of an additive metric. */ public SummaryResponse buildSummary(String dataset, String metric, long currentStartInclusive, long currentEndExclusive, long baselineStartInclusive, long baselineEndExclusive, Dimensions dimensions, Multimap<String, String> dataFilters, int summarySize, int depth, List<List<String>> hierarchies, boolean doOneSideError) throws Exception { - Preconditions.checkArgument(!Strings.isNullOrEmpty(dataset)); - Preconditions.checkArgument(!Strings.isNullOrEmpty(metric)); - Preconditions.checkArgument(currentStartInclusive < currentEndExclusive); - Preconditions.checkArgument(baselineStartInclusive < baselineEndExclusive); - Preconditions.checkNotNull(dimensions); - Preconditions.checkArgument(dimensions.size() > 0); - Preconditions.checkNotNull(dataFilters); - Preconditions.checkArgument(summarySize > 1); - Preconditions.checkNotNull(hierarchies); - Preconditions.checkArgument(depth >= 0); + // Check arguments + List<String> metrics = new ArrayList<>(); + metrics.add(metric); + SummaryUtils.checkArguments(dataset, metrics, currentStartInclusive, currentEndExclusive, baselineStartInclusive, + baselineEndExclusive, dimensions, dataFilters, summarySize, depth, hierarchies); dbClient.setDataset(dataset); - ((AdditiveDBClient) dbClient).setMetric(metric); + dbClient.setMetric(metric); dbClient.setCurrentStartInclusive(new DateTime(currentStartInclusive, dateTimeZone)); dbClient.setCurrentEndExclusive(new DateTime(currentEndExclusive, dateTimeZone)); dbClient.setBaselineStartInclusive(new DateTime(baselineStartInclusive, dateTimeZone)); diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLITool.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalSummaryCLITool.java similarity index 97% rename from 
thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLITool.java rename to thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalSummaryCLITool.java index 963e265..5366910 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLITool.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/MultiDimensionalSummaryCLITool.java @@ -17,13 +17,15 @@ * under the License. */ -package org.apache.pinot.thirdeye.cube.additive; +package org.apache.pinot.thirdeye.cube.entry; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Multimap; +import org.apache.pinot.thirdeye.cube.additive.AdditiveDBClient; +import org.apache.pinot.thirdeye.cube.additive.AdditiveRow; import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; import org.apache.pinot.thirdeye.cube.cost.BalancedCostFunction; import org.apache.pinot.thirdeye.cube.cost.CostFunction; @@ -225,7 +227,7 @@ public class MultiDimensionalSummaryCLITool { // Initialize ThirdEye's environment ThirdEyeUtils.initLightWeightThirdEyeEnvironment(argList.get(0)); - CubePinotClient<AdditiveRow> cubeDbClient = new AdditiveDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); + AdditiveDBClient cubeDbClient = new AdditiveDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); // Convert JSON string to Objects Dimensions dimensions; diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/SummaryUtils.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/SummaryUtils.java new file mode 100644 index 0000000..a373f16 --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/entry/SummaryUtils.java @@ -0,0 +1,43 @@ 
+package org.apache.pinot.thirdeye.cube.entry; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.Multimap; +import java.util.List; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; + + +public class SummaryUtils { + /** + * Checks the arguments of cube algorithm. + * + * @param dataset the name of dataset; cannot be null or empty. + * @param metrics the list of metrics; it needs to contain at least one metric and they cannot be null or empty. + * @param currentStartInclusive the current start time; needs to be smaller than current end time. + * @param currentEndExclusive the current end time. + * @param baselineStartInclusive the baseline start time; needs to be smaller than baseline end time. + * @param baselineEndExclusive the baseline end time. + * @param dimensions the dimensions to be explored; it needs to contain at least one dimension. + * @param dataFilters the filter to be applied on the Pinot query; cannot be null. + * @param summarySize the summary size; needs to be > 1. + * @param depth the max depth of dimensions to be drilled down; needs to be >= 0. + * @param hierarchies the hierarchy among dimensions; cannot be null. 
+ */ + public static void checkArguments(String dataset, List<String> metrics, long currentStartInclusive, + long currentEndExclusive, long baselineStartInclusive, long baselineEndExclusive, Dimensions dimensions, + Multimap<String, String> dataFilters, int summarySize, int depth, List<List<String>> hierarchies) { + Preconditions.checkArgument(!Strings.isNullOrEmpty(dataset)); + Preconditions.checkArgument(!metrics.isEmpty()); + for (String metric : metrics) { + Preconditions.checkArgument(!Strings.isNullOrEmpty(metric)); + } + Preconditions.checkArgument(currentStartInclusive < currentEndExclusive); + Preconditions.checkArgument(baselineStartInclusive < baselineEndExclusive); + Preconditions.checkNotNull(dimensions); + Preconditions.checkArgument(dimensions.size() > 0); + Preconditions.checkNotNull(dataFilters); + Preconditions.checkArgument(summarySize > 1); + Preconditions.checkNotNull(hierarchies); + Preconditions.checkArgument(depth >= 0); + } +} diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioCubeNode.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioCubeNode.java new file mode 100644 index 0000000..04adcd8 --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioCubeNode.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.thirdeye.cube.ratio; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.math.DoubleMath; +import org.apache.pinot.thirdeye.cube.data.cube.CubeUtils; +import org.apache.pinot.thirdeye.cube.data.node.BaseCubeNode; + + +/** + * A CubeNode for ratio metrics such as "observed over expected ratio". + */ +public class RatioCubeNode extends BaseCubeNode<RatioCubeNode, RatioRow> { + private static double epsilon = 0.0001; + + private double baselineNumeratorValue; + private double currentNumeratorValue; + private double baselineDenominatorValue; + private double currentDenominatorValue; + + /** + * Constructs a root CubeNode whose level and index is 0 and parent pointer is null. + * + * @param data the data of this root node. + */ + public RatioCubeNode(RatioRow data) { + super(data); + resetValues(); + } + + /** + * Constructs a CubeNode which is specified information. + * + * @param level the level of this node. + * @param index the index of this node that is located in its parent's children list. + * @param data the data of this node. + * @param parent the parent of this node. 
+ */ + public RatioCubeNode(int level, int index, RatioRow data, RatioCubeNode parent) { + super(level, index, data, parent); + resetValues(); + } + + @Override + public void resetValues() { + this.baselineNumeratorValue = data.getBaselineNumeratorValue(); + this.currentNumeratorValue = data.getCurrentNumeratorValue(); + this.baselineDenominatorValue = data.getBaselineDenominatorValue(); + this.currentDenominatorValue = data.getCurrentDenominatorValue(); + } + + @Override + public void removeNodeValues(RatioCubeNode node) { + baselineNumeratorValue = CubeUtils.doubleMinus(baselineNumeratorValue, node.baselineNumeratorValue); + currentNumeratorValue = CubeUtils.doubleMinus(currentNumeratorValue, node.currentNumeratorValue); + baselineDenominatorValue = CubeUtils.doubleMinus(baselineDenominatorValue, node.baselineDenominatorValue); + currentDenominatorValue = CubeUtils.doubleMinus(currentDenominatorValue, node.currentDenominatorValue); + Preconditions.checkArgument(!(DoubleMath.fuzzyCompare(baselineNumeratorValue, 0, epsilon) < 0 + || DoubleMath.fuzzyCompare(currentNumeratorValue, 0, epsilon) < 0 + || DoubleMath.fuzzyCompare(baselineDenominatorValue, 0, epsilon) < 0 + || DoubleMath.fuzzyCompare(currentDenominatorValue, 0, epsilon) < 0)); + } + + @Override + public void addNodeValues(RatioCubeNode node) { + this.baselineNumeratorValue += node.baselineNumeratorValue; + this.currentNumeratorValue += node.currentNumeratorValue; + this.baselineDenominatorValue += node.baselineDenominatorValue; + this.currentDenominatorValue += node.currentDenominatorValue; + } + + @Override + public double getBaselineSize() { + return baselineNumeratorValue + baselineDenominatorValue; + } + + @Override + public double getCurrentSize() { + return currentNumeratorValue + currentDenominatorValue; + } + + @Override + public double getOriginalBaselineSize() { + return data.getBaselineNumeratorValue() + data.getBaselineDenominatorValue(); + } + + @Override + public double 
getOriginalCurrentSize() { + return data.getCurrentNumeratorValue() + data.getCurrentDenominatorValue(); + } + + /** + * Calculates the value of the given numerator and denominator. + * If denominator is non-zero, then return (numerator / denominator); + * If both numerator and denominator are zero, then return 0. + * If only denominator is zero, then return (numerator / node size). + * + * @param numerator the numerator. + * @param denominator the denominator. + * + * @return the value of the given numerator and denominator. + */ + private double calculateValue(double numerator, double denominator) { + if (!DoubleMath.fuzzyEquals(denominator, 0, epsilon)) { + return numerator / denominator; + } else if (DoubleMath.fuzzyEquals(numerator, 0, epsilon)) { + return 0d; // Let the algorithm to handle this case as a missing value. + } else { + // Divide the numerator value by node size to prevent large change diff. + return numerator / (getCurrentSize() + getBaselineSize()); + } + } + + @Override + public double getBaselineValue() { + return calculateValue(baselineNumeratorValue, baselineDenominatorValue); + } + + @Override + public double getCurrentValue() { + return calculateValue(currentNumeratorValue, currentDenominatorValue); + } + + @Override + public double getOriginalBaselineValue() { + return data.getBaselineNumeratorValue() / data.getBaselineDenominatorValue(); + } + + @Override + public double getOriginalCurrentValue() { + return data.getCurrentNumeratorValue() / data.getCurrentDenominatorValue(); + } + + @Override + public double originalChangeRatio() { + return (data.getCurrentNumeratorValue() / data.getCurrentDenominatorValue()) / (data.getBaselineNumeratorValue() / data.getBaselineDenominatorValue()); + } + + @Override + public double changeRatio() { + return (currentNumeratorValue / currentDenominatorValue) / (baselineNumeratorValue / baselineDenominatorValue); + } + + @Override + public boolean side() { + double currentValue = getCurrentValue(); + double 
baselineValue = getBaselineValue(); + if (!DoubleMath.fuzzyEquals(currentValue, 0, epsilon) && !DoubleMath.fuzzyEquals(baselineValue, 0, epsilon)) { + // The most common case is located first in order to reduce performance impact + return DoubleMath.fuzzyCompare(currentValue, baselineValue, epsilon) >= 0; + } else { + if (parent != null) { + if (DoubleMath.fuzzyEquals(currentValue, 0, epsilon) && DoubleMath.fuzzyEquals(baselineValue, 0, epsilon)) { + return parent.side(); + } else if (DoubleMath.fuzzyEquals(currentValue, 0, epsilon)) { + return DoubleMath.fuzzyCompare(baselineValue, parent.getBaselineValue(), epsilon) < 0; + } else { //if (DoubleMath.fuzzyEquals(baselineValue, 0, epsilon)) { + return DoubleMath.fuzzyCompare(currentValue, parent.getCurrentValue(), epsilon) >= 0; + } + } else { + return DoubleMath.fuzzyCompare(currentValue, baselineValue, epsilon) >= 0; + } + } + } + + /** + * Returns the baseline numerator value. + * + * @return the baseline numerator value. + */ + public double getBaselineNumeratorValue() { + return baselineNumeratorValue; + } + + /** + * Returns the baseline denominator value. + * + * @return the baseline denominator value. + */ + public double getBaselineDenominatorValue() { + return baselineDenominatorValue; + } + + /** + * Returns the current numerator value. + * + * @return the current numerator value. + */ + public double getCurrentNumeratorValue() { + return currentNumeratorValue; + } + + /** + * Returns the current denominator value. + * + * @return the current denominator value. 
+ */ + public double getCurrentDenominatorValue() { + return currentDenominatorValue; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof RatioCubeNode)) { + return false; + } + if (!super.equals(o)) { + return false; + } + RatioCubeNode that = (RatioCubeNode) o; + return Double.compare(that.baselineNumeratorValue, baselineNumeratorValue) == 0 + && Double.compare(that.currentNumeratorValue, currentNumeratorValue) == 0 + && Double.compare(that.baselineDenominatorValue, baselineDenominatorValue) == 0 + && Double.compare(that.currentDenominatorValue, currentDenominatorValue) == 0; + } + + @Override + public int hashCode() { + return Objects.hashCode(super.hashCode(), baselineNumeratorValue, currentNumeratorValue, baselineDenominatorValue, + currentDenominatorValue); + } + + /** + * ToString that handles if the given cube node is null, i.e., a root cube node. Moreover, it does not invoke + * parent's toString() to prevent multiple calls of toString to their parents. + * + * @param node the node to be converted to string. + * + * @return a simple string representation of a parent cube node, which does not toString its parent node recursively. 
+ */ + private String toStringAsParent(RatioCubeNode node) { + if (node == null) { + return "null"; + } else { + return MoreObjects.toStringHelper(this) + .add("level", level) + .add("index", index) + .add("baselineNumeratorValue", baselineNumeratorValue) + .add("baselineDenominatorValue", baselineDenominatorValue) + .add("currentNumeratorValue", currentNumeratorValue) + .add("currentDenominatorValue", currentDenominatorValue) + .add("cost", cost) + .add("data", data) + .toString(); + } + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("level", level) + .add("index", index) + .add("baselineNumeratorValue", baselineNumeratorValue) + .add("baselineDenominatorValue", baselineDenominatorValue) + .add("currentNumeratorValue", currentNumeratorValue) + .add("currentDenominatorValue", currentDenominatorValue) + .add("cost", cost) + .add("data", data) + .add("parent", toStringAsParent(parent)) + .toString(); + } +} diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioDBClient.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioDBClient.java new file mode 100644 index 0000000..dc5bc56 --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioDBClient.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.thirdeye.cube.ratio; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.apache.pinot.thirdeye.cube.data.dbclient.CubeTag; +import org.apache.pinot.thirdeye.cube.data.dbclient.BaseCubePinotClient; +import org.apache.pinot.thirdeye.cube.data.dbclient.CubeSpec; +import org.apache.pinot.thirdeye.datasource.cache.QueryCache; + + +/** + * This class generates query requests to the backend database and retrieve the metrics that compose the ratio metric + * for summary algorithm. + * + * @see org.apache.pinot.thirdeye.cube.data.dbclient.BaseCubePinotClient + */ +public class RatioDBClient extends BaseCubePinotClient<RatioRow> { + private String numeratorMetric = ""; + private String denominatorMetric = ""; + + /** + * Constructs a DB client to the ratio metric. + * + * @param queryCache the query cache to Pinot DB. + */ + public RatioDBClient(QueryCache queryCache) { + super(queryCache); + } + + /** + * Sets the numerator metric of the ratio metric. + * + * @param numeratorMetric the numerator metric of the ratio metric. + */ + public void setNumeratorMetric(String numeratorMetric) { + this.numeratorMetric = numeratorMetric; + } + + /** + * Sets the denominator metric of the ratio metric. + * + * @param denominatorMetric the denominator metric of the ratio metric. 
+ */ + public void setDenominatorMetric(String denominatorMetric) { + this.denominatorMetric = denominatorMetric; + } + + @Override + protected List<CubeSpec> getCubeSpecs() { + List<CubeSpec> cubeSpecs = new ArrayList<>(); + + cubeSpecs.add( + new CubeSpec(CubeTag.BaselineNumerator, numeratorMetric, baselineStartInclusive, baselineEndExclusive)); + cubeSpecs.add( + new CubeSpec(CubeTag.BaselineDenominator, denominatorMetric, baselineStartInclusive, baselineEndExclusive)); + cubeSpecs.add(new CubeSpec(CubeTag.CurrentNumerator, numeratorMetric, currentStartInclusive, currentEndExclusive)); + cubeSpecs.add( + new CubeSpec(CubeTag.CurrentDenominator, denominatorMetric, currentStartInclusive, currentEndExclusive)); + + return cubeSpecs; + } + + @Override + protected void fillValueToRowTable(Map<List<String>, RatioRow> rowTable, Dimensions dimensions, + List<String> dimensionValues, double value, CubeTag tag) { + + if (Double.compare(0d, value) < 0 && !Double.isInfinite(value)) { + RatioRow row = rowTable.get(dimensionValues); + if (row == null) { + row = new RatioRow(dimensions, new DimensionValues(dimensionValues)); + rowTable.put(dimensionValues, row); + } + switch (tag) { + case BaselineNumerator: + row.setBaselineNumeratorValue(value); + break; + case BaselineDenominator: + row.setBaselineDenominatorValue(value); + break; + case CurrentNumerator: + row.setCurrentNumeratorValue(value); + break; + case CurrentDenominator: + row.setCurrentDenominatorValue(value); + break; + default: + throw new IllegalArgumentException("Unsupported CubeTag: " + tag.name()); + } + } + } +} diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioRow.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioRow.java new file mode 100644 index 0000000..8e19aea --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/ratio/RatioRow.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.pinot.thirdeye.cube.ratio; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.apache.pinot.thirdeye.cube.data.node.CubeNode; +import org.apache.pinot.thirdeye.cube.data.dbrow.BaseRow; + + +/** + * Stores the ratio metric that is returned from DB. + */ +public class RatioRow extends BaseRow { + protected double baselineNumeratorValue; + protected double currentNumeratorValue; + protected double baselineDenominatorValue; + protected double currentDenominatorValue; + + /** + * Constructs a ratio row. + * + * @param dimensions the dimension names of this row. + * @param dimensionValues the dimension values of this row. + */ + public RatioRow(Dimensions dimensions, DimensionValues dimensionValues) { + super(dimensions, dimensionValues); + this.baselineNumeratorValue = 0.0; + this.currentNumeratorValue = 0.0; + this.baselineDenominatorValue = 0.0; + this.currentDenominatorValue = 0.0; + } + + /** + * Constructs a ratio row. + * + * @param dimensions the dimension names of this row. 
+ * @param dimensionValues the dimension values of this row. + * @param baselineNumeratorValue the baseline numerator of this ratio row. + * @param baselineDenominatorValue the baseline denominator of this ratio row. + * @param currentNumeratorValue the current numerator of this ratio row. + * @param currentDenominatorValue the current denominator of this ratio row. + */ + public RatioRow(Dimensions dimensions, DimensionValues dimensionValues, double baselineNumeratorValue, + double baselineDenominatorValue, double currentNumeratorValue, double currentDenominatorValue) { + super(dimensions, dimensionValues); + this.baselineNumeratorValue = baselineNumeratorValue; + this.baselineDenominatorValue = baselineDenominatorValue; + this.currentNumeratorValue = currentNumeratorValue; + this.currentDenominatorValue = currentDenominatorValue; + } + + /** + * Returns the baseline numerator of this ratio row. + * + * @return the baseline numerator of this ratio row. + */ + public double getBaselineNumeratorValue() { + return baselineNumeratorValue; + } + + /** + * Sets the baseline numerator value of this ratio row. + * + * @param baselineNumeratorValue the baseline numerator value of this ratio row. + */ + public void setBaselineNumeratorValue(double baselineNumeratorValue) { + this.baselineNumeratorValue = baselineNumeratorValue; + } + + /** + * Returns the current numerator of this ratio row. + * + * @return the current numerator of this ratio row. + */ + public double getCurrentNumeratorValue() { + return currentNumeratorValue; + } + + /** + * Sets the current numerator value of this ratio row. + * + * @param currentNumeratorValue the current numerator value of this ratio row. + */ + public void setCurrentNumeratorValue(double currentNumeratorValue) { + this.currentNumeratorValue = currentNumeratorValue; + } + + /** + * Returns the baseline denominator of this ratio row. + * + * @return the baseline denominator of this ratio row. 
+ */ + public double getBaselineDenominatorValue() { + return baselineDenominatorValue; + } + + /** + * Sets the baseline denominator value of this ratio row. + * + * @param denominatorBaselineValue the baseline denominator value of this ratio row. + */ + public void setBaselineDenominatorValue(double denominatorBaselineValue) { + this.baselineDenominatorValue = denominatorBaselineValue; + } + + /** + * Returns the current denominator of this ratio row. + * + * @return the current denominator of this ratio row. + */ + public double getCurrentDenominatorValue() { + return currentDenominatorValue; + } + + /** + * Sets the current denominator value of this ratio row. + * + * @param denominatorCurrentValue the current denominator value of this ratio row. + */ + public void setCurrentDenominatorValue(double denominatorCurrentValue) { + this.currentDenominatorValue = denominatorCurrentValue; + } + + @Override + public RatioCubeNode toNode() { + return new RatioCubeNode(this); + } + + @Override + public CubeNode toNode(int level, int index, CubeNode parent) { + return new RatioCubeNode(level, index, this, (RatioCubeNode) parent); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof RatioRow)) { + return false; + } + if (!super.equals(o)) { + return false; + } + RatioRow ratioRow = (RatioRow) o; + return Double.compare(ratioRow.baselineNumeratorValue, baselineNumeratorValue) == 0 + && Double.compare(ratioRow.currentNumeratorValue, currentNumeratorValue) == 0 + && Double.compare(ratioRow.baselineDenominatorValue, baselineDenominatorValue) == 0 + && Double.compare(ratioRow.currentDenominatorValue, currentDenominatorValue) == 0; + } + + @Override + public int hashCode() { + return Objects.hashCode(super.hashCode(), baselineNumeratorValue, currentNumeratorValue, baselineDenominatorValue, + currentDenominatorValue); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + 
.add("baselineNumerator", baselineNumeratorValue) + .add("baselineDenominator", baselineDenominatorValue) + .add("currentNumerator", currentNumeratorValue) + .add("currentDenominator", currentDenominatorValue) + .add("changeRatio", currentNumeratorValue / baselineNumeratorValue) + .add("dimensions", super.getDimensions()) + .add("dimensionValues", super.getDimensionValues()) + .toString(); + } +} diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/BaseResponseRow.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/BaseResponseRow.java index 0c7ef3b..a7e937c 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/BaseResponseRow.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/BaseResponseRow.java @@ -19,11 +19,14 @@ package org.apache.pinot.thirdeye.cube.summary; + +/** + * A POJO for front-end representation. + */ public class BaseResponseRow { public double baselineValue; public double currentValue; - public double baselineSize; - public double currentSize; + public double sizeFactor; public String percentageChange; public String contributionChange; public String contributionToOverallChange; diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/Summary.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/Summary.java index f523fca..83bda4b 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/Summary.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/Summary.java @@ -33,10 +33,12 @@ import org.apache.pinot.thirdeye.cube.data.cube.DimNameValueCostEntry; import org.apache.pinot.thirdeye.cube.cost.BalancedCostFunction; import org.apache.pinot.thirdeye.cube.cost.CostFunction; import org.apache.pinot.thirdeye.cube.data.node.CubeNode; -import org.jfree.util.Log; +import org.slf4j.Logger; 
+import org.slf4j.LoggerFactory; public class Summary { + private static final Logger LOG = LoggerFactory.getLogger(Summary.class); static final NodeDimensionValuesComparator NODE_COMPARATOR = new NodeDimensionValuesComparator(); private Cube cube; @@ -66,9 +68,6 @@ public class Summary { this.levelCount = this.maxLevelCount; this.costSet = cube.getCostSet(); this.sortedDimensionCosts = cube.getSortedDimensionCosts(); - this.basicRowInserter = new BasicRowInserter(new BalancedCostFunction()); - this.oneSideErrorRowInserter = basicRowInserter; - this.leafRowInserter = basicRowInserter; this.costFunction = costFunction; this.basicRowInserter = new BasicRowInserter(costFunction); @@ -102,14 +101,16 @@ public class Summary { CubeNode root = cube.getRoot(); if (doOneSideError) { oneSideErrorRowInserter = - new OneSideErrorRowInserter(basicRowInserter, Double.compare(1., root.targetChangeRatio()) <= 0); + new OneSideErrorRowInserter(basicRowInserter, Double.compare(1., root.bootStrapChangeRatio()) <= 0); // If this cube contains only one dimension, one side error is calculated starting at leaf (detailed) level; // otherwise, a row at different side is removed through internal nodes. 
if (this.levelCount == 1) leafRowInserter = oneSideErrorRowInserter; } computeChildDPArray(root); List<CubeNode> answer = new ArrayList<>(dpArrays.get(0).getAnswer()); - SummaryResponse response = new SummaryResponse(); + SummaryResponse response = + new SummaryResponse(cube.getBaselineTotal(), cube.getCurrentTotal(), cube.getBaselineTotalSize(), + cube.getCurrentTotalSize()); response.buildDiffSummary(answer, this.levelCount, costFunction); response.buildGainerLoserGroup(costSet); response.setDimensionCosts(sortedDimensionCosts); @@ -131,7 +132,7 @@ public class Summary { for (CubeNode node : nodeList) { if (Double.compare(node.getBaselineValue(), node.getOriginalBaselineValue()) != 0 || Double.compare(node.getCurrentValue(), node.getOriginalCurrentValue()) != 0) { - Log.warn("Wrong Wow values at node: " + node.getDimensionValues() + ". Expected: " + LOG.warn("Wrong Wow values at node: " + node.getDimensionValues() + ". Expected: " + node.getOriginalBaselineValue() + "," + node.getOriginalCurrentValue() + ", actual: " + node.getBaselineValue() + "," + node.getCurrentValue()); } @@ -154,7 +155,7 @@ public class Summary { CubeNode parent = node.getParent(); DPArray dpArray = dpArrays.get(node.getLevel()); dpArray.fullReset(); - dpArray.targetRatio = node.targetChangeRatio(); + dpArray.targetRatio = node.bootStrapChangeRatio(); // Compute DPArray if the current node is the lowest internal node. // Otherwise, merge DPArrays from its children. 
@@ -165,16 +166,16 @@ public class Summary { // dpArray.setShrinkSize(Math.max(2, (node.childrenSize()+1)/2)); // } for (CubeNode child : (List<CubeNode>) node.getChildren()) { - leafRowInserter.insertRowToDPArray(dpArray, child, node.targetChangeRatio()); + leafRowInserter.insertRowToDPArray(dpArray, child, node.bootStrapChangeRatio()); updateWowValues(node, dpArray.getAnswer()); - dpArray.targetRatio = node.targetChangeRatio(); // get updated changeRatio + dpArray.targetRatio = node.bootStrapChangeRatio(); // get updated changeRatio } } else { for (CubeNode child : (List<CubeNode>) node.getChildren()) { computeChildDPArray(child); mergeDPArray(node, dpArray, dpArrays.get(node.getLevel() + 1)); updateWowValues(node, dpArray.getAnswer()); - dpArray.targetRatio = node.targetChangeRatio(); // get updated changeRatio + dpArray.targetRatio = node.bootStrapChangeRatio(); // get updated changeRatio } // Use the following block to replace the above one to roll-up rows aggressively // List<CubeNode> removedNodes = new ArrayList<>(); @@ -185,7 +186,7 @@ public class Summary { // computeChildDPArray(child); // removedNodes.addAll(mergeDPArray(node, dpArray, dpArrays.get(node.getLevel() + 1))); // updateWowValues(node, dpArray.getAnswer()); -// dpArray.targetChangeRatio = node.targetChangeRatio(); // get updated changeRatio +// dpArray.bootStrapChangeRatio = node.bootStrapChangeRatio(); // get updated changeRatio // } // // Aggregate current node's answer if it is thinned out due to the user's answer size is too huge. // // If the current node is kept being thinned out, it eventually aggregates all its children. 
@@ -195,7 +196,7 @@ public class Summary { // removedNodes.clear(); // dpArray.setShrinkSize(Math.max(1, (dpArray.getAnswer().size()*2)/3)); // dpArray.reset(); -// dpArray.targetChangeRatio = node.targetChangeRatio(); +// dpArray.bootStrapChangeRatio = node.bootStrapChangeRatio(); // } // } while (doRollback); } @@ -205,7 +206,7 @@ public class Summary { // Moreover, if a node is thinned out by its children, it won't be inserted to the answer. if (node.getLevel() != 0) { updateWowValues(parent, dpArray.getAnswer()); - double targetRatio = parent.targetChangeRatio(); + double targetRatio = parent.bootStrapChangeRatio(); recomputeCostAndRemoveSmallNodes(node, dpArray, targetRatio); dpArray.targetRatio = targetRatio; if ( !nodeIsThinnedOut(node) ) { @@ -311,7 +312,7 @@ public class Summary { } /** - * Recompute costs of the nodes in a DPArray using targetChangeRatio for calculating the cost. + * Recompute costs of the nodes in a DPArray using bootStrapChangeRatio for calculating the cost. */ private void recomputeCostAndRemoveSmallNodes(CubeNode parentNode, DPArray dp, double targetRatio) { Set<CubeNode> removedNodes = new HashSet<>(dp.getAnswer()); @@ -333,12 +334,12 @@ public class Summary { /** * If the node's parent is also in the DPArray, then it's parent's current changeRatio is used as the target changeRatio for - * calculating the cost of the node; otherwise, targetChangeRatio is used. + * calculating the cost of the node; otherwise, bootStrapChangeRatio is used. */ private void insertRowWithAdaptiveRatioNoOneSideError(DPArray dp, CubeNode node, double targetRatio) { if (dp.getAnswer().contains(node.getParent())) { // For one side error if node's parent is included in the solution, then its cost will be calculated normally. 
- basicRowInserter.insertRowToDPArray(dp, node, node.getParent().targetChangeRatio()); + basicRowInserter.insertRowToDPArray(dp, node, node.getParent().bootStrapChangeRatio()); } else { basicRowInserter.insertRowToDPArray(dp, node, targetRatio); } @@ -346,12 +347,12 @@ public class Summary { /** * If the node's parent is also in the DPArray, then it's parent's current changeRatio is used as the target changeRatio for - * calculating the cost of the node; otherwise, targetChangeRatio is used. + * calculating the cost of the node; otherwise, bootStrapChangeRatio is used. */ private void insertRowWithAdaptiveRatio(DPArray dp, CubeNode node, double targetRatio) { if (dp.getAnswer().contains(node.getParent())) { // For one side error if node's parent is included in the solution, then its cost will be calculated normally. - basicRowInserter.insertRowToDPArray(dp, node, node.getParent().targetChangeRatio()); + basicRowInserter.insertRowToDPArray(dp, node, node.getParent().bootStrapChangeRatio()); } else { oneSideErrorRowInserter.insertRowToDPArray(dp, node, targetRatio); } @@ -408,10 +409,6 @@ public class Summary { public void insertRowToDPArray(DPArray dp, CubeNode node, double targetRatio) { // If the row has the same change trend with the top row, then it is inserted. if ( side == node.side() ) { - // When do oneSide, we try to make the root's changeRatio close to 1 in order to see the major root causes. 
- if ( (side && Double.compare(targetRatio, 1d) > 0) || (!side && Double.compare(targetRatio, 1d) < 0)) { - targetRatio = 1d; - } basicRowInserter.insertRowToDPArray(dp, node, targetRatio); } else { // Otherwise, it is inserted only there exists an intermediate parent besides root node CubeNode parent = findAncestor(node, null, dp.getAnswer()); diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/SummaryResponse.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/SummaryResponse.java index 70011d3..9177cd8 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/SummaryResponse.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/cube/summary/SummaryResponse.java @@ -37,7 +37,7 @@ import org.apache.pinot.thirdeye.cube.data.node.CubeNode; public class SummaryResponse { private final static int MAX_GAINER_LOSER_COUNT = 5; - private final static NumberFormat DOUBLE_FORMATTER = new DecimalFormat("#0.00"); + private final static NumberFormat DOUBLE_FORMATTER = new DecimalFormat("#0.0000"); static final String INFINITE = ""; static final String ALL = "(ALL)"; @@ -80,6 +80,14 @@ public class SummaryResponse { @JsonProperty("dimensionCosts") private List<Cube.DimensionCost> dimensionCosts = new ArrayList<>(); + public SummaryResponse(double baselineTotal, double currentTotal, double baselineTotalSize, double currentTotalSize) { + this.baselineTotal = baselineTotal; + this.currentTotal = currentTotal; + this.baselineTotalSize = baselineTotalSize; + this.currentTotalSize = currentTotalSize; + globalRatio = roundUp(currentTotal / baselineTotal); + } + public String getDataset() { return dataset; } @@ -117,7 +125,7 @@ public class SummaryResponse { } public static SummaryResponse buildNotAvailableResponse(String dataset, String metricName) { - SummaryResponse response = new SummaryResponse(); + SummaryResponse response = new SummaryResponse(0d, 0d, 0d, 
0d); response.setDataset(dataset); response.setMetricName(metricName); response.dimensions.add(NOT_AVAILABLE); @@ -146,8 +154,7 @@ public class SummaryResponse { SummaryGainerLoserResponseRow row = new SummaryGainerLoserResponseRow(); row.baselineValue = costEntry.getBaselineValue(); row.currentValue = costEntry.getCurrentValue(); - row.baselineSize = costEntry.getBaselineSize(); - row.currentSize = costEntry.getCurrentSize(); + row.sizeFactor = costEntry.getSizeFactor(); row.dimensionName = costEntry.getDimName(); row.dimensionValue = costEntry.getDimValue(); row.percentageChange = computePercentageChange(row.baselineValue, row.currentValue); @@ -160,18 +167,6 @@ public class SummaryResponse { } public void buildDiffSummary(List<CubeNode> nodes, int targetLevelCount, CostFunction costFunction) { - // Compute the total baseline and current value - - for(CubeNode node : nodes) { - baselineTotal += node.getBaselineValue(); - baselineTotalSize += node.getBaselineValue(); - currentTotal += node.getCurrentValue(); - currentTotalSize += node.getCurrentValue(); - } - if (Double.compare(baselineTotal, 0d) != 0) { - globalRatio = roundUp(currentTotal / baselineTotal); - } - // If all nodes have a lower level count than targetLevelCount, then it is not necessary to print the summary with // height higher than the available level. 
int maxNodeLevelCount = 0; @@ -227,8 +222,8 @@ public class SummaryResponse { row.baselineValue = node.getBaselineValue(); row.currentValue = node.getCurrentValue(); row.percentageChange = computePercentageChange(row.baselineValue, row.currentValue); - row.baselineSize = node.getBaselineSize(); - row.currentSize = node.getCurrentSize(); + row.sizeFactor = + (node.getBaselineSize() + node.getCurrentSize()) / (baselineTotalSize + currentTotalSize); row.contributionChange = computeContributionChange(row.baselineValue, row.currentValue, baselineTotal, currentTotal); row.contributionToOverallChange = @@ -275,7 +270,7 @@ public class SummaryResponse { } private static double roundUp(double number) { - return Math.round(number * 100d) / 100d; + return Math.round(number * 10000d) / 10000d; } public String toString() { diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/SummaryResource.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/SummaryResource.java index 0f590cc..c11a305 100644 --- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/SummaryResource.java +++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/SummaryResource.java @@ -36,13 +36,14 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import org.apache.commons.lang.StringUtils; import org.apache.pinot.thirdeye.cube.additive.AdditiveDBClient; -import org.apache.pinot.thirdeye.cube.additive.AdditiveRow; -import org.apache.pinot.thirdeye.cube.additive.MultiDimensionalSummary; -import org.apache.pinot.thirdeye.cube.additive.MultiDimensionalSummaryCLITool; import org.apache.pinot.thirdeye.cube.cost.BalancedCostFunction; import org.apache.pinot.thirdeye.cube.cost.CostFunction; -import org.apache.pinot.thirdeye.cube.data.dbclient.CubePinotClient; +import org.apache.pinot.thirdeye.cube.cost.RatioCostFunction; import 
org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.apache.pinot.thirdeye.cube.entry.MultiDimensionalRatioSummary; +import org.apache.pinot.thirdeye.cube.entry.MultiDimensionalSummary; +import org.apache.pinot.thirdeye.cube.entry.MultiDimensionalSummaryCLITool; +import org.apache.pinot.thirdeye.cube.ratio.RatioDBClient; import org.apache.pinot.thirdeye.cube.summary.SummaryResponse; import org.apache.pinot.thirdeye.dashboard.Utils; import org.apache.pinot.thirdeye.datasource.ThirdEyeCacheRegistry; @@ -114,7 +115,7 @@ public class SummaryResource { CostFunction costFunction = new BalancedCostFunction(); DateTimeZone dateTimeZone = DateTimeZone.forID(timeZone); - CubePinotClient<AdditiveRow> cubeDbClient = new AdditiveDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); + AdditiveDBClient cubeDbClient = new AdditiveDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); MultiDimensionalSummary mdSummary = new MultiDimensionalSummary(cubeDbClient, costFunction, dateTimeZone); response = mdSummary @@ -167,7 +168,7 @@ public class SummaryResource { CostFunction costFunction = new BalancedCostFunction(); DateTimeZone dateTimeZone = DateTimeZone.forID(timeZone); - CubePinotClient<AdditiveRow> cubeDbClient = new AdditiveDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); + AdditiveDBClient cubeDbClient = new AdditiveDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); MultiDimensionalSummary mdSummary = new MultiDimensionalSummary(cubeDbClient, costFunction, dateTimeZone); response = mdSummary @@ -180,4 +181,69 @@ public class SummaryResource { } return OBJECT_MAPPER.writeValueAsString(response); } + + @GET + @Path(value = "/summary/autoRatioDimensionOrder") + @Produces(MediaType.APPLICATION_JSON) + public String buildRatioSummary(@QueryParam("dataset") String dataset, + @QueryParam("numeratorMetric") String numeratorMetric, + @QueryParam("denominatorMetric") String denominatorMetric, + @QueryParam("currentStart") long currentStartInclusive, + @QueryParam("currentEnd") 
long currentEndExclusive, + @QueryParam("baselineStart") long baselineStartInclusive, + @QueryParam("baselineEnd") long baselineEndExclusive, + @QueryParam("dimensions") String groupByDimensions, + @QueryParam("filters") String filterJsonPayload, + @QueryParam("summarySize") int summarySize, + @QueryParam("depth") @DefaultValue(DEFAULT_DEPTH) int depth, + @QueryParam("hierarchies") @DefaultValue(DEFAULT_HIERARCHIES) String hierarchiesPayload, + @QueryParam("oneSideError") @DefaultValue(DEFAULT_ONE_SIDE_ERROR) boolean doOneSideError, + @QueryParam("excludedDimensions") @DefaultValue(DEFAULT_EXCLUDED_DIMENSIONS) String excludedDimensions, + @QueryParam("timeZone") @DefaultValue(DEFAULT_TIMEZONE_ID) String timeZone) throws Exception { + if (summarySize < 1) summarySize = 1; + + SummaryResponse response = null; + + try { + Dimensions dimensions; + if (StringUtils.isBlank(groupByDimensions) || JAVASCRIPT_NULL_STRING.equals(groupByDimensions)) { + dimensions = + MultiDimensionalSummaryCLITool.sanitizeDimensions(new Dimensions(Utils.getSchemaDimensionNames(dataset))); + } else { + dimensions = new Dimensions(Arrays.asList(groupByDimensions.trim().split(","))); + } + + if (!Strings.isNullOrEmpty(excludedDimensions)) { + List<String> dimensionsToBeRemoved = Arrays.asList(excludedDimensions.trim().split(",")); + dimensions = MultiDimensionalSummaryCLITool.removeDimensions(dimensions, dimensionsToBeRemoved); + } + + Multimap<String, String> filterSetMap; + if (StringUtils.isBlank(filterJsonPayload) || JAVASCRIPT_NULL_STRING.equals(filterJsonPayload)) { + filterSetMap = ArrayListMultimap.create(); + } else { + filterJsonPayload = URLDecoder.decode(filterJsonPayload, HTML_STRING_ENCODING); + filterSetMap = ThirdEyeUtils.convertToMultiMap(filterJsonPayload); + } + + List<List<String>> hierarchies = + OBJECT_MAPPER.readValue(hierarchiesPayload, new TypeReference<List<List<String>>>() { + }); + + CostFunction costFunction = new RatioCostFunction(); + DateTimeZone dateTimeZone = 
DateTimeZone.forID(timeZone); + RatioDBClient dbClient = new RatioDBClient(CACHE_REGISTRY_INSTANCE.getQueryCache()); + MultiDimensionalRatioSummary mdSummary = new MultiDimensionalRatioSummary(dbClient, costFunction, dateTimeZone); + + response = mdSummary + .buildRatioSummary(dataset, numeratorMetric, denominatorMetric, currentStartInclusive, currentEndExclusive, baselineStartInclusive, + baselineEndExclusive, dimensions, filterSetMap, summarySize, depth, hierarchies, doOneSideError); + + } catch (Exception e) { + LOG.error("Exception while generating difference summary", e); + response = SummaryResponse.buildNotAvailableResponse(dataset, numeratorMetric + "/" + denominatorMetric); + } + + return OBJECT_MAPPER.writeValueAsString(response); + } } diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLIToolTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLIToolTest.java index f6e5540..7098e2d 100644 --- a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLIToolTest.java +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/additive/MultiDimensionalSummaryCLIToolTest.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.Arrays; import java.util.List; +import org.apache.pinot.thirdeye.cube.entry.MultiDimensionalSummaryCLITool; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/CubeTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/CubeTest.java index c4ab8bb..2690aa8 100644 --- a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/CubeTest.java +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/CubeTest.java 
@@ -91,13 +91,13 @@ public class CubeTest { private List<DimNameValueCostEntry> getBasicCostSet() { List<DimNameValueCostEntry> costSet = new ArrayList<>(); - costSet.add(new DimNameValueCostEntry("country", "US", 0, 0, 0, 0, 0, 7)); - costSet.add(new DimNameValueCostEntry("country", "IN", 0, 0, 0, 0, 0, 3)); - costSet.add(new DimNameValueCostEntry("continent", "N. America", 0, 0, 0, 0, 0, 4)); - costSet.add(new DimNameValueCostEntry("continent", "S. America", 0, 0, 0, 0, 0, 1)); - costSet.add(new DimNameValueCostEntry("page", "front_page", 0, 0, 0, 0, 0, 4)); - costSet.add(new DimNameValueCostEntry("page", "page", 0, 0, 0, 0, 0, 3)); - costSet.add(new DimNameValueCostEntry("page", "page2", 0, 0, 0, 0, 0, 1)); + costSet.add(new DimNameValueCostEntry("country", "US", 0, 0, 0d, 0d, 0, 0, 0, 7)); + costSet.add(new DimNameValueCostEntry("country", "IN", 0, 0, 0d, 0d, 0, 0, 0, 3)); + costSet.add(new DimNameValueCostEntry("continent", "N. America", 0, 0, 0d, 0d, 0, 0, 0, 4)); + costSet.add(new DimNameValueCostEntry("continent", "S. 
America", 0, 0, 0d, 0d, 0, 0, 0, 1)); + costSet.add(new DimNameValueCostEntry("page", "front_page", 0, 0, 0d, 0d, 0, 0, 0, 4)); + costSet.add(new DimNameValueCostEntry("page", "page", 0, 0, 0d, 0d, 0, 0, 0, 3)); + costSet.add(new DimNameValueCostEntry("page", "page2", 0, 0, 0d, 0d, 0, 0, 0, 1)); return costSet; } diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntryTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntryTest.java index eac33ce..652e396 100644 --- a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntryTest.java +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/cube/DimNameValueCostEntryTest.java @@ -16,23 +16,22 @@ package org.apache.pinot.thirdeye.cube.data.cube; -import org.apache.pinot.thirdeye.cube.data.cube.DimNameValueCostEntry; import org.testng.annotations.Test; public class DimNameValueCostEntryTest { @Test public void testCreation() { - new DimNameValueCostEntry("", "", 0, 0, 0, 0, 0, 0); + new DimNameValueCostEntry("", "", 0, 0, 0d, 0d, 0, 0, 0, 0); } @Test(expectedExceptions = NullPointerException.class) public void testNullDimensionNameCreation() { - new DimNameValueCostEntry(null, "", 0, 0, 0, 0, 0, 0); + new DimNameValueCostEntry(null, "", 0, 0, 0d, 0d, 0, 0, 0, 0); } @Test(expectedExceptions = NullPointerException.class) public void testNullDimensionValueCreation() { - new DimNameValueCostEntry("", null, 0, 0, 0, 0, 0, 0); + new DimNameValueCostEntry("", null, 0, 0, 0d, 0d, 0, 0, 0, 0); } } diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionValuesTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionValuesTest.java index f3ace00..da30e0a 100644 --- 
a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionValuesTest.java +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionValuesTest.java @@ -19,7 +19,6 @@ package org.apache.pinot.thirdeye.cube.data.dbrow; import java.util.Arrays; import java.util.Collections; import java.util.List; -import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionsTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionsTest.java index 1b23e91..4037a4d 100644 --- a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionsTest.java +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/dbrow/DimensionsTest.java @@ -21,7 +21,6 @@ import java.util.Collections; import java.util.List; import java.util.Objects; import org.apache.commons.collections.ListUtils; -import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/AdditiveCubeNodeTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/AdditiveCubeNodeTest.java new file mode 100644 index 0000000..afb3169 --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/AdditiveCubeNodeTest.java @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-c...@linkedin.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pinot.thirdeye.cube.data.node; + +import java.util.Collections; +import org.apache.pinot.thirdeye.cube.additive.AdditiveCubeNode; +import org.apache.pinot.thirdeye.cube.additive.AdditiveRow; +import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class AdditiveCubeNodeTest { + // Since CubeNode has cyclic reference between current node and parent node, the toString() will encounter + // overflowStack exception if it doesn't take care of the cyclic reference carefully. 
+ @Test + public void testToString() { + AdditiveRow root = new AdditiveRow(new Dimensions(), new DimensionValues()); + AdditiveCubeNode rootNode = new AdditiveCubeNode(root); + + AdditiveRow child = new AdditiveRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("US")), 20, 30); + AdditiveCubeNode childNode = new AdditiveCubeNode(1, 0, child, rootNode); + + childNode.toString(); + } + + @Test + public void testEqualsAndHashCode() { + AdditiveRow root1 = new AdditiveRow(new Dimensions(), new DimensionValues()); + AdditiveCubeNode rootNode1 = new AdditiveCubeNode(root1); + + AdditiveRow root2 = new AdditiveRow(new Dimensions(), new DimensionValues()); + AdditiveCubeNode rootNode2 = new AdditiveCubeNode(root2); + + Assert.assertEquals(rootNode1, rootNode2); + Assert.assertTrue(CubeNodeUtils.equalHierarchy(rootNode1, rootNode2)); + Assert.assertEquals(rootNode1.hashCode(), rootNode2.hashCode()); + + AdditiveRow root3 = new AdditiveRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("US"))); + CubeNode rootNode3 = new AdditiveCubeNode(root3); + Assert.assertNotEquals(rootNode1, rootNode3); + Assert.assertNotEquals(rootNode1.hashCode(), rootNode3.hashCode()); + } +} diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeTest.java index 1286ca0..1aa90a5 100644 --- a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeTest.java +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/CubeNodeTest.java @@ -28,39 +28,10 @@ import org.testng.Assert; import org.testng.annotations.Test; +/** + * Tests the hierarchy among cube nodes. The main challenge is handling parent and children nodes. 
+ */ public class CubeNodeTest { - - // Since CubeNode has cyclic reference between current node and parent node, the toString() will encounter - // overflowStack exception if it doesn't take care of the cyclic reference carefully. - @Test - public void testToString() { - AdditiveRow root = new AdditiveRow(new Dimensions(), new DimensionValues()); - AdditiveCubeNode rootNode = new AdditiveCubeNode(root); - - AdditiveRow child = new AdditiveRow(new Dimensions(Collections.singletonList("country")), - new DimensionValues(Collections.singletonList("US")), 20, 30); - AdditiveCubeNode childNode = new AdditiveCubeNode(1, 0, child, rootNode); - - childNode.toString(); - } - - @Test - public void testSimpleEquals() { - AdditiveRow root1 = new AdditiveRow(new Dimensions(), new DimensionValues()); - AdditiveCubeNode rootNode1 = new AdditiveCubeNode(root1); - - AdditiveRow root2 = new AdditiveRow(new Dimensions(), new DimensionValues()); - AdditiveCubeNode rootNode2 = new AdditiveCubeNode(root2); - - Assert.assertEquals(rootNode1, rootNode2); - Assert.assertTrue(CubeNodeUtils.equalHierarchy(rootNode1, rootNode2)); - - AdditiveRow root3 = new AdditiveRow(new Dimensions(Collections.singletonList("country")), - new DimensionValues(Collections.singletonList("US"))); - CubeNode rootNode3 = new AdditiveCubeNode(root3); - Assert.assertNotEquals(rootNode1, rootNode3); - } - @Test public void testHierarchicalEquals() { AdditiveCubeNode rootNode1 = buildHierarchicalNodes(); @@ -103,7 +74,7 @@ public class CubeNodeTest { * Failed because data difference. 
*/ @Test - public void testHierarchicalEqualsFail2() throws Exception { + public void testHierarchicalEqualsFail2() { AdditiveCubeNode rootNode1 = buildHierarchicalNodes(); AdditiveRow rootRow = new AdditiveRow(new Dimensions(), new DimensionValues(), 20, 15); @@ -147,6 +118,12 @@ public class CubeNodeTest { Assert.assertFalse(CubeNodeUtils.equalHierarchy(rootNode1, rootNode2)); } + /** + * Provides data for this hierarchy: + * A + * / \ + * B C + */ private List<List<Row>> buildHierarchicalRows() { List<List<Row>> hierarchicalRows = new ArrayList<>(); // Root level @@ -169,6 +146,12 @@ public class CubeNodeTest { return hierarchicalRows; } +/** + * Builds hierarchy: + * A + * / \ + * B C + */ private AdditiveCubeNode buildHierarchicalNodes() { List<List<Row>> rows = buildHierarchicalRows(); // Root level diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/RatioCubeNodeTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/RatioCubeNodeTest.java new file mode 100644 index 0000000..85d8b35 --- /dev/null +++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/cube/data/node/RatioCubeNodeTest.java @@ -0,0 +1,112 @@ +/** + * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-c...@linkedin.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pinot.thirdeye.cube.data.node; + +import java.util.Collections; +import org.apache.pinot.thirdeye.cube.data.dbrow.DimensionValues; +import org.apache.pinot.thirdeye.cube.data.dbrow.Dimensions; +import org.apache.pinot.thirdeye.cube.ratio.RatioCubeNode; +import org.apache.pinot.thirdeye.cube.ratio.RatioRow; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class RatioCubeNodeTest { + + @Test + public void testSide() { + RatioRow root = new RatioRow(new Dimensions(), new DimensionValues()); + root.setBaselineNumeratorValue(100); + root.setBaselineDenominatorValue(200); + root.setCurrentNumeratorValue(150); + root.setCurrentDenominatorValue(250); + RatioCubeNode rootNode = new RatioCubeNode(root); + + // Ratio node with clear side() + RatioRow rowUS = new RatioRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("US"))); + rowUS.setBaselineNumeratorValue(50); // 50 left + rowUS.setBaselineDenominatorValue(120); // 80 left + rowUS.setCurrentNumeratorValue(80); // 70 left + rowUS.setCurrentDenominatorValue(180); // 70 left + RatioCubeNode nodeUS = new RatioCubeNode(1, 0, rowUS, rootNode); + Assert.assertEquals(nodeUS.changeRatio(), (80/180d) / (50d/120d)); + Assert.assertEquals(nodeUS.side(), nodeUS.changeRatio() > 1d); + + // Ratio node doesn't have baseline + RatioRow rowIN = new RatioRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("IN"))); + rowIN.setBaselineNumeratorValue(0); // 50 left + rowIN.setBaselineDenominatorValue(0); // 80 left + rowIN.setCurrentNumeratorValue(70); // 0 left + rowIN.setCurrentDenominatorValue(50); // 20 left + RatioCubeNode nodeIN = new RatioCubeNode(1, 1, rowIN, rootNode); + Assert.assertEquals(nodeIN.changeRatio(), Double.NaN); // The ratio will be inferred by algorithm itself + Assert.assertEquals(nodeIN.side(), nodeIN.getCurrentValue() > rootNode.getCurrentValue()); + + + // 
Ratio node doesn't have current values + RatioRow rowFR = new RatioRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("IN"))); + rowFR.setBaselineNumeratorValue(25); // 25 left + rowFR.setBaselineDenominatorValue(60); // 20 left + rowFR.setCurrentNumeratorValue(0); // 0 left + rowFR.setCurrentDenominatorValue(0); // 20 left + RatioCubeNode nodeFR = new RatioCubeNode(1, 2, rowFR, rootNode); + Assert.assertEquals(nodeFR.changeRatio(), Double.NaN); // The ratio will be inferred by algorithm itself + // The side of FR is UP because its baseline has a lower ratio than its parent's; hence, we expect that removing FR + // will move the metric upward. + Assert.assertEquals(nodeFR.side(), nodeFR.getBaselineValue() < rootNode.getBaselineValue()); + } + + // Since CubeNode has cyclic reference between current node and parent node, the toString() will encounter + // a StackOverflowError if it doesn't take care of the cyclic reference carefully. + @Test + public void testToString() { + RatioRow root = new RatioRow(new Dimensions(), new DimensionValues()); + RatioCubeNode rootNode = new RatioCubeNode(root); + + RatioRow child = new RatioRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("US"))); + child.setBaselineNumeratorValue(20); + child.setBaselineDenominatorValue(20); + child.setCurrentNumeratorValue(30); + child.setCurrentDenominatorValue(31); + RatioCubeNode childNode = new RatioCubeNode(1, 0, child, rootNode); + + System.out.println(childNode.toString()); + } + + @Test + public void testEqualsAndHashCode() { + RatioRow root1 = new RatioRow(new Dimensions(), new DimensionValues()); + CubeNode rootNode1 = new RatioCubeNode(root1); + + RatioRow root2 = new RatioRow(new Dimensions(), new DimensionValues()); + CubeNode rootNode2 = new RatioCubeNode(root2); + + Assert.assertEquals(rootNode1, rootNode2); + Assert.assertTrue(CubeNodeUtils.equalHierarchy(rootNode1,
rootNode2)); + Assert.assertEquals(rootNode1.hashCode(), rootNode2.hashCode()); + + RatioRow root3 = new RatioRow(new Dimensions(Collections.singletonList("country")), + new DimensionValues(Collections.singletonList("US"))); + CubeNode rootNode3 = new RatioCubeNode(root3); + Assert.assertNotEquals(rootNode1, rootNode3); + Assert.assertNotEquals(rootNode1.hashCode(), rootNode3.hashCode()); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org