This is an automated email from the ASF dual-hosted git repository. pratik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 455e194195 [timeseries] Define Time Series ID and Broker Response Name Tag Semantics (#14286) 455e194195 is described below commit 455e1941951e818bf2ebcac3f1bdf3a63b0705a4 Author: Ankit Sultana <ankitsult...@uber.com> AuthorDate: Fri Oct 25 09:47:50 2024 -0500 [timeseries] Define Time Series ID and Broker Response Name Tag Semantics (#14286) * [timeseries] Define Series ID and Response __name__ Tag Semantics * minor change * fix checkstyle --- .../response/PinotBrokerTimeSeriesResponse.java | 9 ++++++-- .../org/apache/pinot/tsdb/spi/TimeBuckets.java | 2 +- .../tsdb/spi/series/BaseTimeSeriesBuilder.java | 6 ++++++ .../apache/pinot/tsdb/spi/series/TimeSeries.java | 24 ++++++++++++++++++++++ .../pinot/tsdb/spi/series/TimeSeriesBlock.java | 4 ++++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/response/PinotBrokerTimeSeriesResponse.java b/pinot-common/src/main/java/org/apache/pinot/common/response/PinotBrokerTimeSeriesResponse.java index 8d455e09bd..96320b8326 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/response/PinotBrokerTimeSeriesResponse.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/response/PinotBrokerTimeSeriesResponse.java @@ -43,6 +43,11 @@ import org.apache.pinot.tsdb.spi.series.TimeSeriesBlock; public class PinotBrokerTimeSeriesResponse { public static final String SUCCESS_STATUS = "success"; public static final String ERROR_STATUS = "error"; + /** + * Prometheus returns a __name__ tag to denote the "name" of a time-series query. Time series language developers + * can set this tag in the final operator in their queries, which would allow them to configure the name tag + * returned by the Pinot Broker. By default, we use {@link TimeSeries#getTagsSerialized()} as the name of a series. + */ public static final String METRIC_NAME_KEY = "__name__"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private String _status; @@ -105,9 +110,9 @@ public class PinotBrokerTimeSeriesResponse { for (var listOfTimeSeries : seriesBlock.getSeriesMap().values()) { Preconditions.checkState(!listOfTimeSeries.isEmpty(), "Received empty time-series"); TimeSeries anySeries = listOfTimeSeries.get(0); - // TODO: Ideally we should allow "aliasing" a series, and hence we should store something like a series-name. - // Though in most contexts that would serve the same purpose as an ID. Map<String, String> metricMap = new HashMap<>(); + // Set the default metric name key. This may be overridden in the very next line if the language implementation + // sets its own name tag. metricMap.put(METRIC_NAME_KEY, anySeries.getTagsSerialized()); metricMap.putAll(anySeries.getTagKeyValuesAsMap()); for (TimeSeries timeSeries : listOfTimeSeries) { diff --git a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/TimeBuckets.java b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/TimeBuckets.java index 866249845e..a92eb3bcc2 100644 --- a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/TimeBuckets.java +++ b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/TimeBuckets.java @@ -56,7 +56,7 @@ public class TimeBuckets { } public long getRangeSeconds() { - return _timeBuckets[_timeBuckets.length - 1] - _timeBuckets[0]; + return _timeBuckets[_timeBuckets.length - 1] - _timeBuckets[0] + _bucketSize.getSeconds(); } public int getNumBuckets() { diff --git a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/BaseTimeSeriesBuilder.java b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/BaseTimeSeriesBuilder.java index 538f921b55..20ac1714a8 100644 --- a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/BaseTimeSeriesBuilder.java +++ b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/BaseTimeSeriesBuilder.java @@ -28,6 +28,8 @@ import org.apache.pinot.tsdb.spi.TimeBuckets; * BaseSeriesBuilder allows language implementations to build their own aggregation and other time-series functions. * Each time-series operator would typically call either of {@link #addValue} or {@link #addValueAtIndex}. When * the operator is done, it will call {@link #build()} to allow the builder to compute the final {@link TimeSeries}. + * <br /> + * <b>Important:</b> Refer to {@link TimeSeries} for details on Series ID and how to use it in general. */ public abstract class BaseTimeSeriesBuilder { protected final String _id; @@ -38,6 +40,10 @@ public abstract class BaseTimeSeriesBuilder { protected final List<String> _tagNames; protected final Object[] _tagValues; + /** + * Note that ID should be hashed to a Long to become the key in the Map<Long, List<TimeSeries>> in + * {@link TimeSeriesBlock}. Refer to {@link TimeSeries} for more details. + */ public BaseTimeSeriesBuilder(String id, @Nullable Long[] timeValues, @Nullable TimeBuckets timeBuckets, List<String> tagNames, Object[] tagValues) { _id = id; diff --git a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeries.java b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeries.java index 5fcf70b42e..55e2a9a730 100644 --- a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeries.java +++ b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeries.java @@ -38,6 +38,30 @@ import org.apache.pinot.tsdb.spi.TimeBuckets; * <b>Warning:</b> The time and value arrays passed to the Series are not copied, and can be modified by anyone with * access to them. This is by design, to make it easier to re-use buffers during time-series operations. * </p> + * + * <h3>Series ID Usage and Semantics</h3> + * ID of a time-series should uniquely identify a time-series in an execution context. There are languages that + * allow performing a "union" operation, and to accommodate those cases, we always store a {@link List<TimeSeries>} + * in {@link TimeSeriesBlock}. Moreover, for the union of series case, series with the same label key-value pairs can + * have the same ID. + * <p> + * <b>Important:</b> The following points summarize how Series ID should be used: + * <ul> + * <li> + * Series ID should be used in series blocks as the identifier that defines uniqueness. In other words, use it + * as the key for the Map<Long, List<TimeSeries>>. + * </li> + * <li> + * The leaf operator creates Series IDs using the tag-values alone, stored in a Object[]. For the Map in series + * block, we hash the ID to a Long using {@link TimeSeries#hash(Object[])}. The Object[] array will be empty, + * and so will the tags and values, if you do an aggregation without any grouping set. + * </li> + * <li> + * Whenever you have to convert the series ID to a Long, you can use Java hashCode or any other algorithm. The + * only reason we use a Long and not the String series ID is to make the Map lookups faster. + * </li> + * </ul> + * </p> */ public class TimeSeries { private final String _id; diff --git a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeriesBlock.java b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeriesBlock.java index e1e89d624d..1556954d20 100644 --- a/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeriesBlock.java +++ b/pinot-timeseries/pinot-timeseries-spi/src/main/java/org/apache/pinot/tsdb/spi/series/TimeSeriesBlock.java @@ -32,6 +32,10 @@ import org.apache.pinot.tsdb.spi.TimeBuckets; */ public class TimeSeriesBlock { private final TimeBuckets _timeBuckets; + /** + * Refer to {@link TimeSeries} for semantics on how to compute the Long hashed value from a + * {@link TimeSeries#getId()}. + */ private final Map<Long, List<TimeSeries>> _seriesMap; public TimeSeriesBlock(@Nullable TimeBuckets timeBuckets, Map<Long, List<TimeSeries>> seriesMap) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org