Jackie-Jiang commented on a change in pull request #6977: URL: https://github.com/apache/incubator-pinot/pull/6977#discussion_r641140241
########## File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentErrorInfo.java ########## @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.restlet.resources; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; +import org.apache.commons.lang3.exception.ExceptionUtils; + + +/** + * This class is used to represent errors related to a segment. + */ +@SuppressWarnings("unused") +@JsonIgnoreProperties(ignoreUnknown = true) +public class SegmentErrorInfo { + private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss z"; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT, Locale.getDefault()); + + private final String _timestamp; + private final String _errorMessage; + private final String _exceptionStackTrace; + + /** + * + * @param exceptionStackTrace Exception object + * @param errorMessage Error Message + * @param timestampMs Timestamp of the error + */ + public SegmentErrorInfo(Exception exceptionStackTrace, String errorMessage, long timestampMs) { + _timestamp = epochToSDF(timestampMs); + _errorMessage = errorMessage; + _exceptionStackTrace = (exceptionStackTrace != null) ? ExceptionUtils.getStackTrace(exceptionStackTrace) : null; + } + + /** + * This constructor is specifically for JSON ser/de. + * + * @param exceptionStackTrace Exception stack trace + * @param errorMessage Error message + * @param timestamp Time stamp of the error in Simple Date Format. + * + */ + @JsonCreator + public SegmentErrorInfo(@JsonProperty("exception") String exceptionStackTrace, + @JsonProperty("error") String errorMessage, @JsonProperty("timeStamp") String timestamp) { + _timestamp = timestamp; + _errorMessage = errorMessage; + _exceptionStackTrace = exceptionStackTrace; + } + + public String getTimestamp() { + return _timestamp; + } + + public String getError() { + return _errorMessage; + } + + public String getException() { + return _exceptionStackTrace; + } Review comment: Match the variable name ########## File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentErrorInfo.java ########## @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.restlet.resources; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; +import org.apache.commons.lang3.exception.ExceptionUtils; + + +/** + * This class is used to represent errors related to a segment. + */ +@SuppressWarnings("unused") +@JsonIgnoreProperties(ignoreUnknown = true) +public class SegmentErrorInfo { + private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss z"; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT, Locale.getDefault()); + + private final String _timestamp; + private final String _errorMessage; + private final String _exceptionStackTrace; + + /** + * + * @param exceptionStackTrace Exception object + * @param errorMessage Error Message + * @param timestampMs Timestamp of the error + */ + public SegmentErrorInfo(Exception exceptionStackTrace, String errorMessage, long timestampMs) { Review comment: ```suggestion public SegmentErrorInfo(Exception exception, String errorMessage, long timestampMs) { ``` ########## File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentErrorInfo.java ########## @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.restlet.resources; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; +import org.apache.commons.lang3.exception.ExceptionUtils; + + +/** + * This class is used to represent errors related to a segment. + */ +@SuppressWarnings("unused") +@JsonIgnoreProperties(ignoreUnknown = true) +public class SegmentErrorInfo { + private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss z"; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT, Locale.getDefault()); + + private final String _timestamp; + private final String _errorMessage; + private final String _exceptionStackTrace; + + /** + * + * @param exceptionStackTrace Exception object + * @param errorMessage Error Message + * @param timestampMs Timestamp of the error + */ + public SegmentErrorInfo(Exception exceptionStackTrace, String errorMessage, long timestampMs) { + _timestamp = epochToSDF(timestampMs); + _errorMessage = errorMessage; + _exceptionStackTrace = (exceptionStackTrace != null) ? ExceptionUtils.getStackTrace(exceptionStackTrace) : null; + } + + /** + * This constructor is specifically for JSON ser/de. + * + * @param exceptionStackTrace Exception stack trace + * @param errorMessage Error message + * @param timestamp Time stamp of the error in Simple Date Format. + * + */ + @JsonCreator + public SegmentErrorInfo(@JsonProperty("exception") String exceptionStackTrace, + @JsonProperty("error") String errorMessage, @JsonProperty("timeStamp") String timestamp) { + _timestamp = timestamp; + _errorMessage = errorMessage; + _exceptionStackTrace = exceptionStackTrace; + } + + public String getTimestamp() { + return _timestamp; + } + + public String getError() { + return _errorMessage; + } + + public String getException() { + return _exceptionStackTrace; + } + + /** + * Utility function to convert epoch in millis to SDF of form "yyyy-MM-dd HH:mm:ss z". + * + * @param millisSinceEpoch Time in millis to convert + * @return SDF equivalent + */ + private static String epochToSDF(long millisSinceEpoch) { + SIMPLE_DATE_FORMAT.setTimeZone(TimeZone.getDefault()); Review comment: Put this line into a static block (only set once) ########## File path: pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java ########## @@ -59,9 +65,14 @@ protected HelixManager _helixManager; protected String _authToken; + // Fixed size LRU cache with InstanceName - SegmentName pair as key, and segment related Review comment: It is `TableName - SegmentName` pair ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java ########## @@ -156,38 +179,93 @@ private TableDebugInfo debugTable(PinotHelixResourceManager pinotHelixResourceMa * * @param pinotHelixResourceManager Helix Resource Manager * @param tableNameWithType Name of table with type + * @param verbose If true, returns all segment info, else, only segments with errors * @return Debug information for segments */ private List<TableDebugInfo.SegmentDebugInfo> debugSegments(PinotHelixResourceManager pinotHelixResourceManager, - String tableNameWithType) { + String tableNameWithType, int verbose) { ExternalView externalView = pinotHelixResourceManager.getTableExternalView(tableNameWithType); IdealState idealState = pinotHelixResourceManager.getTableIdealState(tableNameWithType); - List<TableDebugInfo.SegmentDebugInfo> segmentDebugInfos = new ArrayList<>(); + List<TableDebugInfo.SegmentDebugInfo> result = new ArrayList<>(); if (idealState == null) { - return segmentDebugInfos; + return result; + } + + int serverRequestTimeoutMs = _controllerConf.getServerAdminRequestTimeoutSeconds() * 1000; + final Map<String, List<String>> serverToSegments = + _pinotHelixResourceManager.getServerToSegmentsMap(tableNameWithType); + Map<String, Map<String, SegmentServerDebugInfo>> segmentsDebugInfoFromServers; + + BiMap<String, String> serverToEndpoints; + try { + serverToEndpoints = _pinotHelixResourceManager.getDataInstanceAdminEndpoints(serverToSegments.keySet()); + } catch (InvalidConfigException e) { + throw new WebApplicationException( + "Caught exception when getting segment debug info for table: " + tableNameWithType); } + segmentsDebugInfoFromServers = + getSegmentsDebugInfoFromServers(tableNameWithType, serverToEndpoints, serverRequestTimeoutMs); + for (Map.Entry<String, Map<String, String>> segmentMapEntry : idealState.getRecord().getMapFields().entrySet()) { - String segment = segmentMapEntry.getKey(); - Map<String, String> instanceStateMap = segmentMapEntry.getValue(); + String segmentName = segmentMapEntry.getKey(); + Map<String, String> segmentIsMap = segmentMapEntry.getValue(); + + Map<String, TableDebugInfo.SegmentState> segmentServerState = new HashMap<>(); + for (Map.Entry<String, Map<String, SegmentServerDebugInfo>> segmentEntry : segmentsDebugInfoFromServers + .entrySet()) { + String instanceName = segmentEntry.getKey(); - List<TableDebugInfo.IsEvState> isEvStates = new ArrayList<>(); - for (Map.Entry<String, String> entry : instanceStateMap.entrySet()) { - String serverName = entry.getKey(); - String isState = entry.getValue(); + String isState = segmentIsMap.get(instanceName); + String evState = (externalView != null) ? externalView.getStateMap(segmentName).get(instanceName) : "null"; Review comment: (Critical) `externalView.getStateMap(segmentName)` could be `null`? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java ########## @@ -85,7 +86,7 @@ import org.apache.pinot.spi.stream.StreamPartitionMsgOffset; import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory; import org.apache.pinot.spi.stream.TransientConsumerException; -import org.apache.pinot.spi.utils.CommonConstants.ConsumerState; +import org.apache.pinot.spi.utils.CommonConstants; Review comment: Any specific reason for this change? ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java ########## @@ -156,38 +179,93 @@ private TableDebugInfo debugTable(PinotHelixResourceManager pinotHelixResourceMa * * @param pinotHelixResourceManager Helix Resource Manager * @param tableNameWithType Name of table with type + * @param verbose If true, returns all segment info, else, only segments with errors * @return Debug information for segments */ private List<TableDebugInfo.SegmentDebugInfo> debugSegments(PinotHelixResourceManager pinotHelixResourceManager, - String tableNameWithType) { + String tableNameWithType, int verbose) { ExternalView externalView = pinotHelixResourceManager.getTableExternalView(tableNameWithType); IdealState idealState = pinotHelixResourceManager.getTableIdealState(tableNameWithType); - List<TableDebugInfo.SegmentDebugInfo> segmentDebugInfos = new ArrayList<>(); + List<TableDebugInfo.SegmentDebugInfo> result = new ArrayList<>(); if (idealState == null) { - return segmentDebugInfos; + return result; + } + + int serverRequestTimeoutMs = _controllerConf.getServerAdminRequestTimeoutSeconds() * 1000; + final Map<String, List<String>> serverToSegments = + _pinotHelixResourceManager.getServerToSegmentsMap(tableNameWithType); + Map<String, Map<String, SegmentServerDebugInfo>> segmentsDebugInfoFromServers; + + BiMap<String, String> serverToEndpoints; + try { + serverToEndpoints = _pinotHelixResourceManager.getDataInstanceAdminEndpoints(serverToSegments.keySet()); + } catch (InvalidConfigException e) { + throw new WebApplicationException( + "Caught exception when getting segment debug info for table: " + tableNameWithType); } + segmentsDebugInfoFromServers = + getSegmentsDebugInfoFromServers(tableNameWithType, serverToEndpoints, serverRequestTimeoutMs); + for (Map.Entry<String, Map<String, String>> segmentMapEntry : idealState.getRecord().getMapFields().entrySet()) { - String segment = segmentMapEntry.getKey(); - Map<String, String> instanceStateMap = segmentMapEntry.getValue(); + String segmentName = segmentMapEntry.getKey(); + Map<String, String> segmentIsMap = segmentMapEntry.getValue(); + + Map<String, TableDebugInfo.SegmentState> segmentServerState = new HashMap<>(); + for (Map.Entry<String, Map<String, SegmentServerDebugInfo>> segmentEntry : segmentsDebugInfoFromServers + .entrySet()) { + String instanceName = segmentEntry.getKey(); - List<TableDebugInfo.IsEvState> isEvStates = new ArrayList<>(); - for (Map.Entry<String, String> entry : instanceStateMap.entrySet()) { - String serverName = entry.getKey(); - String isState = entry.getValue(); + String isState = segmentIsMap.get(instanceName); + String evState = (externalView != null) ? externalView.getStateMap(segmentName).get(instanceName) : "null"; + Map<String, SegmentServerDebugInfo> segmentServerDebugInfoMap = segmentEntry.getValue(); - String evState = (externalView != null) ? externalView.getStateMap(segment).get(serverName) : "null"; - if (!isState.equals(evState)) { - isEvStates.add(new TableDebugInfo.IsEvState(serverName, isState, evState)); + if ((verbose > 0) || segmentHasErrors(segmentServerDebugInfoMap, evState)) { + SegmentServerDebugInfo segmentServerDebugInfo = segmentServerDebugInfoMap.get(segmentName); + + segmentServerState.put(instanceName, + new TableDebugInfo.SegmentState(isState, evState, segmentServerDebugInfo.getSegmentSize(), + segmentServerDebugInfo.getConsumerInfo(), segmentServerDebugInfo.getErrorInfo())); } } - if (isEvStates.size() > 0) { - segmentDebugInfos.add(new TableDebugInfo.SegmentDebugInfo(segment, isEvStates)); + if (!segmentServerState.isEmpty()) { + result.add(new TableDebugInfo.SegmentDebugInfo(segmentName, segmentServerState)); } } - return segmentDebugInfos; + + return result; + } + + /** + * Helper method to check if a segment has any errors/issues. + * + * @param segmentServerDebugInfoMap Map from instanceName to segmentDebugInfo + * @param externalView external view of segment + * @return True if there's any error/issue for the segment, false otherwise. + */ + private boolean segmentHasErrors(Map<String, SegmentServerDebugInfo> segmentServerDebugInfoMap, String externalView) { + // For now, we will skip cases where IS is ONLINE and EV is OFFLINE (or vice-versa), as it could happen during state transitions. + if (externalView.equals("ERROR")) { + return true; + } + + for (Map.Entry<String, SegmentServerDebugInfo> entry : segmentServerDebugInfoMap.entrySet()) { Review comment: (Critical) this method is per-segment check, and we should only check the `SegmentServerDebugInfo` for this one single segment instead of checking all of them ########## File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentErrorInfo.java ########## @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.restlet.resources; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; +import org.apache.commons.lang3.exception.ExceptionUtils; + + +/** + * This class is used to represent errors related to a segment. + */ +@SuppressWarnings("unused") +@JsonIgnoreProperties(ignoreUnknown = true) +public class SegmentErrorInfo { + private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss z"; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT, Locale.getDefault()); + + private final String _timestamp; + private final String _errorMessage; + private final String _exceptionStackTrace; + + /** + * + * @param exceptionStackTrace Exception object + * @param errorMessage Error Message + * @param timestampMs Timestamp of the error + */ + public SegmentErrorInfo(Exception exceptionStackTrace, String errorMessage, long timestampMs) { Review comment: (nit) change the parameter order to match the variable order ########## File path: pinot-server/src/main/java/org/apache/pinot/server/api/resources/DebugResource.java ########## @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.server.api.resources; + +import io.swagger.annotations.Api; +import io.swagger.annotations.ApiOperation; +import io.swagger.annotations.ApiParam; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import javax.inject.Inject; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.restlet.resources.SegmentConsumerInfo; +import org.apache.pinot.common.restlet.resources.SegmentErrorInfo; +import org.apache.pinot.common.restlet.resources.SegmentServerDebugInfo; +import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager; +import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager; +import org.apache.pinot.segment.local.data.manager.SegmentDataManager; +import org.apache.pinot.segment.local.data.manager.TableDataManager; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.server.starter.ServerInstance; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.utils.builder.TableNameBuilder; + + +/** + * Debug resource for Pinot Server. + */ +@Api(tags = "Debug") +@Path("/debug/") +public class DebugResource { + + @Inject + private ServerInstance _serverInstance; + + @GET + @Path("tables/{tableName}") + @Produces(MediaType.APPLICATION_JSON) + @ApiOperation(value = "Get segments debug info for this table", notes = "This is a debug endpoint, and won't maintain backward compatibility") + public List<SegmentServerDebugInfo> getSegmentsDebugInfo( + @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableNameWithType) { + + TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableNameWithType); + return getSegmentServerDebugInfo(tableNameWithType, tableType); + } + + private List<SegmentServerDebugInfo> getSegmentServerDebugInfo(String tableNameWithType, TableType tableType) { + List<SegmentServerDebugInfo> segmentServerDebugInfos = new ArrayList<>(); + + TableDataManager tableDataManager = + ServerResourceUtils.checkGetTableDataManager(_serverInstance, tableNameWithType); + + Map<String, SegmentErrorInfo> segmentErrorsMap = tableDataManager.getSegmentErrors(); + Set<String> segmentsWithDataManagers = new HashSet<>(); + List<SegmentDataManager> segmentDataManagers = tableDataManager.acquireAllSegments(); + try { + for (SegmentDataManager segmentDataManager : segmentDataManagers) { + String segmentName = segmentDataManager.getSegmentName(); + segmentsWithDataManagers.add(segmentName); + + // Get segment consumer info. + SegmentConsumerInfo segmentConsumerInfo = getSegmentConsumerInfo(segmentDataManager, tableType); + + // Get segment size. + long segmentSize = getSegmentSize(segmentDataManager); + + // Get segment error. + SegmentErrorInfo segmentErrorInfo = segmentErrorsMap.get(segmentName); + + segmentServerDebugInfos.add(new SegmentServerDebugInfo(segmentName, segmentConsumerInfo, segmentErrorInfo, + FileUtils.byteCountToDisplaySize(segmentSize))); + } + } catch (Exception e) { + throw new WebApplicationException("Caught exception when getting consumer info for table: " + tableNameWithType); + } finally { + for (SegmentDataManager segmentDataManager : segmentDataManagers) { + tableDataManager.releaseSegment(segmentDataManager); + } + } + + // There may be segment errors for segments without Data Managers (e.g. segment wasn't loaded). + for (Map.Entry<String, SegmentErrorInfo> entry : segmentErrorsMap.entrySet()) { + String segmentName = entry.getKey(); + + if (!segmentsWithDataManagers.contains(segmentName)) { + SegmentErrorInfo segmentErrorInfo = entry.getValue(); + segmentServerDebugInfos.add(new SegmentServerDebugInfo(segmentName, null, segmentErrorInfo, "-")); + } + } + return segmentServerDebugInfos; + } + + private long getSegmentSize(SegmentDataManager segmentDataManager) { + return (segmentDataManager instanceof ImmutableSegmentDataManager) ? ((ImmutableSegment) segmentDataManager + .getSegment()).getSegmentSizeBytes() : 0; + } + + private SegmentConsumerInfo getSegmentConsumerInfo(SegmentDataManager segmentDataManager, TableType tableType) { + SegmentConsumerInfo segmentConsumerInfo = null; + if (tableType.equals(TableType.REALTIME)) { Review comment: (nit) this can be `==` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java ########## @@ -59,9 +65,14 @@ protected HelixManager _helixManager; protected String _authToken; + // Fixed size LRU cache with InstanceName - SegmentName pair as key, and segment related + // errors as the value. + protected LoadingCache<Pair<String, String>, SegmentErrorInfo> _errorCache; + @Override public void init(TableDataManagerConfig tableDataManagerConfig, String instanceId, - ZkHelixPropertyStore<ZNRecord> propertyStore, ServerMetrics serverMetrics, HelixManager helixManager) { + ZkHelixPropertyStore<ZNRecord> propertyStore, ServerMetrics serverMetrics, HelixManager helixManager, + LoadingCache<Pair<String, String>, SegmentErrorInfo> errorCache) { Review comment: nullable ########## File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentErrorInfo.java ########## @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.common.restlet.resources; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; +import org.apache.commons.lang3.exception.ExceptionUtils; + + +/** + * This class is used to represent errors related to a segment. + */ +@SuppressWarnings("unused") +@JsonIgnoreProperties(ignoreUnknown = true) +public class SegmentErrorInfo { + private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss z"; + private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT, Locale.getDefault()); + + private final String _timestamp; + private final String _errorMessage; + private final String _exceptionStackTrace; + + /** + * + * @param exceptionStackTrace Exception object + * @param errorMessage Error Message + * @param timestampMs Timestamp of the error + */ + public SegmentErrorInfo(Exception exceptionStackTrace, String errorMessage, long timestampMs) { + _timestamp = epochToSDF(timestampMs); + _errorMessage = errorMessage; + _exceptionStackTrace = (exceptionStackTrace != null) ? ExceptionUtils.getStackTrace(exceptionStackTrace) : null; + } + + /** + * This constructor is specifically for JSON ser/de. + * + * @param exceptionStackTrace Exception stack trace + * @param errorMessage Error message + * @param timestamp Time stamp of the error in Simple Date Format. + * + */ + @JsonCreator + public SegmentErrorInfo(@JsonProperty("exception") String exceptionStackTrace, Review comment: Match the json property with the variable name? Also use the same order ########## File path: pinot-core/src/main/java/org/apache/pinot/core/data/manager/offline/DimensionTableDataManager.java ########## @@ -59,7 +59,7 @@ private DimensionTableDataManager() { /** * `createInstanceByTableName` should only be used by the {@link TableDataManagerProvider} and the returned - * instance should be properly initialized via {@link #init} method before using. + * instance should be properly initialized via {@link org.apache.pinot.segment.local.data.manager.TableDataManager#init} method before using. Review comment: I feel the existing doc is better ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java ########## @@ -82,9 +102,10 @@ @Produces(MediaType.APPLICATION_JSON) @ApiOperation(value = "Get debug information for table.", notes = "Debug information for table.") @ApiResponses(value = {@ApiResponse(code = 200, message = "Success"), @ApiResponse(code = 500, message = "Internal server error")}) - public String getClusterInfo( + public String getTableDebugInfo( @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableName, - @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr) + @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr, + @ApiParam(value = "Verbosity of debug information") @DefaultValue("0") @QueryParam("verbosity") int verbose) Review comment: Rename the argument to `verbosity` as well since it's no longer a boolean. Same for other places in this file ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java ########## @@ -156,38 +179,93 @@ private TableDebugInfo debugTable(PinotHelixResourceManager pinotHelixResourceMa * * @param pinotHelixResourceManager Helix Resource Manager * @param tableNameWithType Name of table with type + * @param verbose If true, returns all segment info, else, only segments with errors * @return Debug information for segments */ private List<TableDebugInfo.SegmentDebugInfo> debugSegments(PinotHelixResourceManager pinotHelixResourceManager, - String tableNameWithType) { + String tableNameWithType, int verbose) { ExternalView externalView = pinotHelixResourceManager.getTableExternalView(tableNameWithType); IdealState idealState = pinotHelixResourceManager.getTableIdealState(tableNameWithType); - List<TableDebugInfo.SegmentDebugInfo> segmentDebugInfos = new ArrayList<>(); + List<TableDebugInfo.SegmentDebugInfo> result = new ArrayList<>(); if (idealState == null) { - return segmentDebugInfos; + return result; + } + + int serverRequestTimeoutMs = _controllerConf.getServerAdminRequestTimeoutSeconds() * 1000; + final Map<String, List<String>> serverToSegments = + _pinotHelixResourceManager.getServerToSegmentsMap(tableNameWithType); + Map<String, Map<String, SegmentServerDebugInfo>> segmentsDebugInfoFromServers; + + BiMap<String, String> serverToEndpoints; + try { + serverToEndpoints = _pinotHelixResourceManager.getDataInstanceAdminEndpoints(serverToSegments.keySet()); + } catch (InvalidConfigException e) { + throw new WebApplicationException( + "Caught exception when getting segment debug info for table: " + tableNameWithType); } + segmentsDebugInfoFromServers = Review comment: (nit) Put the declaration here ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java ########## @@ -156,38 +179,93 @@ private TableDebugInfo debugTable(PinotHelixResourceManager pinotHelixResourceMa * * @param pinotHelixResourceManager Helix Resource Manager * @param tableNameWithType Name of table with type + * @param verbose If true, returns all segment info, else, only segments with errors * @return Debug information for segments */ private List<TableDebugInfo.SegmentDebugInfo> debugSegments(PinotHelixResourceManager pinotHelixResourceManager, - String tableNameWithType) { + String tableNameWithType, int verbose) { ExternalView externalView = pinotHelixResourceManager.getTableExternalView(tableNameWithType); IdealState idealState = pinotHelixResourceManager.getTableIdealState(tableNameWithType); - List<TableDebugInfo.SegmentDebugInfo> segmentDebugInfos = new ArrayList<>(); + List<TableDebugInfo.SegmentDebugInfo> result = new ArrayList<>(); if (idealState == null) { - return segmentDebugInfos; + return result; + } + + int serverRequestTimeoutMs = _controllerConf.getServerAdminRequestTimeoutSeconds() * 1000; + final Map<String, List<String>> serverToSegments = + _pinotHelixResourceManager.getServerToSegmentsMap(tableNameWithType); + Map<String, Map<String, SegmentServerDebugInfo>> segmentsDebugInfoFromServers; + + BiMap<String, String> serverToEndpoints; + try { + serverToEndpoints = _pinotHelixResourceManager.getDataInstanceAdminEndpoints(serverToSegments.keySet()); + } catch (InvalidConfigException e) { + throw new WebApplicationException( + "Caught exception when getting segment debug info for table: " + tableNameWithType); } + segmentsDebugInfoFromServers = + getSegmentsDebugInfoFromServers(tableNameWithType, serverToEndpoints, serverRequestTimeoutMs); + for (Map.Entry<String, Map<String, String>> segmentMapEntry : idealState.getRecord().getMapFields().entrySet()) { - String segment = segmentMapEntry.getKey(); - Map<String, String> instanceStateMap = segmentMapEntry.getValue(); + String segmentName = segmentMapEntry.getKey(); + Map<String, String> segmentIsMap = segmentMapEntry.getValue(); + + Map<String, TableDebugInfo.SegmentState> segmentServerState = new HashMap<>(); + for (Map.Entry<String, Map<String, SegmentServerDebugInfo>> segmentEntry : segmentsDebugInfoFromServers + .entrySet()) { + String instanceName = segmentEntry.getKey(); - List<TableDebugInfo.IsEvState> isEvStates = new ArrayList<>(); - for (Map.Entry<String, String> entry : instanceStateMap.entrySet()) { - String serverName = entry.getKey(); - String isState = entry.getValue(); + String isState = segmentIsMap.get(instanceName); + String evState = (externalView != null) ? externalView.getStateMap(segmentName).get(instanceName) : "null"; + Map<String, SegmentServerDebugInfo> segmentServerDebugInfoMap = segmentEntry.getValue(); - String evState = (externalView != null) ? externalView.getStateMap(segment).get(serverName) : "null"; - if (!isState.equals(evState)) { - isEvStates.add(new TableDebugInfo.IsEvState(serverName, isState, evState)); + if ((verbose > 0) || segmentHasErrors(segmentServerDebugInfoMap, evState)) { + SegmentServerDebugInfo segmentServerDebugInfo = segmentServerDebugInfoMap.get(segmentName); + + segmentServerState.put(instanceName, + new TableDebugInfo.SegmentState(isState, evState, segmentServerDebugInfo.getSegmentSize(), + segmentServerDebugInfo.getConsumerInfo(), segmentServerDebugInfo.getErrorInfo())); } } - if (isEvStates.size() > 0) { - segmentDebugInfos.add(new TableDebugInfo.SegmentDebugInfo(segment, isEvStates)); + if (!segmentServerState.isEmpty()) { + result.add(new TableDebugInfo.SegmentDebugInfo(segmentName, segmentServerState)); } } - return segmentDebugInfos; + + return result; + } + + /** + * Helper method to check if a segment has any errors/issues. + * + * @param segmentServerDebugInfoMap Map from instanceName to segmentDebugInfo + * @param externalView external view of segment + * @return True if there's any error/issue for the segment, false otherwise. + */ + private boolean segmentHasErrors(Map<String, SegmentServerDebugInfo> segmentServerDebugInfoMap, String externalView) { + // For now, we will skip cases where IS is ONLINE and EV is OFFLINE (or vice-versa), as it could happen during state transitions. + if (externalView.equals("ERROR")) { + return true; + } + + for (Map.Entry<String, SegmentServerDebugInfo> entry : segmentServerDebugInfoMap.entrySet()) { + SegmentServerDebugInfo debugInfo = entry.getValue(); + + SegmentConsumerInfo consumerInfo = debugInfo.getConsumerInfo(); + if (consumerInfo != null && consumerInfo.getConsumerState() + .equals(CommonConstants.ConsumerState.NOT_CONSUMING.toString())) { + return true; + } + + SegmentErrorInfo errorInfo = debugInfo.getErrorInfo(); Review comment: Should we check error info? It won't be cleared even after segment is recovered -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org