Jackie-Jiang commented on code in PR #14918: URL: https://github.com/apache/pinot/pull/14918#discussion_r1931096469
########## pinot-query-planner/src/main/java/org/apache/pinot/query/parser/utils/ParserUtils.java: ########## @@ -53,95 +54,87 @@ public static boolean canCompileWithMultiStageEngine(String query, String databa } /** - * Tries to fill an empty or not properly filled schema when no rows have been returned. + * Tries to fill an empty or not properly filled {@link DataSchema} when no row has been returned. + * + * Response data schema can be inaccurate or incomplete in several forms: + * 1. No result table at all (when all segments have been pruned on broker). + * 2. Data schema has all columns set to default type (STRING) (when all segments pruned on server). * * Priority is: - * - Types in schema provided by V2 validation for the given query. - * - Types in schema provided by V1 for the given table (only appliable to selection fields). - * - Types in response provided by V1 server (no action). + * - Types from multi-stage engine validation for the given query. + * - Types from schema for the given table (only applicable to selection fields). + * - Types from single-stage engine response (no action). + * + * Multi-stage engine schema will be available only if query compiles. */ - public static void fillEmptyResponseSchema( - BrokerResponse response, TableCache tableCache, Schema schema, String database, String query - ) { - if (response == null || response.getNumRowsResultSet() > 0) { - return; - } + public static void fillEmptyResponseSchema(BrokerResponse response, TableCache tableCache, Schema schema, + String database, String query) { + assert response.getNumRowsResultSet() == 0; - QueryEnvironment queryEnvironment = new QueryEnvironment(database, tableCache, null); - RelRoot node = queryEnvironment.getRelRootIfCanCompile(query); - DataSchema.ColumnDataType resolved; + DataSchema dataSchema = response.getResultTable() != null ? 
response.getResultTable().getDataSchema() : null; - // V1 schema info for the response can be inaccurate or incomplete in several forms: - // 1) No schema provided at all (when no segments have been even pruned). - // 2) Schema provided but all columns set to default type (STRING) (when no segments have been matched). - // V2 schema will be available only if query compiles. + List<RelDataTypeField> dataTypeFields = null; + try { + QueryEnvironment queryEnvironment = new QueryEnvironment(database, tableCache, null); + RelRoot node = queryEnvironment.getRelRootIfCanCompile(query); + if (node != null && node.validatedRowType != null) { + dataTypeFields = node.validatedRowType.getFieldList(); + } + } catch (Exception ignored) { + // Ignored + } - boolean hasV1Schema = response.getResultTable() != null; - boolean hasV2Schema = node != null && node.validatedRowType != null; + if (dataSchema == null && dataTypeFields == null) { + // No schema available, nothing we can do + return; + } - if (hasV1Schema && hasV2Schema) { - // match v1 column types with v2 column types using column names - // if no match, rely on v1 schema based on column name - // if no match either, just leave it as it is - DataSchema responseSchema = response.getResultTable().getDataSchema(); - List<RelDataTypeField> fields = node.validatedRowType.getFieldList(); - for (int i = 0; i < responseSchema.size(); i++) { - resolved = RelToPlanNodeConverter.convertToColumnDataType(fields.get(i).getType()); - if (resolved == null || resolved.isUnknown()) { - FieldSpec spec = schema.getFieldSpecFor(responseSchema.getColumnName(i)); - try { - resolved = DataSchema.ColumnDataType.fromDataType(spec.getDataType(), false); - } catch (Exception e) { - try { - resolved = DataSchema.ColumnDataType.fromDataType(spec.getDataType(), true); - } catch (Exception e2) { - resolved = DataSchema.ColumnDataType.UNKNOWN; - } - } - } - if (resolved == null || resolved.isUnknown()) { - resolved = responseSchema.getColumnDataType(i); + 
+ if (dataSchema == null || (dataTypeFields != null && dataSchema.size() != dataTypeFields.size())) { + // If data schema is not available or has different number of columns than the validated row type, we use the + // validated row type to populate the schema. Review Comment: This is for robustness. The server can return `"*"` when the broker doesn't have access to the schema to fill in the actual columns. Also, it might change if the schema is updated. We don't want to get an NPE in this case. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org