morningman commented on code in PR #37979: URL: https://github.com/apache/doris/pull/37979#discussion_r1682560799
########## fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/LakeSoulJniScanner.java: ########## @@ -60,6 +65,7 @@ public void open() throws IOException { withAllocator(nativeIOReader.getAllocator()); nativeIOReader.setBatchSize(batchSize); + LOG.info("opening LakeSoulJniScanner with params={}", params); Review Comment: use debug, like this: if (LOG.isDebugEnabled()) { LOG.debug(xxx); } ########## fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java: ########## @@ -126,24 +215,45 @@ public void setLakeSoulParams(TFileRangeDesc rangeDesc, LakeSoulSplit lakeSoulSp } public List<Split> getSplits() throws UserException { + LOG.info("getSplits with columnFilters={}", columnFilters); Review Comment: ditto ########## fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java: ########## @@ -126,24 +215,45 @@ public void setLakeSoulParams(TFileRangeDesc rangeDesc, LakeSoulSplit lakeSoulSp } public List<Split> getSplits() throws UserException { + LOG.info("getSplits with columnFilters={}", columnFilters); + LOG.info("getSplits with columnNameToRange={}", columnNameToRange); + LOG.info("getSplits with conjuncts={}", conjuncts); + + List<PartitionInfo> allPartitionInfo = lakeSoulExternalTable.listPartitionInfo(); + LOG.info("allPartitionInfo={}", allPartitionInfo); + List<PartitionInfo> filteredPartitionInfo = allPartitionInfo; + try { + filteredPartitionInfo = + LakeSoulUtils.applyPartitionFilters( + allPartitionInfo, + tableName, + partitionArrowSchema, + columnNameToRange + ); + } catch (IOException e) { + throw new UserException(e); + } + LOG.info("filteredPartitionInfo={}", filteredPartitionInfo); Review Comment: ditto ########## fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulUtils.java: ########## @@ -0,0 +1,352 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.lakesoul; + +import org.apache.doris.analysis.CastExpr; +import org.apache.doris.analysis.CompoundPredicate; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.FunctionCallExpr; +import org.apache.doris.analysis.IsNullPredicate; +import org.apache.doris.analysis.LiteralExpr; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.planner.ColumnBound; +import org.apache.doris.planner.ColumnRange; +import org.apache.doris.thrift.TExprOpcode; + +import com.dmetasoul.lakesoul.lakesoul.io.substrait.SubstraitUtil; +import com.dmetasoul.lakesoul.meta.entity.PartitionInfo; +import com.google.common.collect.BoundType; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Field; +import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Schema; +import io.substrait.expression.Expression; +import io.substrait.extension.DefaultExtensionCatalog; +import io.substrait.type.Type; +import io.substrait.type.TypeCreator; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + + +public class LakeSoulUtils { + + 
public static final String FILE_NAMES = "file_paths"; + public static final String PRIMARY_KEYS = "primary_keys"; + public static final String SCHEMA_JSON = "table_schema"; + public static final String PARTITION_DESC = "partition_descs"; + public static final String REQUIRED_FIELDS = "required_fields"; + public static final String OPTIONS = "options"; + public static final String SUBSTRAIT_PREDICATE = "substrait_predicate"; + public static final String CDC_COLUMN = "lakesoul_cdc_change_column"; + public static final String LIST_DELIM = ";"; + public static final String PARTITIONS_KV_DELIM = "="; + public static final String FS_S3A_ACCESS_KEY = "fs.s3a.access.key"; + public static final String FS_S3A_SECRET_KEY = "fs.s3a.secret.key"; + public static final String FS_S3A_ENDPOINT = "fs.s3a.endpoint"; + public static final String FS_S3A_REGION = "fs.s3a.endpoint.region"; + public static final String FS_S3A_PATH_STYLE_ACCESS = "fs.s3a.path.style.access"; + + public static List<PartitionInfo> applyPartitionFilters( + List<PartitionInfo> allPartitionInfo, + String tableName, + Schema partitionArrowSchema, + Map<String, ColumnRange> columnNameToRange + ) throws IOException { + + Expression conjunctionFilter = null; + for (Field field : partitionArrowSchema.getFields()) { + ColumnRange columnRange = columnNameToRange.get(field.getName()); + if (columnRange != null) { + Expression expr = columnRangeToSubstraitFilter(field, columnRange); + if (expr != null) { + if (conjunctionFilter == null) { + conjunctionFilter = expr; + } else { + conjunctionFilter = SubstraitUtil.and(conjunctionFilter, expr); + } + } + } + } + return SubstraitUtil.applyPartitionFilters( + allPartitionInfo, + partitionArrowSchema, + SubstraitUtil.substraitExprToProto(conjunctionFilter, tableName) + ); + } + + public static Expression columnRangeToSubstraitFilter( + Field columnField, + ColumnRange columnRange + ) throws IOException { + Optional<RangeSet<ColumnBound>> rangeSetOpt = 
columnRange.getRangeSet(); + if (columnRange.hasConjunctiveIsNull() || !rangeSetOpt.isPresent()) { + return SubstraitUtil.CONST_TRUE; + } else { + RangeSet<ColumnBound> rangeSet = rangeSetOpt.get(); + if (rangeSet.isEmpty()) { + return SubstraitUtil.CONST_TRUE; + } else { + Expression conjunctionFilter = null; + for (Range range : rangeSet.asRanges()) { + Expression expr = rangeToSubstraitFilter(columnField, range); + if (expr != null) { + if (conjunctionFilter == null) { + conjunctionFilter = expr; + } else { + conjunctionFilter = SubstraitUtil.or(conjunctionFilter, expr); + } + } + } + return conjunctionFilter; + } + } + } + + public static Expression rangeToSubstraitFilter(Field columnField, Range range) throws IOException { Review Comment: I suggest writing unit tests for this expr conversion logic. You can refer to: IcebergPredicateTest.java ########## fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/LakeSoulJniScanner.java: ########## @@ -72,19 +78,37 @@ public void open() throws IOException { Arrays.stream(primaryKeys.split(LakeSoulUtils.LIST_DELIM)).collect(Collectors.toList())); } - Schema schema = Schema.fromJSON(params.get(LakeSoulUtils.SCHEMA_JSON)); + String options = params.getOrDefault(LakeSoulUtils.OPTIONS, "{}"); + Map<String, String> optionsMap = new ObjectMapper().readValue( + options, new TypeReference<Map<String, String>>() {} + ); + String base64Predicate = optionsMap.get(LakeSoulUtils.SUBSTRAIT_PREDICATE); + if (base64Predicate != null) { + Plan predicate = SubstraitUtil.decodeBase64String(base64Predicate); + LOG.info("push predicate={}", predicate); Review Comment: ditto ########## fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalTable.java: ########## @@ -150,6 +165,7 @@ public Optional<SchemaCacheValue> initSchema() { String tableSchema = tableInfo.getTableSchema(); DBUtil.TablePartitionKeys partitionKeys = DBUtil.parseTableInfoPartitions(tableInfo.getPartitions()); Schema schema; 
+ LOG.info("tableSchema={}", tableSchema); Review Comment: ditto ########## fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java: ########## @@ -81,26 +132,32 @@ protected TFileFormatType getFileFormatType() throws UserException { @Override protected List<String> getPathPartitionKeys() throws UserException { - return new ArrayList<>(DBUtil.parseTableInfoPartitions(table.getPartitions()).rangeKeys); + return new ArrayList<>(DBUtil.parseTableInfoPartitions(partitions).rangeKeys); } @Override protected TableIf getTargetTable() throws UserException { - return lakeSoulExternalTable; + return desc.getTable(); } @Override protected Map<String, String> getLocationProperties() throws UserException { return lakeSoulExternalTable.getHadoopProperties(); } + @SneakyThrows @Override protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { + LOG.info("{}", rangeDesc); Review Comment: ditto -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org