morningman commented on code in PR #19819: URL: https://github.com/apache/doris/pull/19819#discussion_r1214018756
########## fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java: ########## @@ -74,7 +78,37 @@ protected Map<String, String> getLocationProperties() throws UserException { @Override protected List<Split> getSplits() throws UserException { List<Split> result = new ArrayList<>(); - result.add(new FileSplit(new Path("/"), 0, -1, -1, 0L, new String[0], Collections.emptyList())); + // String splitPath = catalog.getTunnelUrl(); + // TODO: use single max compute scan node rather than file scan node + com.aliyun.odps.Table odpsTable = table.getOdpsTable(); + if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) { + return result; + } + try { + List<Pair<Long, Long>> sliceRange = new ArrayList<>(); + long totalRows = catalog.getTotalRows(table.getDbName(), table.getName()); + long fileNum = odpsTable.getFileNum(); + long splitSize = (long) Math.ceil((double) totalRows / fileNum); Review Comment: If the `totalRows` is less than `fileNum`, the `splitSize` will be 0. ########## fe/java-udf/src/main/java/org/apache/doris/jni/vec/VectorColumn.java: ########## @@ -21,6 +21,8 @@ import org.apache.doris.jni.utils.TypeNativeBytes; import org.apache.doris.jni.vec.ColumnType.Type; +import org.apache.log4j.Logger; Review Comment: Wrong logger ########## fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java: ########## @@ -74,7 +78,37 @@ protected Map<String, String> getLocationProperties() throws UserException { @Override protected List<Split> getSplits() throws UserException { List<Split> result = new ArrayList<>(); - result.add(new FileSplit(new Path("/"), 0, -1, -1, 0L, new String[0], Collections.emptyList())); + // String splitPath = catalog.getTunnelUrl(); + // TODO: use single max compute scan node rather than file scan node + com.aliyun.odps.Table odpsTable = table.getOdpsTable(); + if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) { + return result; + } + try { + List<Pair<Long, Long>> sliceRange = new ArrayList<>(); + long totalRows = catalog.getTotalRows(table.getDbName(), table.getName()); Review Comment: Is this `getTotalRows()` method expensive? ########## be/src/vec/exec/jni_connector.cpp: ########## @@ -63,14 +63,15 @@ JniConnector::~JniConnector() { } Status JniConnector::open(RuntimeState* state, RuntimeProfile* profile) { - RETURN_IF_ERROR(JniUtil::GetJNIEnv(&_env)); - if (_env == nullptr) { + JNIEnv* env = nullptr; + RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); + if (env == nullptr) { return Status::InternalError("Failed to get/create JVM"); } - RETURN_IF_ERROR(_init_jni_scanner(_env, state->batch_size())); + RETURN_IF_ERROR(_init_jni_scanner(env, state->batch_size())); // Call org.apache.doris.jni.JniScanner#open - _env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_open); - RETURN_ERROR_IF_EXC(_env); + env->CallVoidMethod(_jni_scanner_obj, _jni_scanner_open); + RETURN_ERROR_IF_EXC(env); Review Comment: Why using a local env here? And do we need to release this env? Add some comment in code. ########## fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java: ########## @@ -74,7 +78,37 @@ protected Map<String, String> getLocationProperties() throws UserException { @Override protected List<Split> getSplits() throws UserException { List<Split> result = new ArrayList<>(); - result.add(new FileSplit(new Path("/"), 0, -1, -1, 0L, new String[0], Collections.emptyList())); + // String splitPath = catalog.getTunnelUrl(); + // TODO: use single max compute scan node rather than file scan node + com.aliyun.odps.Table odpsTable = table.getOdpsTable(); + if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) { + return result; + } + try { + List<Pair<Long, Long>> sliceRange = new ArrayList<>(); + long totalRows = catalog.getTotalRows(table.getDbName(), table.getName()); + long fileNum = odpsTable.getFileNum(); + long splitSize = (long) Math.ceil((double) totalRows / fileNum); Review Comment: And what if there is only 100 rows, and 10 files. Do we need to split into 10 ranges? I think this is too small. ########## fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java: ########## @@ -193,7 +193,10 @@ private void setColumnPositionMapping() throws UserException { TableIf tbl = getTargetTable(); List<Integer> columnIdxs = Lists.newArrayList(); - + if (params.getRequiredSlots() == null) { Review Comment: What is this for? Add some comment to explain it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org