[incubator-doris] branch master updated: [Optimize] Add an unordered_map for TabletSchema to speed up column name lookup (#4779)
This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git The following commit(s) were added to refs/heads/master by this push: new d1c2b3e [Optimize] Add an unordered_map for TabletSchema to speed up column name lookup (#4779) d1c2b3e is described below commit d1c2b3ed0d2c520ba670a24938d97e164689ccaf Author: Yingchun Lai <405403...@qq.com> AuthorDate: Tue Nov 3 19:53:44 2020 +0800 [Optimize] Add an unordered_map for TabletSchema to speed up column name lookup (#4779) Reduce column name lookup for TabletSchema and Tablet from O(N) to O(1). --- be/src/olap/delete_handler.cpp | 17 +++- be/src/olap/delete_handler.h| 18 ++--- be/src/olap/olap_cond.cpp | 6 +++--- be/src/olap/olap_cond.h | 13 ++-- be/src/olap/reader.cpp | 7 +-- be/src/olap/tablet.h| 4 ++-- be/src/olap/tablet_manager.cpp | 19 ++--- be/src/olap/tablet_schema.cpp | 27 + be/src/olap/tablet_schema.h | 9 - be/test/olap/rowset/segment_v2/segment_test.cpp | 16 --- 10 files changed, 54 insertions(+), 82 deletions(-) diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 28c8d77..8e8e80a 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -154,8 +154,7 @@ OLAPStatus DeleteConditionHandler::check_condition_valid( const TabletSchema& schema, const TCondition& cond) { // 检查指定列名的列是否存在 -int field_index = _get_field_index(schema, cond.column_name); - +int32_t field_index = schema.field_index(cond.column_name); if (field_index < 0) { OLAP_LOG_WARNING("field is not existent. 
[field_index=%d]", field_index); return OLAP_ERR_DELETE_INVALID_CONDITION; @@ -229,19 +228,10 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio OLAPStatus DeleteHandler::init(const TabletSchema& schema, const DelPredicateArray& delete_conditions, int32_t version) { -if (_is_inited) { -OLAP_LOG_WARNING("reinitialize delete handler."); -return OLAP_ERR_INIT_FAILED; - -} - -if (version < 0) { -OLAP_LOG_WARNING("invalid parameters. [version=%d]", version); -return OLAP_ERR_DELETE_INVALID_PARAMETERS; -} +DCHECK(!_is_inited) << "reinitialize delete handler."; +DCHECK(version >= 0) << "invalid parameters. version=" << version; DelPredicateArray::const_iterator it = delete_conditions.begin(); - for (; it != delete_conditions.end(); ++it) { // 跳过版本号大于version的过滤条件 if (it->version() > version) { @@ -250,7 +240,6 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema, DeleteConditions temp; temp.filter_version = it->version(); - temp.del_cond = new(std::nothrow) Conditions(); if (temp.del_cond == nullptr) { diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index 7cab577..b03227a 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -51,27 +51,13 @@ public: std::string construct_sub_predicates(const TCondition& condition); private: - -int32_t _get_field_index(const TabletSchema& schema, const std::string& field_name) const { -for (int i = 0; i < schema.num_columns(); i++) { -if (schema.column(i).name() == field_name) { -return i; -} -} -LOG(WARNING) << "invalid field name. 
name='" << field_name; -return -1; -} - bool is_condition_value_valid(const TabletColumn& column, const TCondition& cond, const string& value); }; // 表示一个删除条件 struct DeleteConditions { -DeleteConditions() : filter_version(0), del_cond(NULL) {} -~DeleteConditions() {} - -int32_t filter_version; // 删除条件版本号 -Conditions* del_cond; // 删除条件 +int32_t filter_version = 0; // 删除条件版本号 +Conditions* del_cond = nullptr; // 删除条件 }; // 这个类主要用于判定一条数据(RowCursor)是否符合删除条件。这个类的使用流程如下: diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp index 5e37e76..74c603f 100644 --- a/be/src/olap/olap_cond.cpp +++ b/be/src/olap/olap_cond.cpp @@ -579,10 +579,10 @@ bool CondColumn::eval(const segment_v2::BloomFilter* bf) const { } OLAPStatus Conditions::append_condition(const TCondition& tcond) { -int32_t index = _get_field_index(tcond.column_name); +DCHECK(_schema != nullptr); +int32_t index = _schema->field_index(tcond.column_name); if (index < 0) { -LOG(WARNING) << "fail to get field index, name is invalid. index=" << index - << ", field_name=" << tcond.column_name; +LOG(WARNING) << "fail to
[incubator-doris] branch master updated: [LoadBalance] make BeLoadRebalancer extends from base class Rebalancer (#4771)
This is an automated email from the ASF dual-hosted git repository. kangkaisen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git The following commit(s) were added to refs/heads/master by this push: new 80d5f6e [LoadBalance] make BeLoadRebalancer extends from base class Rebalancer (#4771) 80d5f6e is described below commit 80d5f6e3d86364e81c0a5c2ffbc4549aacbd9d1d Author: HuangWei AuthorDate: Tue Nov 3 20:23:48 2020 +0800 [LoadBalance] make BeLoadRebalancer extends from base class Rebalancer (#4771) --- .../{LoadBalancer.java => BeLoadRebalancer.java} | 61 +- .../java/org/apache/doris/clone/Rebalancer.java| 96 ++ .../org/apache/doris/clone/TabletScheduler.java| 79 +++--- 3 files changed, 165 insertions(+), 71 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java similarity index 88% rename from fe/fe-core/src/main/java/org/apache/doris/clone/LoadBalancer.java rename to fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java index f764f56..9e26978 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadBalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java @@ -27,7 +27,6 @@ import org.apache.doris.clone.TabletSchedCtx.Priority; import org.apache.doris.clone.TabletScheduler.PathSlot; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; -import org.apache.doris.task.AgentBatchTask; import org.apache.doris.thrift.TStorageMedium; import com.google.common.collect.Lists; @@ -43,32 +42,16 @@ import java.util.Map; import java.util.Set; /* - * LoadBalancer is responsible for + * BeLoadRebalancer strategy: * 1. selecting alternative tablets from high load backends, and return them to tablet scheduler. * 2. given a tablet, find a backend to migration. + * 3. 
deleting the redundant replica in high load, so don't override getCachedSrcBackendId(). */ -public class LoadBalancer { -private static final Logger LOG = LogManager.getLogger(LoadBalancer.class); +public class BeLoadRebalancer extends Rebalancer { +private static final Logger LOG = LogManager.getLogger(BeLoadRebalancer.class); -private Map statisticMap; -private TabletInvertedIndex invertedIndex; -private SystemInfoService infoService; - -public LoadBalancer(Map statisticMap) { -this.statisticMap = statisticMap; -this.invertedIndex = Catalog.getCurrentInvertedIndex(); -this.infoService = Catalog.getCurrentSystemInfo(); -} - -public List selectAlternativeTablets() { -List alternativeTablets = Lists.newArrayList(); -for (Map.Entry entry : statisticMap.entrySet()) { -for (TStorageMedium medium : TStorageMedium.values()) { - alternativeTablets.addAll(selectAlternativeTabletsForCluster(entry.getKey(), -entry.getValue(), medium)); -} -} -return alternativeTablets; +public BeLoadRebalancer(SystemInfoService infoService, TabletInvertedIndex invertedIndex) { +super(infoService, invertedIndex); } /* @@ -79,14 +62,15 @@ public class LoadBalancer { * and whether it is benefit for balance (All these will be checked in tablet scheduler) * 2. Only select tablets from 'high' backends. * 3. Only select tablets from 'high' and 'mid' paths. - * + * * Here we only select tablets from high load node, do not set its src or dest, all this will be set * when this tablet is being scheduled in tablet scheduler. - * + * * NOTICE that we may select any available tablets here, ignore their state. * The state will be checked when being scheduled in tablet scheduler. 
*/ -private List selectAlternativeTabletsForCluster( +@Override +protected List selectAlternativeTabletsForCluster( String clusterName, ClusterLoadStatistic clusterStat, TStorageMedium medium) { List alternativeTablets = Lists.newArrayList(); @@ -95,7 +79,7 @@ public class LoadBalancer { List midBEs = Lists.newArrayList(); List highBEs = Lists.newArrayList(); clusterStat.getBackendStatisticByClass(lowBEs, midBEs, highBEs, medium); - + if (lowBEs.isEmpty() && highBEs.isEmpty()) { LOG.info("cluster is balance: {} with medium: {}. skip", clusterName, medium); return alternativeTablets; @@ -108,7 +92,7 @@ public class LoadBalancer { lowBEs.stream().mapToLong(BackendLoadStatistic::getBeId).toArray(), medium); return alternativeTablets; } - + if (lowBEs.stream().noneMatch(BackendLoadStatistic::hasAvailDisk)) { LOG.info("all low load backends {} ha
[GitHub] [incubator-doris] Karl-WangSK opened a new issue #4838: How to change line break when stream load
Karl-WangSK opened a new issue #4838: URL: https://github.com/apache/incubator-doris/issues/4838 The default line break is \n. If my data contains \n, the row is split into 2 records, and the load ends up failing with a "columns number doesn't match" error. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] kangkaisen commented on a change in pull request #3025: Restructure storage type to support complex types expending
kangkaisen commented on a change in pull request #3025: URL: https://github.com/apache/incubator-doris/pull/3025#discussion_r516385798 ## File path: be/src/olap/collection.h ## @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +namespace doris { + +// cpp type for ARRAY +struct Collection { +size_t length; +// null bitmap +bool* null_signs; Review comment: You could add a has_null flag. when reading array column, if has_null is false, we could only read the data column. ## File path: gensrc/proto/segment_v2.proto ## @@ -151,6 +151,12 @@ message ColumnMetaPB { repeated ColumnIndexMetaPB indexes = 8; // pointer to dictionary page when using DICT_ENCODING optional PagePointerPB dict_page = 9; + +repeated ColumnMetaPB children_columns = 10; +repeated string children_column_names = 11; Review comment: OK This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman opened a new issue #4828: [Bug] Compaction cause BE crash
morningman opened a new issue #4828: URL: https://github.com/apache/incubator-doris/issues/4828 **Describe the bug**  This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] imay commented on issue #4838: How to change line break when stream load
imay commented on issue #4838: URL: https://github.com/apache/incubator-doris/issues/4838#issuecomment-720992443 Not supported now This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] ccoffline opened a new issue #4833: [Bug] Unexpected exception: org.apache.doris.analysis.CastExpr cannot be cast to org.apache.doris.analysis.LiteralExpr
ccoffline opened a new issue #4833: URL: https://github.com/apache/incubator-doris/issues/4833 **Describe the bug** ``` ERROR 1064 (HY000): errCode = 2, detailMessage = Unexpected exception: org.apache.doris.analysis.CastExpr cannot be cast to org.apache.doris.analysis.LiteralExpr ``` **To Reproduce** ```sql CREATE DATABASE IF NOT EXISTS `test`; USE `test`; CREATE TABLE `test_t` ( `dt` bigint(20) NULL, `id` bigint(20) NULL, `name` varchar(100) NULL ) ENGINE=OLAP DUPLICATE KEY(`dt`, `id`, `name`) PARTITION BY RANGE(`dt`) ( PARTITION p20201001 VALUES [("19700101"), ("20201001")), PARTITION p20201002 VALUES [("20201001"), ("20201002")), PARTITION p20201003 VALUES [("20201002"), ("20201003")), PARTITION p20201004 VALUES [("20201003"), ("20201004")), PARTITION p20201005 VALUES [("20201004"), ("20201005")), PARTITION p20201006 VALUES [("20201005"), ("20201006")), PARTITION p20201007 VALUES [("20201006"), ("20201007")), PARTITION p20201008 VALUES [("20201007"), ("20201008")), PARTITION p20201009 VALUES [("20201008"), ("20201009")), PARTITION p20201010 VALUES [("20201009"), ("20201010")), PARTITION p20201011 VALUES [("20201010"), ("20201011")), PARTITION p20201012 VALUES [("20201011"), ("20201012")), PARTITION p20201013 VALUES [("20201012"), ("20201013")), PARTITION p20201014 VALUES [("20201013"), ("20201014")), PARTITION p20201015 VALUES [("20201014"), ("20201015")), PARTITION p20201016 VALUES [("20201015"), ("20201016")), PARTITION p20201017 VALUES [("20201016"), ("20201017")), PARTITION p20201018 VALUES [("20201017"), ("20201018")), PARTITION p20201019 VALUES [("20201018"), ("20201019")), PARTITION p20201020 VALUES [("20201019"), ("20201020")), PARTITION p20201021 VALUES [("20201020"), ("20201021")), PARTITION p20201022 VALUES [("20201021"), ("20201022")), PARTITION p20201023 VALUES [("20201022"), ("20201023")), PARTITION p20201024 VALUES [("20201023"), ("20201024")), PARTITION p20201025 VALUES [("20201024"), ("20201025")), PARTITION p20201026 VALUES [("20201025"), 
("20201026")), PARTITION p20201027 VALUES [("20201026"), ("20201027")), PARTITION p20201028 VALUES [("20201027"), ("20201028")), PARTITION p20201029 VALUES [("20201028"), ("20201029")), PARTITION p20201030 VALUES [("20201029"), ("20201030")), PARTITION p20201031 VALUES [("20201030"), ("20201031"))) DISTRIBUTED BY HASH(`id`) BUCKETS 5 PROPERTIES ( "replication_num" = "1", "in_memory" = "false", "storage_format" = "DEFAULT" ); SELECT * FROM `test`.`test_t` WHERE `dt` >= 20200801 and `dt` < 20201101 AND `id` = 1 AND `name` = 'x' AND substr(`dt`, 7, 8) in ('09') ; ``` **Expected behavior** Here `key.castTo(destType)` returns `CastExpr` https://github.com/apache/incubator-doris/blob/b1c1ffda4a0d5ff2f0be4f9d2f25997a31537ea8/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java#L135-L150 Maybe should call `getResultValue()` Does anyone have other advice? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] weizuo93 opened a new issue #4834: [Proposal] Take 'tablet scan frequency' into consideration when selecting a tablet for compaction
weizuo93 opened a new issue #4834: URL: https://github.com/apache/incubator-doris/issues/4834 A large number of small segment files will lead to low efficiency for scan operations. Multiple small files can be merged into a large file by a compaction operation. So we could take the tablet scan frequency into consideration when selecting a tablet for compaction, and preferentially do compaction for those tablets which have been scanned frequently during the most recent period of time. Using the compaction strategy of `Kudu` for reference, `scan frequency` can be calculated for a tablet over the most recent period of time and be taken into consideration when calculating the compaction score. The new compaction score can be calculated like this: `new_compaction_score = k1 * tablet_scan_frequency + k2 * old_compaction_score` `k1` and `k2` can be set dynamically through the http interface `/api/update_config`. We can add a metric `query_scan_count` for each tablet which records the scan count of the tablet. Thus, tablet scan frequency can be calculated like this: `tablet_scan_frequency = (now_query_scan_count - last_query_scan_count) / (now_time - last_time)` `last_query_scan_count` will be updated every time an `interval` passes, and `interval` can be configured (such as `300` seconds). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] ccoffline opened a new pull request #4836: [Bug] Fix ClassCastException (#4833)
ccoffline opened a new pull request #4836: URL: https://github.com/apache/incubator-doris/pull/4836 ## Proposed changes Fix for #4833 ## Types of changes - [x] Bugfix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Documentation Update (if none of the other choices apply) - [ ] Code refactor (Modify the code structure, format the code, etc...) ## Checklist - [x] I have created an issue on (Fix #ISSUE), and have described the bug/feature there in detail - [x] Compiling and unit tests pass locally with my changes - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] If this change needs a document change, I have updated the document - [x] Any dependent changes have been merged ## Further comments This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] EmmyMiao87 commented on issue #4795: [BUG] retry submit exception is not catched
EmmyMiao87 commented on issue #4795: URL: https://github.com/apache/incubator-doris/issues/4795#issuecomment-721088608 Another problem is that I think loading should not have so much concurrency control. In fact, db itself already has a control of the number of transactions. So the number of concurrent tasks here is best to be twice the number of transactions. Also, if the submitted task is abnormal, I think the load job can be cancelled directly. The reason is that after the system is currently loading the stack, it is generally difficult to recover quickly, so you are not sure how long you will have to wait to retry successfully. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] xinyiZzz edited a comment on pull request #4825: [Feature] Running Profile OLAP_SCAN_NODE layering and enhance readability
xinyiZzz edited a comment on pull request #4825: URL: https://github.com/apache/incubator-doris/pull/4825#issuecomment-720394209 @HappenLee Good question, I have moved `_segment_profile` and `scanner_profile` to `OlapScanNode` and modified the document. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] kangkaisen closed issue #4763: [Proposal] refactor the LoadBalancer to support more rebalance strategies
kangkaisen closed issue #4763: URL: https://github.com/apache/incubator-doris/issues/4763 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] HappenLee commented on issue #4835: [Proposal] Disable v1 storage format when creating a new table
HappenLee commented on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-720952500 good idea~ This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman commented on a change in pull request #4837: [Optimize] Take 'tablet scan frequency' into consideration when selecting a tablet for compaction
morningman commented on a change in pull request #4837: URL: https://github.com/apache/incubator-doris/pull/4837#discussion_r516629748 ## File path: be/src/olap/tablet.h ## @@ -301,6 +303,10 @@ class Tablet : public BaseTablet { // cumulative compaction policy std::unique_ptr _cumulative_compaction_policy; std::string _cumulative_compaction_type; + +int64_t _last_update_scan_count; Review comment: Add comment for the new fields This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] kangkaisen commented on a change in pull request #4771: [LoadBalance] make BeLoadRebalancer extends from base class Rebalancer
kangkaisen commented on a change in pull request #4771: URL: https://github.com/apache/incubator-doris/pull/4771#discussion_r515754212 ## File path: fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java ## @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.clone; + +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.TabletScheduler.PathSlot; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.thrift.TStorageMedium; + +import com.google.common.collect.Lists; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/* + * Rebalancer is responsible for + * 1. selectAlternativeTablets: selecting alternative tablets by one rebalance strategy, + * and return them to tablet scheduler(maybe contains the concrete moves, or maybe not). + * 2. createBalanceTask: given a tablet, try to create a clone task for this tablet. + * 3. getToDeleteReplicaId: if the rebalance strategy wants to delete the specified replica, + * override this func to let TabletScheduler know in handling redundant replica. + * NOTICE: + * 1. 
Adding the selected tablets by TabletScheduler may not succeed at all. And the move may be failed in some other places. + * So the thing you need to know is, Rebalancer cannot know when the move is failed. + * 2. If you want to make sure the move is succeed, you can assume that it's succeed when getToDeleteReplicaId called. + */ +public abstract class Rebalancer { +// When Rebalancer init, the statisticMap is usually empty. So it's no need to be an arg. +// Only use updateLoadStatistic() to load stats. +protected Map statisticMap = new HashMap<>(); +protected TabletInvertedIndex invertedIndex; +protected SystemInfoService infoService; + +public Rebalancer(SystemInfoService infoService, TabletInvertedIndex invertedIndex) { +this.infoService = infoService; +this.invertedIndex = invertedIndex; +} + +public List selectAlternativeTablets() { +List alternativeTablets = Lists.newArrayList(); +for (Map.Entry entry : statisticMap.entrySet()) { +for (TStorageMedium medium : TStorageMedium.values()) { + alternativeTablets.addAll(selectAlternativeTabletsForCluster(entry.getKey(), +entry.getValue(), medium)); +} +} +return alternativeTablets; +} + +// The return TabletSchedCtx should have the tablet id at least. {srcReplica, destBe} can be complete here or +// later(when createBalanceTask called). +protected abstract List selectAlternativeTabletsForCluster( +String clusterName, ClusterLoadStatistic clusterStat, TStorageMedium medium); + + +public void createBalanceTask(TabletSchedCtx tabletCtx, Map backendsWorkingSlots, + AgentBatchTask batchTask) throws SchedException { +completePlan(tabletCtx, backendsWorkingSlots); +batchTask.addTask(tabletCtx.createCloneReplicaAndTask()); +} + +// Before createCloneReplicaAndTask, we need to complete the TabletSchedCtx. +// 1. If you generate {tabletId, srcReplica, destBe} in selectAlternativeTablets(), it may be invalid at +// this point(it may have a long interval between selectAlternativeTablets & createBalanceTask). 
+// You should check the moves' validation. +// 2. If you want to generate {srcReplica, destBe} here, just do it. +// 3. You should check the path slots of src & dest. +protected abstract void completePlan(TabletSchedCtx tabletCtx, Map backendsWorkingSlots) Review comment: In the whole FE balance module, we don't use `Plan`. So I think we could rename it to `completeSchedCtx`? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org ---
[GitHub] [incubator-doris] imay commented on issue #4832: ErrorReason{code=errCode = 104, msg='be 10003 abort task with reason: Broker: Offset out of range'}
imay commented on issue #4832: URL: https://github.com/apache/incubator-doris/issues/4832#issuecomment-720845365 是不是Kafka中已经没有这么多的数据了? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] demon-gu opened a new issue #4832: ErrorReason{code=errCode = 104, msg='be 10003 abort task with reason: Broker: Offset out of range'}
demon-gu opened a new issue #4832: URL: https://github.com/apache/incubator-doris/issues/4832 从Kafka中拉取数据到Doris出现这个问题ErrorReason{code=errCode = 104, msg='be 10003 abort task with reason: Broker: Offset out of range'} ,建表语句定义的offset是"kafka_offsets"="0,0,0,0,0,0,0,0,0",请问这是什么原因呀? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] vagetablechicken commented on a change in pull request #4771: [LoadBalance] make BeLoadRebalancer extends from base class Rebalancer
vagetablechicken commented on a change in pull request #4771: URL: https://github.com/apache/incubator-doris/pull/4771#discussion_r515766101 ## File path: fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java ## @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.clone; + +import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.clone.TabletScheduler.PathSlot; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.task.AgentBatchTask; +import org.apache.doris.thrift.TStorageMedium; + +import com.google.common.collect.Lists; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/* + * Rebalancer is responsible for + * 1. selectAlternativeTablets: selecting alternative tablets by one rebalance strategy, + * and return them to tablet scheduler(maybe contains the concrete moves, or maybe not). + * 2. createBalanceTask: given a tablet, try to create a clone task for this tablet. + * 3. getToDeleteReplicaId: if the rebalance strategy wants to delete the specified replica, + * override this func to let TabletScheduler know in handling redundant replica. + * NOTICE: + * 1. 
Adding the selected tablets by TabletScheduler may not succeed at all. And the move may be failed in some other places. + * So the thing you need to know is, Rebalancer cannot know when the move is failed. + * 2. If you want to make sure the move is succeed, you can assume that it's succeed when getToDeleteReplicaId called. + */ +public abstract class Rebalancer { +// When Rebalancer init, the statisticMap is usually empty. So it's no need to be an arg. +// Only use updateLoadStatistic() to load stats. +protected Map statisticMap = new HashMap<>(); +protected TabletInvertedIndex invertedIndex; +protected SystemInfoService infoService; + +public Rebalancer(SystemInfoService infoService, TabletInvertedIndex invertedIndex) { +this.infoService = infoService; +this.invertedIndex = invertedIndex; +} + +public List selectAlternativeTablets() { +List alternativeTablets = Lists.newArrayList(); +for (Map.Entry entry : statisticMap.entrySet()) { +for (TStorageMedium medium : TStorageMedium.values()) { + alternativeTablets.addAll(selectAlternativeTabletsForCluster(entry.getKey(), +entry.getValue(), medium)); +} +} +return alternativeTablets; +} + +// The return TabletSchedCtx should have the tablet id at least. {srcReplica, destBe} can be complete here or +// later(when createBalanceTask called). +protected abstract List selectAlternativeTabletsForCluster( +String clusterName, ClusterLoadStatistic clusterStat, TStorageMedium medium); + + +public void createBalanceTask(TabletSchedCtx tabletCtx, Map backendsWorkingSlots, + AgentBatchTask batchTask) throws SchedException { +completePlan(tabletCtx, backendsWorkingSlots); +batchTask.addTask(tabletCtx.createCloneReplicaAndTask()); +} + +// Before createCloneReplicaAndTask, we need to complete the TabletSchedCtx. +// 1. If you generate {tabletId, srcReplica, destBe} in selectAlternativeTablets(), it may be invalid at +// this point(it may have a long interval between selectAlternativeTablets & createBalanceTask). 
+// You should check the moves' validation. +// 2. If you want to generate {srcReplica, destBe} here, just do it. +// 3. You should check the path slots of src & dest. +protected abstract void completePlan(TabletSchedCtx tabletCtx, Map backendsWorkingSlots) Review comment: 👌 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional
[GitHub] [incubator-doris] px-l opened a new pull request #4830: [FE UI] Fix some bugs about new FE UI
px-l opened a new pull request #4830: URL: https://github.com/apache/incubator-doris/pull/4830 1. Add a search box to the left tree view of Playground. 2. Fix some visual bugs in the UI. 3. Fix the bug that links fail in the QueryProfile view. 4. Fix the bug that the cookie is always invalid. 5. Set the cookie to HTTP_ONLY to make it safer. ## Proposed changes Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue. ## Types of changes What types of changes does your code introduce to Doris? _Put an `x` in the boxes that apply_ - [] Bugfix (non-breaking change which fixes an issue) - [] New feature (non-breaking change which adds functionality) - [] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [] Documentation Update (if none of the other choices apply) - [] Code refactor (Modify the code structure, format the code, etc...) ## Checklist _Put an `x` in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code._ - [] I have create an issue on (Fix #ISSUE), and have described the bug/feature there in detail - [] Compiling and unit tests pass locally with my changes - [] I have added tests that prove my fix is effective or that my feature works - [] If this change need a document change, I have updated the document - [] Any dependent changes have been merged ## Further comments If this is a relatively large or complex change, kick off the discussion at d...@doris.apache.org by explaining why you chose the solution you did and what alternatives you considered, etc... This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] EmmyMiao87 opened a new pull request #4831: Add other license declare in NOTICE
EmmyMiao87 opened a new pull request #4831: URL: https://github.com/apache/incubator-doris/pull/4831 ## Proposed changes The copyright of some files is not apache, these files will be declared in the NOTICE file Some files that forgot to bring the apache copyright will also be added. ## Types of changes What types of changes does your code introduce to Doris? _Put an `x` in the boxes that apply_ - [] Bugfix (non-breaking change which fixes an issue) - [] New feature (non-breaking change which adds functionality) - [] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [x] Documentation Update (if none of the other choices apply) - [] Code refactor (Modify the code structure, format the code, etc...) ## Checklist _Put an `x` in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code._ - [] I have create an issue on (Fix #ISSUE), and have described the bug/feature there in detail - [] Compiling and unit tests pass locally with my changes - [] I have added tests that prove my fix is effective or that my feature works - [] If this change need a document change, I have updated the document - [] Any dependent changes have been merged ## Further comments If this is a relatively large or complex change, kick off the discussion at d...@doris.apache.org by explaining why you chose the solution you did and what alternatives you considered, etc... This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman opened a new pull request #4840: [Bug] Fix bug that failed to create view with complex select stmt
morningman opened a new pull request #4840: URL: https://github.com/apache/incubator-doris/pull/4840 ## Proposed changes Fix bug that failed to create view with complex select stmt ## Types of changes - [x] Bugfix (non-breaking change which fixes an issue) ## Checklist - [] I have create an issue on (Fix #4839), and have described the bug/feature there in detail This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] kangkaisen merged pull request #4771: [LoadBalance] make BeLoadRebalancer extends from base class Rebalancer
kangkaisen merged pull request #4771: URL: https://github.com/apache/incubator-doris/pull/4771 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] HappenLee commented on a change in pull request #4825: [Feature] Running Profile OLAP_SCAN_NODE layering and enhance readability
HappenLee commented on a change in pull request #4825: URL: https://github.com/apache/incubator-doris/pull/4825#discussion_r515754966 ## File path: be/src/exec/scan_node.h ## @@ -96,49 +96,29 @@ class ScanNode : public ExecNode { RuntimeProfile::Counter* rows_read_counter() const { return _rows_read_counter; } -RuntimeProfile::Counter* read_timer() const { -return _read_timer; -} -RuntimeProfile::Counter* total_throughput_counter() const { -return _total_throughput_counter; -} -RuntimeProfile::Counter* per_read_thread_throughput_counter() const { -return _per_read_thread_throughput_counter; -} RuntimeProfile::Counter* materialize_tuple_timer() const { return _materialize_tuple_timer; } -RuntimeProfile::ThreadCounters* scanner_thread_counters() const { -return _scanner_thread_counters; -} +// OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and SegmentIterator according to the calling relationship +void init_scan_profile(); // names of ScanNode common counters static const std::string _s_bytes_read_counter; static const std::string _s_rows_read_counter; -static const std::string _s_total_read_timer; -static const std::string _s_total_throughput_counter; -static const std::string _s_per_read_thread_throughput_counter; static const std::string _s_num_disks_accessed_counter; static const std::string _s_materialize_tuple_timer; static const std::string _s_scanner_thread_counters_prefix; static const std::string _s_scanner_thread_total_wallclock_time; -static const std::string _s_average_io_mgr_queue_capacity; -static const std::string _s_num_scanner_threads_started; protected: RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner // # rows/tuples read from the scanner (including those discarded by eval_conjucts()) RuntimeProfile::Counter* _rows_read_counter; -RuntimeProfile::Counter* _read_timer; // total read time -// Wall based aggregate read throughput [bytes/sec] -RuntimeProfile::Counter* _total_throughput_counter; -// Per 
thread read throughput [bytes/sec] -RuntimeProfile::Counter* _per_read_thread_throughput_counter; RuntimeProfile::Counter* _num_disks_accessed_counter; RuntimeProfile::Counter* _materialize_tuple_timer; // time writing tuple slots -// Aggregated scanner thread counters -RuntimeProfile::ThreadCounters* _scanner_thread_counters; -RuntimeProfile::Counter* _num_scanner_threads_started_counter; + +boost::scoped_ptr _scanner_profile; +boost::scoped_ptr _segment_profile; Review comment: use `std::unique_ptr` replace `boost::scoped_ptr` ## File path: be/src/exec/scan_node.cpp ## @@ -23,45 +23,37 @@ namespace doris { const string ScanNode::_s_bytes_read_counter = "BytesRead"; const string ScanNode::_s_rows_read_counter = "RowsRead"; -const string ScanNode::_s_total_read_timer = "TotalRawReadTime(*)"; -const string ScanNode::_s_total_throughput_counter = "TotalReadThroughput"; const string ScanNode::_s_materialize_tuple_timer = "MaterializeTupleTime(*)"; -const string ScanNode::_s_per_read_thread_throughput_counter = -"PerReadThreadRawHdfsThroughput"; const string ScanNode::_s_num_disks_accessed_counter = "NumDiskAccess"; const string ScanNode::_s_scanner_thread_counters_prefix = "ScannerThreads"; const string ScanNode::_s_scanner_thread_total_wallclock_time = "ScannerThreadsTotalWallClockTime"; -const string ScanNode::_s_num_scanner_threads_started ="NumScannerThreadsStarted"; - Status ScanNode::prepare(RuntimeState* state) { +init_scan_profile(); RETURN_IF_ERROR(ExecNode::prepare(state)); -_scanner_thread_counters = -ADD_THREAD_COUNTERS(runtime_profile(), _s_scanner_thread_counters_prefix); _bytes_read_counter = -ADD_COUNTER(runtime_profile(), _s_bytes_read_counter, TUnit::BYTES); +ADD_COUNTER(_segment_profile, _s_bytes_read_counter, TUnit::BYTES); //TODO: The _rows_read_counter == RowsReturned counter in exec node, there is no need to keep both of them _rows_read_counter = -ADD_COUNTER(runtime_profile(), _s_rows_read_counter, TUnit::UNIT); -_read_timer = 
ADD_TIMER(runtime_profile(), _s_total_read_timer); +ADD_COUNTER(_scanner_profile, _s_rows_read_counter, TUnit::UNIT); #ifndef BE_TEST -_total_throughput_counter = runtime_profile()->add_rate_counter( -_s_total_throughput_counter, _bytes_read_counter); #endif _materialize_tuple_timer = ADD_CHILD_TIMER(runtime_profile(), _s_materialize_tuple_timer, _s_scanner_thread_total_wallclock_time); -_per_read_thread_throughput_counter = runtime_profile()->add_derived_counter( -_s_per_read_thread_throughput_counter, TUnit::BYTES_PER_SECOND, -
[GitHub] [incubator-doris] EmmyMiao87 removed a comment on issue #4795: [BUG] retry submit exception is not catched
EmmyMiao87 removed a comment on issue #4795: URL: https://github.com/apache/incubator-doris/issues/4795#issuecomment-721088608 Another problem is that I think loading should not have so much concurrency control. In fact, db itself already has a control of the number of transactions. So the number of concurrent tasks here is best to be twice the number of transactions. Also, if the submitted task is abnormal, I think the load job can be cancelled directly. The reason is that after the system is currently loading the stack, it is generally difficult to recover quickly, so you are not sure how long you will have to wait to retry successfully. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman opened a new issue #4839: [Bug] Create view failed
morningman opened a new issue #4839: URL: https://github.com/apache/incubator-doris/issues/4839 **Describe the bug** Creating a view fails with the error: "failed to init view stmt" **To Reproduce** Create a view with a complex select stmt, such as one using union, join and group by. **Why** This is because the `toSql()` method of SlotRef returns a string like " col as alias" if that SlotRef has been analyzed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman commented on pull request #4781: [Optimize] Improve LRU cache's performance
morningman commented on pull request #4781: URL: https://github.com/apache/incubator-doris/pull/4781#issuecomment-721076428 Could you rebase the master code to see if unit test can run normally? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] acelyc111 commented on pull request #4781: [Optimize] Improve LRU cache's performance
acelyc111 commented on pull request #4781: URL: https://github.com/apache/incubator-doris/pull/4781#issuecomment-721092805 > Could you rebase the master code to see if unit test can run normally? Done This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] yangzhg opened a new issue #4835: [Proposal] Disable v1 storage format when creating a new table
yangzhg opened a new issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835 At present, our V2 storage format has been developed for a long time, and V2 has many advantages that V1 does not have. The coexistence of V1 and V2 brings a lot of code and cluster maintenance costs. Therefore, I think the next version, that is, version 0.14 will disable the use of V1 storage format for newly created tables. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] weizuo93 opened a new pull request #4837: [Optimize] Take 'tablet scan frequency' into consideration when selecting a tablet for compaction
weizuo93 opened a new pull request #4837: URL: https://github.com/apache/incubator-doris/pull/4837 ## Proposed changes A large number of small segment files will lead to low efficiency for scan operations. Multiple small files can be merged into a large file by compaction operation. So we could take the tablet scan frequency into consideration when selecting an tablet for compaction and preferentially do compaction for those tablets which are scanned frequently during a latest period of time at the present. Using the compaction strategy of `Kudu`for reference, `scan frequency` can be calculated for tablet during a latest period of time and be taken into consideration when calculating compaction score. ## Types of changes What types of changes does your code introduce to Doris? _Put an `x` in the boxes that apply_ - [] Bugfix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [] Documentation Update (if none of the other choices apply) - [] Code refactor (Modify the code structure, format the code, etc...) ## Checklist _Put an `x` in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code._ - [x] I have create an issue on (Fix #4834), and have described the bug/feature there in detail - [x] Compiling and unit tests pass locally with my changes - [] I have added tests that prove my fix is effective or that my feature works - [x] If this change need a document change, I have updated the document - [x] Any dependent changes have been merged This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] hf200012 commented on issue #4835: [Proposal] Disable v1 storage format when creating a new table
hf200012 commented on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-720955226 good idea This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman merged pull request #4779: [Optimize] Add an unordered_map for TabletSchema to speed up column name lookup
morningman merged pull request #4779: URL: https://github.com/apache/incubator-doris/pull/4779 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] xinyiZzz commented on pull request #4825: [Feature] Running Profile OLAP_SCAN_NODE layering and enhance readability
xinyiZzz commented on pull request #4825: URL: https://github.com/apache/incubator-doris/pull/4825#issuecomment-720394209 @HappenLee Good question, I have moved `_segment_profile` and `_scanner_profile` to `OlapScanNode` and updated the documentation. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman opened a new pull request #4829: [Compaction][Buf] Fix bug that meta lock need to be held when calculating compaction score
morningman opened a new pull request #4829: URL: https://github.com/apache/incubator-doris/pull/4829 ## Proposed changes We need to hold the tablet's meta lock when calculating the compaction score ## Types of changes - [x] Bugfix (non-breaking change which fixes an issue) ## Checklist - [x] I have create an issue on (Fix #4828), and have described the bug/feature there in detail This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] acelyc111 commented on issue #4499: BE coredump in StorageEngine::_unused_rowset_monitor_thread_callback
acelyc111 commented on issue #4499: URL: https://github.com/apache/incubator-doris/issues/4499#issuecomment-721071655 A similar coredump: ``` (gdb) f 0 #0 0x0133d395 in Delete (arena=0x0, value=) at /var/local/thirdparty/installed/include/google/protobuf/repeated_field.h:623 623 in /var/local/thirdparty/installed/include/google/protobuf/repeated_field.h (gdb) i locals arena = 0x0 value = 0xf0002 (gdb) p value $1 = (doris::ColumnEncodingMessage *) 0xf0002 (gdb) p *value $2 = { = {}, members of doris::ColumnEncodingMessage: static kIndexInFileMessages = 1, static DIRECT = doris::ColumnEncodingMessage_Kind_DIRECT, static DICTIONARY = doris::ColumnEncodingMessage_Kind_DICTIONARY, static Kind_MIN = doris::ColumnEncodingMessage_Kind_DIRECT, static Kind_MAX = doris::ColumnEncodingMessage_Kind_DICTIONARY, static Kind_ARRAYSIZE = 2, static kKindFieldNumber = 1, static kDictionarySizeFieldNumber = 2, _internal_metadata_ = { > = { ptr_ = 0x0, static kPtrTagMask = 1, static kPtrValueMask = -2 }, }, _has_bits_ = { has_bits_ = {0} }, _cached_size_ = 0, kind_ = 0, dictionary_size_ = 0 } ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] xinghuayu007 closed pull request #4809: 【BUG】Catch runtime exception
xinghuayu007 closed pull request #4809: URL: https://github.com/apache/incubator-doris/pull/4809 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] liutang123 commented on a change in pull request #3025: Restructure storage type to support complex types expending
liutang123 commented on a change in pull request #3025: URL: https://github.com/apache/incubator-doris/pull/3025#discussion_r516041649 ## File path: be/src/olap/collection.h ## @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +namespace doris { + +// cpp type for ARRAY +struct Collection { +size_t length; +// null bitmap +bool* null_signs; Review comment: Currently, every array is a nullable array. Should we design a non-nullable array? ## File path: gensrc/proto/segment_v2.proto ## @@ -151,6 +151,12 @@ message ColumnMetaPB { repeated ColumnIndexMetaPB indexes = 8; // pointer to dictionary page when using DICT_ENCODING optional PagePointerPB dict_page = 9; + +repeated ColumnMetaPB children_columns = 10; +repeated string children_column_names = 11; Review comment: `children_column_names` can be removed. `num_rows` is used in `OrdinalIndexReader`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] yangzhg closed issue #4817: [Bug] Sequence column is not visible when show_hidden_columns = true
yangzhg closed issue #4817: URL: https://github.com/apache/incubator-doris/issues/4817 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] zhaojintaozhao commented on issue #4835: [Proposal] Disable v1 storage format when creating a new table
zhaojintaozhao commented on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-721121745 Currently, many companies are using the v1 storage format. The v2 format is not stable yet. The new Doris version must be compatible with earlier versions. Therefore, I do not recommend disabling v1 storage format. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] zhaojintaozhao edited a comment on issue #4835: [Proposal] Disable v1 storage format when creating a new table
zhaojintaozhao edited a comment on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-721121745 Currently, there are many companies also using the v1 storage format. The v2 format is not stable yet. The new Doris version must be compatible with earlier versions. Therefore, I do not recommend disabling v1 storage format. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman opened a new issue #4841: [Bug][SQL] query failed with Union and Colocate Join
morningman opened a new issue #4841: URL: https://github.com/apache/incubator-doris/issues/4841 **Describe the bug** The query fails with an error like: `failed to get tablet. tablet_id=26846, with schema_hash=398972982, reason=tablet does not exist` The SQL looks like: `select a join b union select c join d`, where both a/b and c/d are colocate joins. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman opened a new pull request #4842: [Bug][SQL] Fix bug that query failed when SQL contains Union and Colocate join
morningman opened a new pull request #4842: URL: https://github.com/apache/incubator-doris/pull/4842 ## Proposed changes For SQL like: `select a join b union select c join d`, if both a/b and c/d are colocate joins, the query may fail with an error like: `failed to get tablet. tablet_id=26846, with schema_hash=398972982, reason=tablet does not exist` This is because, when assigning exec hosts for a fragment, the planner failed to distinguish which scan nodes belong to that fragment, so it may choose the wrong host for a scan range. ## Types of changes - [x] Bugfix (non-breaking change which fixes an issue) ## Checklist - [] I have create an issue on (Fix #4841), and have described the bug/feature there in detail This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] blueChild commented on issue #4835: [Proposal] Disable v1 storage format when creating a new table
blueChild commented on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-721461833 Would it be better to allow users to choose which version to use instead of disabling it? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] zhaojintaozhao commented on issue #4835: [Proposal] Disable v1 storage format when creating a new table
zhaojintaozhao commented on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-721465184 The right to choose the storage format v1 or v2 should be delegated to the user. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] morningman commented on issue #4835: [Proposal] Disable v1 storage format when creating a new table
morningman commented on issue #4835: URL: https://github.com/apache/incubator-doris/issues/4835#issuecomment-721467250 Hi @zhaojintaozhao @blueChild SegmentV2 has been implemented since v0.12 and has been online for a long time. In v0.13, the default storage format was already changed to SegmentV2, so all **newly created** tables use SegmentV2 by default, and existing tables are not affected. What we are discussing here is disabling V1 in v0.14, which seems reasonable to me because you can think of 0.13 as a transitional version. In addition, it is recommended to discuss this issue on the dev mailing list (d...@doris.apache.org) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] HappenLee commented on a change in pull request #4842: [Bug][SQL] Fix bug that query failed when SQL contains Union and Colocate join
HappenLee commented on a change in pull request #4842: URL: https://github.com/apache/incubator-doris/pull/4842#discussion_r517127793 ## File path: fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java ## @@ -1140,17 +1141,26 @@ private long getScanRangeLength(final TScanRange scanRange) { private void computeColocateJoinInstanceParam(PlanFragmentId fragmentId, int parallelExecInstanceNum, FragmentExecParams params) { Map bucketSeqToAddress = fragmentIdToSeqToAddressMap.get(fragmentId); Review comment: Maybe making `bucketSeqToScanRang` a map like `fragmentIdToSeqToAddressMap` would be an easier-to-understand way to solve this problem? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] demon-gu commented on issue #4832: ErrorReason{code=errCode = 104, msg='be 10003 abort task with reason: Broker: Offset out of range'}
demon-gu commented on issue #4832: URL: https://github.com/apache/incubator-doris/issues/4832#issuecomment-721552450 > 是不是Kafka中已经没有这么多的数据了? 我这边解决了,设置一下property.kafka_default_offsets为OFFSET_END或者OFFSET_BEGINNING就可以了 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] EmmyMiao87 closed issue #4832: ErrorReason{code=errCode = 104, msg='be 10003 abort task with reason: Broker: Offset out of range'}
EmmyMiao87 closed issue #4832: URL: https://github.com/apache/incubator-doris/issues/4832 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] acelyc111 commented on a change in pull request #4837: [Optimize] Take 'tablet scan frequency' into consideration when selecting a tablet for compaction
acelyc111 commented on a change in pull request #4837: URL: https://github.com/apache/incubator-doris/pull/4837#discussion_r517147556 ## File path: be/src/common/config.h ## @@ -329,6 +329,13 @@ namespace config { CONF_mInt32(base_compaction_trace_threshold, "10"); CONF_mInt32(cumulative_compaction_trace_threshold, "2"); +// update tablet scan count in second +CONF_mInt64(update_tablet_scan_count_interval_second, "300"); +// coefficient for tablet scan frequency and compaction score when finding a tablet for compaction +CONF_mInt32(compaction_tablet_scan_frequency_factor, "0"); +CONF_mInt32(compaction_tablet_compaction_score_factor, "1"); Review comment: Do they need to be normalized? If needed, you should define them as double. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] acelyc111 commented on a change in pull request #4837: [Optimize] Take 'tablet scan frequency' into consideration when selecting a tablet for compaction
acelyc111 commented on a change in pull request #4837: URL: https://github.com/apache/incubator-doris/pull/4837#discussion_r517147556 ## File path: be/src/common/config.h ## @@ -329,6 +329,13 @@ namespace config { CONF_mInt32(base_compaction_trace_threshold, "10"); CONF_mInt32(cumulative_compaction_trace_threshold, "2"); +// update tablet scan count in second +CONF_mInt64(update_tablet_scan_count_interval_second, "300"); +// coefficient for tablet scan frequency and compaction score when finding a tablet for compaction +CONF_mInt32(compaction_tablet_scan_frequency_factor, "0"); +CONF_mInt32(compaction_tablet_compaction_score_factor, "1"); Review comment: Do they need to be normalized? If needed, you should define them as double. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
[GitHub] [incubator-doris] acelyc111 commented on a change in pull request #4837: [Optimize] Take 'tablet scan frequency' into consideration when selecting a tablet for compaction
acelyc111 commented on a change in pull request #4837: URL: https://github.com/apache/incubator-doris/pull/4837#discussion_r517147556 ## File path: be/src/common/config.h ## @@ -329,6 +329,13 @@ namespace config { CONF_mInt32(base_compaction_trace_threshold, "10"); CONF_mInt32(cumulative_compaction_trace_threshold, "2"); +// update tablet scan count in second +CONF_mInt64(update_tablet_scan_count_interval_second, "300"); +// coefficient for tablet scan frequency and compaction score when finding a tablet for compaction +CONF_mInt32(compaction_tablet_scan_frequency_factor, "0"); +CONF_mInt32(compaction_tablet_compaction_score_factor, "1"); Review comment: Do they need to be normalized? If needed, you should define them as double. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org