This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new c678c928f9e [Fix](show data) Inverted index size not added to data size after index build (#29568) (#29777)
c678c928f9e is described below
commit c678c928f9e690649c752886c860d2e2acd32191
Author: airborne12 <[email protected]>
AuthorDate: Wed Jan 10 18:21:43 2024 +0800
[Fix](show data) Inverted index size not added to data size after index build (#29568) (#29777)
---
be/src/olap/task/index_builder.cpp | 8 ++
.../suites/inverted_index_p0/test_show_data.groovy | 155 +++++++++++++++++++++
2 files changed, 163 insertions(+)
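For readers skimming the diff, the sketch below summarizes the accounting pattern the patch applies in IndexBuilder::handle_single_rowset: sum the bytes written by each inverted index writer and fold that total into the rowset meta's data, total, and index disk sizes so SHOW DATA picks the new index files up. The RowsetMeta/InvertedIndexWriter types and the standalone function here are simplified stand-ins for illustration, not the actual Doris classes.

    // Minimal sketch (simplified stand-ins, not the real Doris classes) of the
    // accounting added by this patch: after the per-column inverted index
    // writers finish, their file sizes are summed and added to the rowset's
    // size counters.
    #include <cstddef>
    #include <memory>
    #include <vector>

    struct RowsetMeta {                  // stand-in for doris::RowsetMeta
        size_t data_disk_size = 0;
        size_t total_disk_size = 0;
        size_t index_disk_size = 0;
    };

    struct InvertedIndexWriter {         // stand-in for the per-column index builder
        explicit InvertedIndexWriter(size_t bytes) : _bytes_written(bytes) {}
        size_t file_size() const { return _bytes_written; }
    private:
        size_t _bytes_written;
    };

    // Fold the index bytes into every counter, mirroring the three
    // set_*_disk_size calls in the diff below.
    void account_inverted_index_size(
            RowsetMeta& meta,
            const std::vector<std::shared_ptr<InvertedIndexWriter>>& writers) {
        size_t inverted_index_size = 0;
        for (const auto& w : writers) {
            inverted_index_size += w->file_size();
        }
        meta.data_disk_size  += inverted_index_size;
        meta.total_disk_size += inverted_index_size;
        meta.index_disk_size += inverted_index_size;
    }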
diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index 6019857d229..cefeeda2ae2 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -155,6 +155,7 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
std::string segment_dir = _tablet->tablet_path();
auto fs = output_rowset_meta->fs();
auto output_rowset_schema = output_rowset_meta->tablet_schema();
+ size_t inverted_index_size = 0;
for (auto& seg_ptr : segments) {
std::string segment_filename = fmt::format(
        "{}_{}.dat", output_rowset_meta->rowset_id().to_string(), seg_ptr->id());
@@ -249,11 +250,18 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
        "CLuceneError occured: {}", e.what());
}
+ inverted_index_size += _inverted_index_builders[writer_sign]->file_size();
}
_olap_data_convertor->reset();
}
_inverted_index_builders.clear();
+ output_rowset_meta->set_data_disk_size(output_rowset_meta->data_disk_size() + inverted_index_size);
+ output_rowset_meta->set_total_disk_size(output_rowset_meta->total_disk_size() + inverted_index_size);
+ output_rowset_meta->set_index_disk_size(output_rowset_meta->index_disk_size() + inverted_index_size);
LOG(INFO) << "all row nums. source_rows=" << output_rowset_meta->num_rows();
}
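With all three disk-size counters bumped by the same inverted_index_size, the size reported by SHOW DATA should grow once the BUILD INDEX job finishes; that is exactly what the regression test added below waits for and asserts.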
diff --git a/regression-test/suites/inverted_index_p0/test_show_data.groovy b/regression-test/suites/inverted_index_p0/test_show_data.groovy
new file mode 100644
index 00000000000..339152d378d
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_show_data.groovy
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_show_data", "p0") {
+ // define a sql table
+ def testTable = "test_show_data_httplogs"
+ def delta_time = 5000
+ def timeout = 60000
+ String database = context.config.getDbNameByFile(context.file)
+
+ def create_httplogs_dup_table = {testTablex ->
+ // multi-line sql
+ def result = sql """
+ CREATE TABLE IF NOT EXISTS ${testTablex} (
+ `@timestamp` int(11) NULL,
+ `clientip` varchar(20) NULL,
+ `request` text NULL,
+ `status` int(11) NULL,
+ `size` int(11) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default:
1"
+ );
+ """
+ }
+
+ def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
+                           expected_succ_rows = -1, load_to_single_tablet = 'true' ->
+
+ // load the json data
+ streamLoad {
+ table "${table_name}"
+
+ // set http request header params
+ set 'label', label + "_" + UUID.randomUUID().toString()
+ set 'read_json_by_line', read_flag
+ set 'format', format_flag
+ file file_name // import json file
+ time 10000 // limit inflight 10s
+ if (expected_succ_rows >= 0) {
+ set 'max_filter_ratio', '1'
+ }
+
+ // If a check callback is declared, the default check conditions are ignored,
+ // so every expected condition must be asserted explicitly here.
+ check { result, exception, startTime, endTime ->
+ if (ignore_failure && expected_succ_rows < 0) { return }
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ if (expected_succ_rows >= 0) {
+ assertEquals(json.NumberLoadedRows, expected_succ_rows)
+ } else {
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+ }
+ }
+
+ def wait_for_show_data_finish = { table_name, OpTimeout, origin_size ->
+ def useTime = 0
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ result = sql """show data from ${database}.${table_name};"""
+ if (result.size() > 0) {
+ logger.info(table_name + " show data, detail: " + result[0].toString())
+ def size = result[0][2].replace(" KB", "").toDouble()
+ if (size > origin_size) {
+ return size
+ }
+ }
+ useTime = t
+ Thread.sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_show_data_finish timeout, useTime=${useTime}")
+ return "wait_timeout"
+ }
+
+ def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
+ def useTime = 0
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
+ alter_res = alter_res.toString()
+ if(alter_res.contains("FINISHED")) {
+ sleep(3000) // wait change table state to normal
+ logger.info(table_name + " latest alter job finished, detail: " + alter_res)
+ break
+ }
+ useTime = t
+ Thread.sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout, useTime=${useTime}")
+ }
+
+ def wait_for_last_build_index_on_table_finish = { table_name, OpTimeout ->
+ def useTime = 0
+ for(int t = delta_time; t <= OpTimeout; t += delta_time){
+ alter_res = sql """SHOW BUILD INDEX WHERE TableName = "${table_name}" ORDER BY JobId """
+
+ if (alter_res.size() > 0) {
+ def last_job_state = alter_res[alter_res.size()-1][7];
+ if (last_job_state == "FINISHED" || last_job_state == "CANCELLED") {
+ logger.info(table_name + " last index job finished, state: " + last_job_state + ", detail: " + alter_res)
+ return last_job_state;
+ }
+ }
+ useTime = t
+ Thread.sleep(delta_time)
+ }
+ assertTrue(useTime <= OpTimeout, "wait_for_last_build_index_on_table_finish timeout, useTime=${useTime}")
+ return "wait_timeout"
+ }
+
+ try {
+ sql "DROP TABLE IF EXISTS ${testTable}"
+
+ create_httplogs_dup_table.call(testTable)
+
+ load_httplogs_data.call(testTable, 'test_httplogs_load', 'true', 'json', 'documents-1000.json')
+
+ sql "sync"
+ def no_index_size = wait_for_show_data_finish(testTable, 300000, 0)
+ assertTrue(no_index_size != "wait_timeout")
+ sql """ ALTER TABLE ${testTable} ADD INDEX idx_request (`request`)
USING INVERTED PROPERTIES("parser" = "english") """
+ wait_for_latest_op_on_table_finish(testTable, timeout)
+
+ // BUILD INDEX and expect state is RUNNING
+ sql """ BUILD INDEX idx_request ON ${testTable} """
+ def state = wait_for_last_build_index_on_table_finish(testTable, timeout)
+ assertEquals(state, "FINISHED")
+ def with_index_size = wait_for_show_data_finish(testTable, 300000, no_index_size)
+ assertTrue(with_index_size != "wait_timeout")
+ } finally {
+ //try_sql("DROP TABLE IF EXISTS ${testTable}")
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]