Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 105 additions & 50 deletions be/src/service/point_query_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "cloud/cloud_tablet.h"
Expand Down Expand Up @@ -87,25 +88,70 @@ static void get_missing_and_include_cids(const TabletSchema& schema,
std::unordered_set<int>& include_cids) {
missing_cids.clear();
include_cids.clear();
std::unordered_set<int> access_path_cids;
for (auto* slot : slots) {
missing_cids.insert(slot->col_unique_id());
const int cid = slot->col_unique_id();
missing_cids.insert(cid);
if (!slot->all_access_paths().empty() || !slot->predicate_access_paths().empty()) {
access_path_cids.insert(cid);
}
}
// insert delete sign column id
missing_cids.insert(schema.columns()[schema.delete_sign_idx()]->unique_id());
const int delete_sign_cid = schema.columns()[schema.delete_sign_idx()]->unique_id();
missing_cids.insert(delete_sign_cid);
if (target_rs_column_id == -1) {
// no row store columns
return;
}
const TabletColumn& target_rs_column = schema.column_by_uid(target_rs_column_id);
DCHECK(target_rs_column.is_row_store_column());
// The full column group is considered a full match, thus no missing cids
// The full column group is considered a full match, thus no missing cids.
if (schema.row_columns_uids().empty()) {
missing_cids.clear();
} else {
for (int cid : schema.row_columns_uids()) {
missing_cids.erase(cid);
include_cids.insert(cid);
}
}

if (!access_path_cids.empty()) {
// Row store keeps complex columns in their full serialized layout, while nested
// access-path slots can use a pruned layout. Read those slots from column store.
if (schema.row_columns_uids().empty()) {
// Empty include_cids means "read all columns" for jsonb_to_columns(). Switch to
// an explicit include set before excluding access-path columns.
for (auto* slot : slots) {
include_cids.insert(slot->col_unique_id());
}
include_cids.insert(delete_sign_cid);
}
for (int cid : access_path_cids) {
missing_cids.insert(cid);
include_cids.erase(cid);
}
}
}

static void set_slot_access_paths(const SlotDescriptor& slot, const TabletSchema& schema,
StorageReadOptions& storage_read_options) {
int32_t unique_id = slot.col_unique_id();
const int field_index =
unique_id >= 0 ? schema.field_index(unique_id) : schema.field_index(slot.col_name());
if (field_index >= 0) {
const auto& column = schema.column(field_index);
unique_id = column.unique_id() >= 0 ? column.unique_id() : column.parent_unique_id();
}
if (unique_id < 0) {
return;
}
for (int cid : schema.row_columns_uids()) {
missing_cids.erase(cid);
include_cids.insert(cid);

if (!slot.all_access_paths().empty()) {
storage_read_options.all_access_paths[unique_id] = slot.all_access_paths();
}

if (!slot.predicate_access_paths().empty()) {
storage_read_options.predicate_access_paths[unique_id] = slot.predicate_access_paths();
}
}

Expand Down Expand Up @@ -465,7 +511,7 @@ Status PointQueryExecutor::_lookup_row_key() {
std::vector<std::unique_ptr<SegmentCacheHandle>> segment_caches(specified_rowsets.size());
for (size_t i = 0; i < _row_read_ctxs.size(); ++i) {
RowLocation location;
if (!config::disable_storage_row_cache) {
if (!config::disable_storage_row_cache && _reusable->missing_col_uids().empty()) {
RowCache::CacheHandle cache_handle;
auto hit_cache = RowCache::instance()->lookup(
{_tablet->tablet_id(), _row_read_ctxs[i]._primary_key}, &cache_handle);
Expand Down Expand Up @@ -496,6 +542,53 @@ Status PointQueryExecutor::_lookup_row_key() {
return Status::OK();
}

static Status lookup_missing_columns_by_rowid(const BaseTabletSPtr& tablet, Reusable& reusable,
Metrics& profile_metrics,
OlapReaderStatistics* read_stats,
const RowLocation& row_loc,
MutableColumns& result_columns) {
if (!reusable.runtime_state()->enable_short_circuit_query_access_column_store()) {
std::string missing_columns;
for (int cid : reusable.missing_col_uids()) {
missing_columns += tablet->tablet_schema()->column_by_uid(cid).name() + ",";
}
return Status::InternalError(
"Not support column store, set store_row_column=true or "
"row_store_columns in table properties, missing columns: " +
missing_columns + " should be added to row store");
}

// fill missing columns by column store
BetaRowsetSharedPtr rowset =
std::static_pointer_cast<BetaRowset>(tablet->get_rowset(row_loc.rowset_id));
SegmentCacheHandle segment_cache;
{
SCOPED_TIMER(&profile_metrics.load_segment_data_stage_ns);
RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true));
}
// find segment
auto it =
std::find_if(segment_cache.get_segments().cbegin(), segment_cache.get_segments().cend(),
[&](const segment_v2::SegmentSharedPtr& seg) {
return seg->id() == row_loc.segment_id;
});
const auto& segment = *it;
for (int cid : reusable.missing_col_uids()) {
int pos = reusable.get_col_uid_to_idx().at(cid);
std::vector<segment_v2::rowid_t> row_ids {static_cast<segment_v2::rowid_t>(row_loc.row_id)};
auto& column = result_columns[pos];
std::unique_ptr<ColumnIterator> iter;
SlotDescriptor* slot = reusable.tuple_desc()->slots()[pos];
StorageReadOptions storage_read_options;
storage_read_options.stats = read_stats;
storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY;
set_slot_access_paths(*slot, *tablet->tablet_schema(), storage_read_options);
RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*tablet->tablet_schema(), slot, row_ids,
column, storage_read_options, iter));
}
return Status::OK();
}

Status PointQueryExecutor::_lookup_row_data() {
// 3. get values
SCOPED_TIMER(&_profile_metrics.lookup_data_ns);
Expand All @@ -518,7 +611,8 @@ Status PointQueryExecutor::_lookup_row_data() {
std::string value;
// fill block by row store
if (_reusable->rs_column_uid() != -1) {
bool use_row_cache = !config::disable_storage_row_cache;
bool use_row_cache =
!config::disable_storage_row_cache && _reusable->missing_col_uids().empty();
RETURN_IF_ERROR(_tablet->lookup_row_data(
_row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(),
*(_row_read_ctxs[i]._rowset_ptr), _profile_metrics.read_stats, value,
Expand All @@ -530,48 +624,9 @@ Status PointQueryExecutor::_lookup_row_data() {
_reusable->get_col_default_values(), _reusable->include_col_uids()));
}
if (!_reusable->missing_col_uids().empty()) {
if (!_reusable->runtime_state()->enable_short_circuit_query_access_column_store()) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fallback is skipped for row-cache hits, so enabling disable_storage_row_cache=false can still return the old broken shape on repeated point queries. The first lookup calls BaseTablet::lookup_row_data() and inserts only the row-store JSONB blob into RowCache; the second lookup can hit _cached_row_data, deserialize only _include_col_uids, and continue before lookup_missing_columns_by_rowid() runs. Since this PR intentionally leaves access-path columns in _missing_col_uids, those columns are never read from column store on the cached path. Please either bypass row cache when missing columns exist, or keep enough row-location state to run this fallback after a cache hit as well.

std::string missing_columns;
for (int cid : _reusable->missing_col_uids()) {
missing_columns +=
_tablet->tablet_schema()->column_by_uid(cid).name() + ",";
}
return Status::InternalError(
"Not support column store, set store_row_column=true or "
"row_store_columns in table properties, missing columns: " +
missing_columns + " should be added to row store");
}
// fill missing columns by column store
RowLocation row_loc = _row_read_ctxs[i]._row_location.value();
BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(
_tablet->get_rowset(row_loc.rowset_id));
SegmentCacheHandle segment_cache;
{
SCOPED_TIMER(&_profile_metrics.load_segment_data_stage_ns);
RETURN_IF_ERROR(
SegmentLoader::instance()->load_segments(rowset, &segment_cache, true));
}
// find segment
auto it = std::find_if(segment_cache.get_segments().cbegin(),
segment_cache.get_segments().cend(),
[&](const segment_v2::SegmentSharedPtr& seg) {
return seg->id() == row_loc.segment_id;
});
const auto& segment = *it;
for (int cid : _reusable->missing_col_uids()) {
int pos = _reusable->get_col_uid_to_idx().at(cid);
std::vector<segment_v2::rowid_t> row_ids {
static_cast<segment_v2::rowid_t>(row_loc.row_id)};
auto& column = result_columns[pos];
std::unique_ptr<ColumnIterator> iter;
SlotDescriptor* slot = _reusable->tuple_desc()->slots()[pos];
StorageReadOptions storage_read_options;
storage_read_options.stats = &_read_stats;
storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY;
RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot,
row_ids, column,
storage_read_options, iter));
}
RETURN_IF_ERROR(lookup_missing_columns_by_rowid(
_tablet, *_reusable, _profile_metrics, &_read_stats,
_row_read_ctxs[i]._row_location.value(), result_columns));
}
}
if (result_columns.size() > _reusable->include_col_uids().size()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !normal_path --
5 8 8 \N \N 3 false

-- !short_circuit_path --
5 8 8 \N \N 3 false

-- !short_circuit_path_repeat --
5 8 8 \N \N 3 false
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_short_circuit_rowstore_nested_complex", "p0,nonConcurrent") {
def backendId_to_backendIP = [:]
def backendId_to_backendHttpPort = [:]
getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort)
def set_be_config = { key, value ->
for (String backend_id : backendId_to_backendIP.keySet()) {
def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), key, value)
logger.info("update config: code=" + code + ", out=" + out + ", err=" + err)
}
}

try {
set_be_config.call("disable_storage_row_cache", "false")

sql "SET enable_nereids_planner=true"
sql "SET enable_sql_cache=false"
sql "SET enable_snapshot_point_query=true"
sql "SET enable_short_circuit_query_access_column_store=true"

sql "DROP TABLE IF EXISTS short_circuit_rowstore_nested_complex"
sql """
CREATE TABLE short_circuit_rowstore_nested_complex (
pk INT,
deep STRUCT<
nested_str: VARCHAR(64),
inner_s: STRUCT<deep_str: VARCHAR(64), flag: BOOLEAN, deep_char: CHAR(8)>,
deep_arr: ARRAY<STRUCT<verified: BOOLEAN, txt: VARCHAR(64), char_tag: CHAR(8)>>,
deep_map: MAP<VARCHAR(32), STRUCT<leaf: VARCHAR(64), n: INT, char_leaf: CHAR(8)>>
> NULL,
s STRUCT<str: VARCHAR(64), char_leaf: CHAR(8), num: INT, sibling: VARCHAR(64)> NULL
) ENGINE = OLAP
UNIQUE KEY(pk)
DISTRIBUTED BY HASH(pk) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
"store_row_column" = "true",
"row_store_page_size" = "16384"
)
"""

sql """
INSERT INTO short_circuit_rowstore_nested_complex VALUES
(5,
named_struct(
'nested_str', 'b-deep-5',
'inner_s', named_struct('deep_str', 'b-inner-5', 'flag', true, 'deep_char', 'bd5'),
'deep_arr', array(named_struct('verified', false, 'txt', 'b-deep-arr-5', 'char_tag', 'bt5')),
'deep_map', map('b_key', named_struct('leaf', 'b-leaf-5', 'n', -5, 'char_leaf', 'bl5'))),
named_struct('str', 'b-s-5', 'char_leaf', 'bc5', 'num', -35, 'sibling', 'b-sib-5'))
"""
sql "SYNC"

sql "SET enable_short_circuit_query=false"
order_qt_normal_path """
SELECT /*+ SET_VAR(enable_nereids_planner=true) */
pk,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf')) AS hit_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf'))) AS hit_lower_len,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf')) AS miss_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf'))) AS miss_lower_len,
LENGTH(struct_element(struct_element(deep, 'inner_s'), 'deep_char')) AS char_storage_len,
((struct_element(s, 'num') + 1) IS NULL) AS expr_is_null
FROM short_circuit_rowstore_nested_complex
WHERE pk = 5
"""

sql "SET enable_short_circuit_query=true"
explain {
sql """
SELECT /*+ SET_VAR(enable_nereids_planner=true) */
pk,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf')) AS hit_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf'))) AS hit_lower_len,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf')) AS miss_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf'))) AS miss_lower_len,
LENGTH(struct_element(struct_element(deep, 'inner_s'), 'deep_char')) AS char_storage_len,
((struct_element(s, 'num') + 1) IS NULL) AS expr_is_null
FROM short_circuit_rowstore_nested_complex
WHERE pk = 5
"""
contains "SHORT-CIRCUIT"
}
order_qt_short_circuit_path """
SELECT /*+ SET_VAR(enable_nereids_planner=true) */
pk,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf')) AS hit_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf'))) AS hit_lower_len,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf')) AS miss_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf'))) AS miss_lower_len,
LENGTH(struct_element(struct_element(deep, 'inner_s'), 'deep_char')) AS char_storage_len,
((struct_element(s, 'num') + 1) IS NULL) AS expr_is_null
FROM short_circuit_rowstore_nested_complex
WHERE pk = 5
"""

order_qt_short_circuit_path_repeat """
SELECT /*+ SET_VAR(enable_nereids_planner=true) */
pk,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf')) AS hit_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'b_key'), 'leaf'))) AS hit_lower_len,
CHAR_LENGTH(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf')) AS miss_char_len,
LENGTH(LOWER(struct_element(element_at(struct_element(deep, 'deep_map'), 'dense'), 'leaf'))) AS miss_lower_len,
LENGTH(struct_element(struct_element(deep, 'inner_s'), 'deep_char')) AS char_storage_len,
((struct_element(s, 'num') + 1) IS NULL) AS expr_is_null
FROM short_circuit_rowstore_nested_complex
WHERE pk = 5
"""
} finally {
set_be_config.call("disable_storage_row_cache", "true")
}
}
Loading