From d5d4f98b13167c30bfdd88c2de11ab7e20db0a32 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Sat, 3 Jan 2026 05:20:12 +0000 Subject: [PATCH] Update vendored DuckDB sources to 4b7a6b7bd0 --- .../decoder/delta_byte_array_decoder.cpp | 113 +++++----- .../decoder/delta_byte_array_decoder.hpp | 6 +- src/duckdb/src/common/enum_util.cpp | 22 ++ src/duckdb/src/common/file_system.cpp | 2 +- .../expression_executor/execute_operator.cpp | 12 +- .../physical_plan/plan_set_operation.cpp | 10 +- .../table/system/enable_profiling.cpp | 148 +++++++++++++ .../src/function/table/system_functions.cpp | 1 + .../function/table/version/pragma_version.cpp | 6 +- .../src/include/duckdb/common/enum_util.hpp | 8 + .../function/table/system_functions.hpp | 4 + .../src/include/duckdb/main/client_config.hpp | 2 + .../main/database_file_path_manager.hpp | 4 + .../include/duckdb/main/profiling_info.hpp | 1 + .../duckdb/storage/table/row_group.hpp | 2 +- src/duckdb/src/main/db_instance_cache.cpp | 31 ++- src/duckdb/src/main/query_profiler.cpp | 1 - .../src/main/settings/custom_settings.cpp | 200 ++++++++++++------ .../operator/propagate_aggregate.cpp | 61 +++++- src/duckdb/src/planner/bind_context.cpp | 62 +++--- src/duckdb/src/storage/table/row_group.cpp | 24 ++- .../storage/table/row_group_collection.cpp | 4 +- src/duckdb/ub_src_function_table_system.cpp | 2 + 23 files changed, 549 insertions(+), 177 deletions(-) create mode 100644 src/duckdb/src/function/table/system/enable_profiling.cpp diff --git a/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp b/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp index a0dfaba83..1409d87f3 100644 --- a/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp +++ b/src/duckdb/extension/parquet/decoder/delta_byte_array_decoder.cpp @@ -20,9 +20,6 @@ void DeltaByteArrayDecoder::ReadDbpData(Allocator &allocator, ResizeableBuffer & } void DeltaByteArrayDecoder::InitializePage() { - if (reader.Type().InternalType() != PhysicalType::VARCHAR) { - throw std::runtime_error("Delta Byte Array encoding is only supported for string/blob data"); - } auto &block = *reader.block; auto &allocator = reader.reader.allocator; idx_t prefix_count, suffix_count; @@ -33,71 +30,77 @@ void DeltaByteArrayDecoder::InitializePage() { if (prefix_count != suffix_count) { throw std::runtime_error("DELTA_BYTE_ARRAY - prefix and suffix counts are different - corrupt file?"); } + + auto prefix_data = reinterpret_cast(prefix_buffer.ptr); + auto suffix_data = reinterpret_cast(suffix_buffer.ptr); + + // Allocate the plain data buffer + if (!plain_data) { + plain_data = make_shared_ptr(); + } + plain_data->reset(); + if (prefix_count == 0) { - // no values - byte_array_data = make_uniq(LogicalType::VARCHAR, nullptr); + plain_data->resize(allocator, 0); return; } - auto prefix_data = reinterpret_cast(prefix_buffer.ptr); - auto suffix_data = reinterpret_cast(suffix_buffer.ptr); - byte_array_data = make_uniq(LogicalType::VARCHAR, prefix_count); - byte_array_count = prefix_count; - delta_offset = 0; - auto string_data = FlatVector::GetData(*byte_array_data); + + // Decode DELTA_BYTE_ARRAY into plain Parquet page format + // Plain format for BYTE_ARRAY: [4-byte length][data] repeated + // Plain format for FIXED_LEN_BYTE_ARRAY: [data] repeated (no length prefix) + auto &schema = reader.Schema(); + bool is_fixed_len = (schema.parquet_type == duckdb_parquet::Type::FIXED_LEN_BYTE_ARRAY); + idx_t fixed_len = is_fixed_len ? schema.type_length : 0; + + // Calculate total buffer size and max value length in one pass + idx_t total_size = 0; + idx_t max_len = 0; for (idx_t i = 0; i < prefix_count; i++) { - auto str_len = prefix_data[i] + suffix_data[i]; - block.available(suffix_data[i]); - string_data[i] = StringVector::EmptyString(*byte_array_data, str_len); - auto result_data = string_data[i].GetDataWriteable(); - if (prefix_data[i] > 0) { - if (i == 0 || prefix_data[i] > string_data[i - 1].GetSize()) { - throw std::runtime_error("DELTA_BYTE_ARRAY - prefix is out of range - corrupt file?"); - } - memcpy(result_data, string_data[i - 1].GetData(), prefix_data[i]); + idx_t len = prefix_data[i] + suffix_data[i]; + if (is_fixed_len && len != fixed_len) { + throw std::runtime_error( + "DELTA_BYTE_ARRAY on FIXED_LEN_BYTE_ARRAY: decoded length does not match type length"); } - memcpy(result_data + prefix_data[i], block.ptr, suffix_data[i]); - block.inc(suffix_data[i]); - string_data[i].Finalize(); + total_size += len + (is_fixed_len ? 0 : sizeof(uint32_t)); + max_len = MaxValue(max_len, len); } -} -void DeltaByteArrayDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) { - if (!byte_array_data) { - throw std::runtime_error("Internal error - DeltaByteArray called but there was no byte_array_data set"); - } - auto result_ptr = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - auto string_data = FlatVector::GetData(*byte_array_data); - for (idx_t row_idx = 0; row_idx < read_count; row_idx++) { - if (defines && defines[row_idx + result_offset] != reader.MaxDefine()) { - result_mask.SetInvalid(row_idx + result_offset); - continue; + plain_data->resize(allocator, total_size); + unsafe_vector prev_value(max_len); + idx_t prev_len = 0; + + auto output = plain_data->ptr; + for (idx_t i = 0; i < prefix_count; i++) { + auto prefix_len = prefix_data[i]; + auto suffix_len = suffix_data[i]; + auto value_len = prefix_len + suffix_len; + + if (prefix_len > prev_len) { + throw std::runtime_error("DELTA_BYTE_ARRAY - prefix is out of range - corrupt file?"); } - if (delta_offset >= byte_array_count) { - throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted " - "read of %d from %d entries) - corrupt file?", - delta_offset + 1, byte_array_count); + + if (!is_fixed_len) { + Store(static_cast(value_len), output); + output += sizeof(uint32_t); } - result_ptr[row_idx + result_offset] = string_data[delta_offset++]; + + memcpy(output, prev_value.data(), prefix_len); + block.available(suffix_len); + memcpy(output + prefix_len, block.ptr, suffix_len); + block.inc(suffix_len); + + memcpy(prev_value.data(), output, value_len); + prev_len = value_len; + output += value_len; } - StringVector::AddHeapReference(result, *byte_array_data); +} + +void DeltaByteArrayDecoder::Read(uint8_t *defines, idx_t read_count, Vector &result, idx_t result_offset) { + reader.Plain(plain_data, defines, read_count, result_offset, result); } void DeltaByteArrayDecoder::Skip(uint8_t *defines, idx_t skip_count) { - if (!byte_array_data) { - throw std::runtime_error("Internal error - DeltaByteArray called but there was no byte_array_data set"); - } - for (idx_t row_idx = 0; row_idx < skip_count; row_idx++) { - if (defines && defines[row_idx] != reader.MaxDefine()) { - continue; - } - if (delta_offset >= byte_array_count) { - throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted " - "read of %d from %d entries) - corrupt file?", - delta_offset + 1, byte_array_count); - } - delta_offset++; - } + reader.PlainSkip(*plain_data, defines, skip_count); } } // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/decoder/delta_byte_array_decoder.hpp b/src/duckdb/extension/parquet/include/decoder/delta_byte_array_decoder.hpp index 02df64bb1..749cf88a9 100644 --- a/src/duckdb/extension/parquet/include/decoder/delta_byte_array_decoder.hpp +++ b/src/duckdb/extension/parquet/include/decoder/delta_byte_array_decoder.hpp @@ -30,9 +30,9 @@ class DeltaByteArrayDecoder { private: ColumnReader &reader; - unique_ptr byte_array_data; - idx_t byte_array_count = 0; - idx_t delta_offset = 0; + + //! Decoded data in plain Parquet page format + shared_ptr plain_data; }; } // namespace duckdb diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index 69a8d4da5..9686d0c2f 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -133,6 +133,7 @@ #include "duckdb/main/extension.hpp" #include "duckdb/main/extension_helper.hpp" #include "duckdb/main/extension_install_info.hpp" +#include "duckdb/main/profiling_info.hpp" #include "duckdb/main/query_parameters.hpp" #include "duckdb/main/query_profiler.hpp" #include "duckdb/main/query_result.hpp" @@ -3806,6 +3807,27 @@ ProfilingCoverage EnumUtil::FromString(const char *value) { return static_cast(StringUtil::StringToEnum(GetProfilingCoverageValues(), 2, "ProfilingCoverage", value)); } +const StringUtil::EnumStringLiteral *GetProfilingParameterNamesValues() { + static constexpr StringUtil::EnumStringLiteral values[] { + { static_cast(ProfilingParameterNames::FORMAT), "FORMAT" }, + { static_cast(ProfilingParameterNames::COVERAGE), "COVERAGE" }, + { static_cast(ProfilingParameterNames::SAVE_LOCATION), "SAVE_LOCATION" }, + { static_cast(ProfilingParameterNames::MODE), "MODE" }, + { static_cast(ProfilingParameterNames::METRICS), "METRICS" } + }; + return values; +} + +template<> +const char* EnumUtil::ToChars(ProfilingParameterNames value) { + return StringUtil::EnumToString(GetProfilingParameterNamesValues(), 5, "ProfilingParameterNames", static_cast(value)); +} + +template<> +ProfilingParameterNames EnumUtil::FromString(const char *value) { + return static_cast(StringUtil::StringToEnum(GetProfilingParameterNamesValues(), 5, "ProfilingParameterNames", value)); +} + const StringUtil::EnumStringLiteral *GetPushdownExtractSupportValues() { static constexpr StringUtil::EnumStringLiteral values[] { { static_cast(PushdownExtractSupport::UNCHECKED), "UNCHECKED" }, diff --git a/src/duckdb/src/common/file_system.cpp b/src/duckdb/src/common/file_system.cpp index 782c30301..f8e584ece 100644 --- a/src/duckdb/src/common/file_system.cpp +++ b/src/duckdb/src/common/file_system.cpp @@ -228,7 +228,7 @@ bool FileSystem::IsPathAbsolute(const string &path) { string FileSystem::NormalizeAbsolutePath(const string &path) { D_ASSERT(IsPathAbsolute(path)); - auto result = StringUtil::Lower(FileSystem::ConvertSeparators(path)); + auto result = FileSystem::ConvertSeparators(path); if (StartsWithSingleBackslash(result)) { // Path starts with a single backslash or forward slash // prepend drive letter diff --git a/src/duckdb/src/execution/expression_executor/execute_operator.cpp b/src/duckdb/src/execution/expression_executor/execute_operator.cpp index 04883c5de..527178d36 100644 --- a/src/duckdb/src/execution/expression_executor/execute_operator.cpp +++ b/src/duckdb/src/execution/expression_executor/execute_operator.cpp @@ -113,8 +113,18 @@ void ExpressionExecutor::Execute(const BoundOperatorExpression &expr, Expression } } else if (expression_type == ExpressionType::OPERATOR_TRY) { auto &child_state = *state->child_states[0]; + Vector try_result(result.GetType()); try { - Execute(*expr.children[0], &child_state, sel, count, result); + Execute(*expr.children[0], &child_state, sel, count, try_result); + if (try_result.GetVectorType() == VectorType::CONSTANT_VECTOR) { + result.Reference(try_result); + return; + } + if (sel) { + VectorOperations::Copy(try_result, result, *sel, count, 0, 0, count); + } else { + VectorOperations::Copy(try_result, result, count, 0, 0); + } return; } catch (std::exception &ex) { ErrorData error(ex); diff --git a/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp b/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp index 2d7427861..ee71c5509 100644 --- a/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp @@ -23,11 +23,15 @@ static vector> CreatePartitionedRowNumExpression(const ve return res; } -static JoinCondition CreateNotDistinctComparison(const LogicalType &type, idx_t i) { +static JoinCondition CreateNotDistinctComparison(ClientContext &context, const LogicalType &type, idx_t i) { JoinCondition cond; cond.left = make_uniq(type, i); cond.right = make_uniq(type, i); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + + ExpressionBinder::PushCollation(context, cond.left, type); + ExpressionBinder::PushCollation(context, cond.right, type); + return cond; } @@ -59,7 +63,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalSetOperation &op) { vector conditions; // create equality condition for all columns for (idx_t i = 0; i < types.size(); i++) { - conditions.push_back(CreateNotDistinctComparison(types[i], i)); + conditions.push_back(CreateNotDistinctComparison(context, types[i], i)); } // For EXCEPT ALL / INTERSECT ALL we push a window operator with a ROW_NUMBER into the scans and join to get bag // semantics. @@ -80,7 +84,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalSetOperation &op) { right = right_window; // add window expression result to join condition - conditions.push_back(CreateNotDistinctComparison(LogicalType::BIGINT, types.size())); + conditions.push_back(CreateNotDistinctComparison(context, LogicalType::BIGINT, types.size())); // join (created below) now includes the row number result column op.types.push_back(LogicalType::BIGINT); } diff --git a/src/duckdb/src/function/table/system/enable_profiling.cpp b/src/duckdb/src/function/table/system/enable_profiling.cpp new file mode 100644 index 000000000..a7a8cdac2 --- /dev/null +++ b/src/duckdb/src/function/table/system/enable_profiling.cpp @@ -0,0 +1,148 @@ +#include "duckdb/function/table/system_functions.hpp" +#include "duckdb/main/client_context.hpp" +#include "duckdb/main/settings.hpp" + +namespace duckdb { + +class EnableProfilingBindData : public TableFunctionData { +public: + EnableProfilingBindData() { + } + + Value format; + Value coverage; + Value save_location; + Value mode; + Value metrics; +}; + +static void EnableProfiling(ClientContext &context, TableFunctionInput &data, DataChunk &output) { + auto bind_data = data.bind_data->Cast(); + + auto &client_config = ClientConfig::GetConfig(context); + client_config.enable_profiler = true; + client_config.emit_profiler_output = true; + + if (!bind_data.format.IsNull() && !bind_data.save_location.IsNull()) { + auto &file_system = FileSystem::GetFileSystem(context); + const auto file_type = file_system.ExtractExtension(bind_data.save_location.ToString()); + if (file_type != "txt" && file_type != bind_data.format.ToString()) { + throw InvalidInputException( + "EnableProfiling: the save_location must be a .txt file or match the specified format."); + } + + EnableProfilingSetting::ResetLocal(context); + ProfileOutputSetting::ResetLocal(context); + } + + if (!bind_data.format.IsNull()) { + EnableProfilingSetting::SetLocal(context, bind_data.format); + } + + if (!bind_data.coverage.IsNull()) { + ProfilingCoverageSetting::SetLocal(context, bind_data.coverage); + } + + if (!bind_data.save_location.IsNull()) { + ProfileOutputSetting::SetLocal(context, bind_data.save_location); + } + + if (!bind_data.mode.IsNull()) { + ProfilingModeSetting::SetLocal(context, bind_data.mode); + } + + if (!bind_data.metrics.IsNull()) { + CustomProfilingSettingsSetting::SetLocal(context, bind_data.metrics); + } +} + +static unique_ptr BindEnableProfiling(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + if (input.inputs.size() > 1) { + throw InvalidInputException("EnableProfiling: expected 0 or 1 parameter"); + } + + auto bind_data = make_uniq(); + + auto config = ClientConfig::GetConfig(context); + + bool metrics_set = false; + + for (const auto &named_param : input.named_parameters) { + const auto key = EnumUtil::FromString(named_param.first); + switch (key) { + case ProfilingParameterNames::FORMAT: + bind_data->format = StringUtil::Lower(named_param.second.ToString()); + break; + case ProfilingParameterNames::COVERAGE: + bind_data->coverage = StringUtil::Lower(named_param.second.ToString()); + break; + case ProfilingParameterNames::SAVE_LOCATION: + bind_data->save_location = StringUtil::Lower(named_param.second.ToString()); + break; + case ProfilingParameterNames::MODE: + bind_data->mode = StringUtil::Lower(named_param.second.ToString()); + break; + case ProfilingParameterNames::METRICS: { + if (named_param.second.type() != LogicalType::LIST(LogicalType::VARCHAR) && + named_param.second.type().id() != LogicalTypeId::STRUCT && + named_param.second.type() != LogicalType::VARCHAR) { + throw InvalidInputException("EnableProfiling: metrics must be a list of strings or a JSON string"); + } + + bind_data->metrics = named_param.second; + metrics_set = true; + } + } + } + + // Process positional param: metrics configs + if (!input.inputs.empty()) { + if (metrics_set) { + throw InvalidInputException("EnableProfiling: cannot specify both metrics and positional parameters"); + } + if (input.inputs[0].type() != LogicalType::LIST(LogicalType::VARCHAR) && + input.inputs[0].type().id() != LogicalTypeId::STRUCT && input.inputs[0].type() != LogicalType::VARCHAR) { + throw InvalidInputException("EnableProfiling: metrics must be a list of strings or a JSON string"); + } + + bind_data->metrics = input.inputs[0]; + } + + return_types.emplace_back(LogicalType::BOOLEAN); + names.emplace_back("Success"); + + return std::move(bind_data); +} + +static void DisableProfiling(ClientContext &context, TableFunctionInput &data, DataChunk &output) { + auto &client_config = ClientConfig::GetConfig(context); + client_config.enable_profiler = false; + client_config.emit_profiler_output = false; +} + +static unique_ptr BindDisableProfiling(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + return_types.emplace_back(LogicalType::BOOLEAN); + names.emplace_back("Success"); + + return nullptr; +} + +void EnableProfilingFun::RegisterFunction(BuiltinFunctions &set) { + auto enable_fun = TableFunction("enable_profiling", {}, EnableProfiling, BindEnableProfiling, nullptr, nullptr); + + enable_fun.named_parameters.emplace("format", LogicalType::VARCHAR); + enable_fun.named_parameters.emplace("coverage", LogicalType::VARCHAR); + enable_fun.named_parameters.emplace("save_location", LogicalType::VARCHAR); + enable_fun.named_parameters.emplace("mode", LogicalType::VARCHAR); + enable_fun.named_parameters.emplace("metrics", LogicalType::ANY); + + enable_fun.varargs = LogicalType::LIST(LogicalType::VARCHAR); + set.AddFunction(enable_fun); + + auto disable_fun = TableFunction("disable_profiling", {}, DisableProfiling, BindDisableProfiling, nullptr, nullptr); + set.AddFunction(disable_fun); +} + +} // namespace duckdb diff --git a/src/duckdb/src/function/table/system_functions.cpp b/src/duckdb/src/function/table/system_functions.cpp index 0a6a03507..b51d6cde9 100644 --- a/src/duckdb/src/function/table/system_functions.cpp +++ b/src/duckdb/src/function/table/system_functions.cpp @@ -47,6 +47,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() { DuckDBVariablesFun::RegisterFunction(*this); DuckDBViewsFun::RegisterFunction(*this); EnableLoggingFun::RegisterFunction(*this); + EnableProfilingFun::RegisterFunction(*this); TestAllTypesFun::RegisterFunction(*this); TestVectorTypesFun::RegisterFunction(*this); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 503dd21ac..ccbcd1e7e 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "0-dev5101" +#define DUCKDB_PATCH_VERSION "0-dev5159" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.0-dev5101" +#define DUCKDB_VERSION "v1.5.0-dev5159" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "e797eded1b" +#define DUCKDB_SOURCE_ID "4b7a6b7bd0" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/enum_util.hpp b/src/duckdb/src/include/duckdb/common/enum_util.hpp index 5b5445245..e096c6017 100644 --- a/src/duckdb/src/include/duckdb/common/enum_util.hpp +++ b/src/duckdb/src/include/duckdb/common/enum_util.hpp @@ -338,6 +338,8 @@ enum class ProfilerPrintFormat : uint8_t; enum class ProfilingCoverage : uint8_t; +enum class ProfilingParameterNames : uint8_t; + enum class PushdownExtractSupport : uint8_t; enum class QuantileSerializationType : uint8_t; @@ -946,6 +948,9 @@ const char* EnumUtil::ToChars(ProfilerPrintFormat value); template<> const char* EnumUtil::ToChars(ProfilingCoverage value); +template<> +const char* EnumUtil::ToChars(ProfilingParameterNames value); + template<> const char* EnumUtil::ToChars(PushdownExtractSupport value); @@ -1628,6 +1633,9 @@ ProfilerPrintFormat EnumUtil::FromString(const char *value) template<> ProfilingCoverage EnumUtil::FromString(const char *value); +template<> +ProfilingParameterNames EnumUtil::FromString(const char *value); + template<> PushdownExtractSupport EnumUtil::FromString(const char *value); diff --git a/src/duckdb/src/include/duckdb/function/table/system_functions.hpp b/src/duckdb/src/include/duckdb/function/table/system_functions.hpp index 49c5e794c..5944f8cff 100644 --- a/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +++ b/src/duckdb/src/include/duckdb/function/table/system_functions.hpp @@ -159,6 +159,10 @@ struct EnableLoggingFun { static void RegisterFunction(BuiltinFunctions &set); }; +struct EnableProfilingFun { + static void RegisterFunction(BuiltinFunctions &set); +}; + struct TestType { TestType(LogicalType type_p, string name_p) : type(std::move(type_p)), name(std::move(name_p)), min_value(Value::MinimumValue(type)), diff --git a/src/duckdb/src/include/duckdb/main/client_config.hpp b/src/duckdb/src/include/duckdb/main/client_config.hpp index 2485ffdf4..e2b5c6926 100644 --- a/src/duckdb/src/include/duckdb/main/client_config.hpp +++ b/src/duckdb/src/include/duckdb/main/client_config.hpp @@ -41,6 +41,8 @@ struct ClientConfig { //! The custom settings for the profiler //! (empty = use the default settings) profiler_settings_t profiler_settings = MetricsUtils::GetDefaultMetrics(); + //! The input format type of the profiler settings + LogicalTypeId profiler_settings_type = LogicalTypeId::VARCHAR; //! Allows suppressing profiler output, even if enabled. We turn on the profiler on all test runs but don't want //! to output anything diff --git a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp index 3af2f1873..ad637cad5 100644 --- a/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +++ b/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp @@ -48,7 +48,11 @@ class DatabaseFilePathManager { //! A set containing all attached database path //! This allows to attach many databases efficiently, and to avoid attaching the //! same file path twice +#if defined(_WIN32) || defined(__APPLE__) case_insensitive_map_t db_paths; +#else // !(_WIN32 or __APPLE__) + unordered_map db_paths; +#endif // _WIN32 or __APPLE__ }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/main/profiling_info.hpp b/src/duckdb/src/include/duckdb/main/profiling_info.hpp index a3f160957..458c7b766 100644 --- a/src/duckdb/src/include/duckdb/main/profiling_info.hpp +++ b/src/duckdb/src/include/duckdb/main/profiling_info.hpp @@ -23,6 +23,7 @@ struct yyjson_mut_val; } // namespace duckdb_yyjson namespace duckdb { +enum class ProfilingParameterNames : uint8_t { FORMAT, COVERAGE, SAVE_LOCATION, MODE, METRICS }; class ProfilingInfo { public: diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp index 2d1f2424d..5d3d72018 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp @@ -197,7 +197,7 @@ class RowGroup : public SegmentBase { unique_ptr GetStatistics(const StorageIndex &column_idx) const; void GetColumnSegmentInfo(const QueryContext &context, idx_t row_group_index, vector &result); - PartitionStatistics GetPartitionStats(idx_t row_group_start); + static PartitionStatistics GetPartitionStats(SegmentNode &row_group); idx_t GetAllocationSize() const { return allocation_size; diff --git a/src/duckdb/src/main/db_instance_cache.cpp b/src/duckdb/src/main/db_instance_cache.cpp index 1960c5ee3..a8b87ec06 100644 --- a/src/duckdb/src/main/db_instance_cache.cpp +++ b/src/duckdb/src/main/db_instance_cache.cpp @@ -13,7 +13,7 @@ DatabaseCacheEntry::DatabaseCacheEntry(const shared_ptr &database_p) : d DatabaseCacheEntry::~DatabaseCacheEntry() { } -string GetDBAbsolutePath(const string &database_p, FileSystem &fs) { +static string GetDBAbsolutePath(const string &database_p, FileSystem &fs) { auto database = FileSystem::ExpandPath(database_p, nullptr); if (database.empty()) { return IN_MEMORY_PATH; @@ -32,6 +32,23 @@ string GetDBAbsolutePath(const string &database_p, FileSystem &fs) { return fs.NormalizeAbsolutePath(fs.JoinPath(FileSystem::GetWorkingDirectory(), database)); } +static string GetCacheKey(const string &database_p, const DBConfig &config) { + // Currently the cache key is derived directly from the abs path, + // but it can apply more transformations to the path if necessary + string abs_database_path; + if (config.file_system) { + abs_database_path = GetDBAbsolutePath(database_p, *config.file_system); + } else { + auto tmp_fs = FileSystem::CreateLocal(); + abs_database_path = GetDBAbsolutePath(database_p, *tmp_fs); + } +#if defined(_WIN32) || defined(__APPLE__) + return StringUtil::Lower(abs_database_path); +#else // !(_WIN32 or __APPLE__) + return abs_database_path; +#endif // _WIN32 or __APPLE__ +} + DBInstanceCache::DBInstanceCache() { path_manager = make_shared_ptr(); } @@ -42,9 +59,8 @@ DBInstanceCache::~DBInstanceCache() { shared_ptr DBInstanceCache::GetInstanceInternal(const string &database, const DBConfig &config, std::unique_lock &db_instances_lock) { D_ASSERT(db_instances_lock.owns_lock()); - auto local_fs = FileSystem::CreateLocal(); - auto abs_database_path = GetDBAbsolutePath(database, *local_fs); - auto entry = db_instances.find(abs_database_path); + auto cache_key = GetCacheKey(database, config); + auto entry = db_instances.find(cache_key); if (entry == db_instances.end()) { // path does not exist in the list yet - no cache entry return nullptr; @@ -73,7 +89,7 @@ shared_ptr DBInstanceCache::GetInstanceInternal(const string &database, D_ASSERT(!cache_entry); // the cache entry has now been deleted - clear it from the set of database instances and return db_instances_lock.lock(); - db_instances.erase(abs_database_path); + db_instances.erase(cache_key); db_instances_lock.unlock(); return nullptr; } @@ -110,12 +126,13 @@ shared_ptr DBInstanceCache::CreateInstanceInternal(const string &databas shared_ptr db_instance; config.path_manager = path_manager; if (cache_instance) { - D_ASSERT(db_instances.find(abs_database_path) == db_instances.end()); + string cache_key = GetCacheKey(database, config); + D_ASSERT(db_instances.find(cache_key) == db_instances.end()); shared_ptr cache_entry = make_shared_ptr(); config.db_cache_entry = cache_entry; // Create the new instance after unlocking to avoid new ddb creation requests to be blocked lock_guard create_db_lock(cache_entry->update_database_mutex); - db_instances[abs_database_path] = cache_entry; + db_instances[cache_key] = cache_entry; db_instances_lock.unlock(); db_instance = make_shared_ptr(instance_path, &config); cache_entry->database = db_instance; diff --git a/src/duckdb/src/main/query_profiler.cpp b/src/duckdb/src/main/query_profiler.cpp index fef9d9fe2..17e5f1f42 100644 --- a/src/duckdb/src/main/query_profiler.cpp +++ b/src/duckdb/src/main/query_profiler.cpp @@ -214,7 +214,6 @@ void QueryProfiler::EndQuery() { if (IsEnabled() && !is_explain_analyze) { if (root) { auto &info = root->GetProfilingInfo(); - info = ProfilingInfo(ClientConfig::GetConfig(context).profiler_settings); auto &child_info = root->children[0]->GetProfilingInfo(); const auto &settings = info.expanded_settings; diff --git a/src/duckdb/src/main/settings/custom_settings.cpp b/src/duckdb/src/main/settings/custom_settings.cpp index cb6a09d98..9c5635815 100644 --- a/src/duckdb/src/main/settings/custom_settings.cpp +++ b/src/duckdb/src/main/settings/custom_settings.cpp @@ -373,56 +373,37 @@ Value CheckpointThresholdSetting::GetSetting(const ClientContext &context) { //===----------------------------------------------------------------------===// bool IsEnabledOptimizer(MetricType metric, const set &disabled_optimizers) { auto matching_optimizer_type = MetricsUtils::GetOptimizerTypeByMetric(metric); - if (matching_optimizer_type != OptimizerType::INVALID && - disabled_optimizers.find(matching_optimizer_type) == disabled_optimizers.end()) { - return true; - } - return false; + return matching_optimizer_type != OptimizerType::INVALID && + disabled_optimizers.find(matching_optimizer_type) == disabled_optimizers.end(); } -static profiler_settings_t FillTreeNodeSettings(unordered_map &input, - const set &disabled_optimizers) { - profiler_settings_t metrics; +template +static profiler_settings_t ExtractSettings(ExtractFromType extract_from, const set &disabled_optimizers, + vector &invalid_settings) { + profiler_settings_t enabled_metrics; + + auto insert_if_enabled = [&](MetricType m) { + if (!MetricsUtils::IsOptimizerMetric(m) || IsEnabledOptimizer(m, disabled_optimizers)) { + enabled_metrics.insert(m); + } + }; - string invalid_settings; - for (auto &entry : input) { - MetricType setting; - MetricGroup group = MetricGroup::INVALID; + extract_from([&](const std::string &metric) { + const auto upper = StringUtil::Upper(metric); try { - setting = EnumUtil::FromString(StringUtil::Upper(entry.first)); - } catch (std::exception &ex) { + insert_if_enabled(EnumUtil::FromString(upper)); + } catch (std::exception &) { try { - group = EnumUtil::FromString(StringUtil::Upper(entry.first)); - } catch (std::exception &ex) { - if (!invalid_settings.empty()) { - invalid_settings += ", "; - } - invalid_settings += entry.first; - continue; - } - } - if (group != MetricGroup::INVALID) { - if (entry.second == "true") { - auto group_metrics = MetricsUtils::GetMetricsByGroupType(group); - for (auto &metric : group_metrics) { - if (!MetricsUtils::IsOptimizerMetric(metric) || IsEnabledOptimizer(metric, disabled_optimizers)) { - metrics.insert(metric); - } + auto group = EnumUtil::FromString(upper); + for (auto &converted_metric : MetricsUtils::GetMetricsByGroupType(group)) { + insert_if_enabled(converted_metric); } + } catch (std::exception &) { + invalid_settings.push_back(metric); } - continue; - } - - if (StringUtil::Lower(entry.second) == "true" && - (!MetricsUtils::IsOptimizerMetric(setting) || IsEnabledOptimizer(setting, disabled_optimizers))) { - metrics.insert(setting); } - } - - if (!invalid_settings.empty()) { - throw IOException("Invalid custom profiler settings: \"%s\"", invalid_settings); - } - return metrics; + }); + return enabled_metrics; } void AddOptimizerMetrics(profiler_settings_t &settings, const set &disabled_optimizers) { @@ -436,45 +417,146 @@ void AddOptimizerMetrics(profiler_settings_t &settings, const set } } -void CustomProfilingSettingsSetting::SetLocal(ClientContext &context, const Value &input) { - auto &config = ClientConfig::GetConfig(context); +void ExtractFromList(ClientConfig &config, profiler_settings_t &enabled_metrics, vector &invalid_settings, + const Value &input, const set &disabled_optimizers) { + config.profiler_settings_type = LogicalTypeId::LIST; + + enabled_metrics = ExtractSettings( + [&](const std::function &func) { + for (auto &val : ListValue::GetChildren(input)) { + func(val.GetValue()); + } + }, + disabled_optimizers, invalid_settings); +} + +void ExtractFromStruct(ClientConfig &config, profiler_settings_t &enabled_metrics, vector &invalid_settings, + const Value &input, const set &disabled_optimizers) { + config.profiler_settings_type = LogicalTypeId::STRUCT; + + enabled_metrics = ExtractSettings( + [&](const std::function &func) { + auto &children = StructValue::GetChildren(input); + for (idx_t i = 0; i < children.size(); i++) { + auto child_val = children[i]; + if ((child_val.type() == LogicalType::BOOLEAN && child_val.GetValue() == true) || + StringUtil::Lower(child_val.ToString()) == "true") { + func(StructType::GetChildName(input.type(), i)); + } + } + }, + disabled_optimizers, invalid_settings); +} + +void ExtractFromJSON(ClientConfig &config, profiler_settings_t &enabled_metrics, vector &invalid_settings, + const Value &input, const set &disabled_optimizers) { + config.profiler_settings_type = LogicalTypeId::VARCHAR; - // parse the file content - unordered_map input_json; + // JSON string: parse, then accept entries with value == "true" + std::unordered_map json; try { - input_json = StringUtil::ParseJSONMap(input.ToString())->Flatten(); + json = StringUtil::ParseJSONMap(input.ToString())->Flatten(); } catch (std::exception &ex) { - throw IOException("Could not parse the custom profiler settings file due to incorrect JSON: \"%s\". Make sure " - "all the keys and values start with a quote. ", - input.ToString()); + throw IOException("Could not parse the custom profiler settings file due to incorrect JSON: \"%s\". Make " + "sure all the keys and values start with a quote. (error: %s)", + input.ToString(), ex.what()); + } + + enabled_metrics = ExtractSettings( + [&](const std::function &func) { + for (auto &entry : json) { + if (StringUtil::Lower(entry.second) == "true") { + func(entry.first); + } + } + }, + disabled_optimizers, invalid_settings); +} + +void ConstructInvalidSettingsAndThrow(const vector &invalid_settings) { + string invalid_settings_str; + for (auto &invalid_setting : invalid_settings) { + if (!invalid_settings_str.empty()) { + invalid_settings_str += ", "; + } + invalid_settings_str += invalid_setting; } + throw IOException("Invalid custom profiler settings: \"%s\"", invalid_settings_str); +} + +void CustomProfilingSettingsSetting::SetLocal(ClientContext &context, const Value &input) { + auto &config = ClientConfig::GetConfig(context); - config.enable_profiler = true; auto &db_config = DBConfig::GetConfig(context); auto &disabled_optimizers = db_config.options.disabled_optimizers; - auto settings = FillTreeNodeSettings(input_json, disabled_optimizers); - AddOptimizerMetrics(settings, disabled_optimizers); - config.profiler_settings = settings; + vector invalid_settings; + profiler_settings_t enabled_metrics; + if (input.type() == LogicalType::LIST(LogicalType::VARCHAR)) { + ExtractFromList(config, enabled_metrics, invalid_settings, input, disabled_optimizers); + } else if (input.type().id() == LogicalTypeId::STRUCT) { + ExtractFromStruct(config, enabled_metrics, invalid_settings, input, disabled_optimizers); + } else if (input.type() == LogicalType::VARCHAR) { + ExtractFromJSON(config, enabled_metrics, invalid_settings, input, disabled_optimizers); + } else { + throw ParserException("Invalid custom profiler settings type \"%s\", expected LIST(VARCHAR) or JSON", + input.type().ToString()); + } + + if (!invalid_settings.empty()) { + ConstructInvalidSettingsAndThrow(invalid_settings); + } + + AddOptimizerMetrics(enabled_metrics, disabled_optimizers); + config.enable_profiler = true; + config.profiler_settings = enabled_metrics; } void CustomProfilingSettingsSetting::ResetLocal(ClientContext &context) { auto &config = ClientConfig::GetConfig(context); config.enable_profiler = ClientConfig().enable_profiler; config.profiler_settings = MetricsUtils::GetDefaultMetrics(); + config.profiler_settings_type = LogicalTypeId::VARCHAR; } Value CustomProfilingSettingsSetting::GetSetting(const ClientContext &context) { auto &config = ClientConfig::GetConfig(context); - string profiling_settings_str; + set enabled_settings; for (auto &entry : config.profiler_settings) { - if (!profiling_settings_str.empty()) { - profiling_settings_str += ", "; + enabled_settings.insert(EnumUtil::ToString(entry)); + } + + switch (config.profiler_settings_type) { + case LogicalTypeId::VARCHAR: { + // i.e. JSON + string profiling_settings_str; + for (auto &entry : enabled_settings) { + if (!profiling_settings_str.empty()) { + profiling_settings_str += ", "; + } + profiling_settings_str += "\"" + entry + "\": \"true\""; } - profiling_settings_str += StringUtil::Format("\"%s\": \"true\"", EnumUtil::ToString(entry)); + + return Value(StringUtil::Format("{%s}", profiling_settings_str)); + } + case LogicalTypeId::STRUCT: { + child_list_t children; + for (auto &entry : enabled_settings) { + children.emplace_back(entry, Value::BOOLEAN(true)); + } + return Value::STRUCT(std::move(children)); + } + case LogicalTypeId::LIST: { + vector children; + for (auto &entry : enabled_settings) { + children.emplace_back(entry); + } + return Value::LIST(std::move(children)); + } + default: + throw InternalException("Invalid custom profiler settings type"); } - return Value(StringUtil::Format("{%s}", profiling_settings_str)); } //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp b/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp index 7417b8112..b44a9f5d5 100644 --- a/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +++ b/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp @@ -1,6 +1,6 @@ #include "duckdb/common/assert.hpp" +#include "duckdb/common/column_index.hpp" #include "duckdb/common/enums/expression_type.hpp" -#include "duckdb/common/enums/tuple_data_layout_enums.hpp" #include "duckdb/common/helper.hpp" #include "duckdb/common/numeric_utils.hpp" #include "duckdb/common/types.hpp" @@ -18,6 +18,7 @@ #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/storage/statistics/base_statistics.hpp" #include "duckdb/storage/statistics/string_stats.hpp" +#include "duckdb/storage/storage_index.hpp" namespace duckdb { @@ -158,10 +159,6 @@ void StatisticsPropagator::TryExecuteAggregates(LogicalAggregate &aggr, unique_p // GET does not support getting the partition stats return; } - if (!get.table_filters.filters.empty()) { - // we cannot do this if the GET has filters - return; - } if (get.extra_info.sample_options) { // only use row group statistics if we query the whole table return; @@ -188,6 +185,60 @@ void StatisticsPropagator::TryExecuteAggregates(LogicalAggregate &aggr, unique_p vector types; vector> agg_results; + // we can keep execute eager aggregate if all partitions could be either filtered entirely or remained entirely + if (!get.table_filters.filters.empty()) { + map>> filter_storage_index_map; + for (auto &entry : get.table_filters.filters) { + auto col_idx = entry.first; + auto &filter = entry.second; + auto column_index = ColumnIndex(col_idx); + StorageIndex storage_index; + if (!get.TryGetStorageIndex(column_index, storage_index)) { + return; + } + filter_storage_index_map.emplace(storage_index, filter); + } + vector remaining_partition_stats; + for (auto &stats : partition_stats) { + if (!stats.partition_row_group) { + return; + } + auto filter_result = FilterPropagateResult::FILTER_ALWAYS_TRUE; + for (auto &entry : filter_storage_index_map) { + auto &storage_index = entry.first; + auto &filter = entry.second; + auto prg = stats.partition_row_group; + if (!prg) { + return; + } + auto column_stats = prg->GetColumnStatistics(storage_index); + if (!column_stats) { + return; + } + auto col_filter_result = filter.get()->CheckStatistics(*column_stats); + if (col_filter_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { + // all data in this partition is filtered out, remove this partition entirely + filter_result = FilterPropagateResult::FILTER_ALWAYS_FALSE; + break; + } + if (col_filter_result != FilterPropagateResult::FILTER_ALWAYS_TRUE) { + filter_result = col_filter_result; + } + } + switch (filter_result) { + case FilterPropagateResult::FILTER_ALWAYS_TRUE: + // all filters passed - this partition should keep execute eager aggregate + remaining_partition_stats.push_back(std::move(stats)); + break; + case FilterPropagateResult::FILTER_ALWAYS_FALSE: + break; + default: + // any filter that is not always true/false - bail + return; + } + } + partition_stats = std::move(remaining_partition_stats); + } if (!min_max_bindings.empty()) { // Execute min/max aggregates on partition statistics diff --git a/src/duckdb/src/planner/bind_context.cpp b/src/duckdb/src/planner/bind_context.cpp index cc1b3d25e..470a8bc37 100644 --- a/src/duckdb/src/planner/bind_context.cpp +++ b/src/duckdb/src/planner/bind_context.cpp @@ -450,6 +450,7 @@ struct ExclusionListInfo { vector> &new_select_list; case_insensitive_set_t excluded_columns; qualified_column_set_t excluded_qualified_columns; + case_insensitive_set_t replaced_columns; }; bool CheckExclusionList(StarExpression &expr, const QualifiedColumnName &qualified_name, ExclusionListInfo &info) { @@ -457,22 +458,27 @@ bool CheckExclusionList(StarExpression &expr, const QualifiedColumnName &qualifi info.excluded_qualified_columns.insert(qualified_name); return true; } - auto entry = expr.replace_list.find(qualified_name.column); - if (entry != expr.replace_list.end()) { - auto new_entry = entry->second->Copy(); - new_entry->SetAlias(entry->first); - info.excluded_columns.insert(entry->first); - info.new_select_list.push_back(std::move(new_entry)); - return true; - } return false; } -void HandleRename(StarExpression &expr, const QualifiedColumnName &qualified_name, ParsedExpression &new_expr) { +bool HandleRename(StarExpression &expr, const QualifiedColumnName &qualified_name, + unique_ptr &new_expr, ExclusionListInfo &info) { + auto replace_entry = expr.replace_list.find(qualified_name.column); + if (replace_entry != expr.replace_list.end()) { + if (info.replaced_columns.find(replace_entry->first) == info.replaced_columns.end()) { + new_expr = replace_entry->second->Copy(); + new_expr->SetAlias(replace_entry->first); + info.replaced_columns.insert(replace_entry->first); + info.excluded_columns.insert(replace_entry->first); + } else { + return false; + } + } auto rename_entry = expr.rename_list.find(qualified_name); if (rename_entry != expr.rename_list.end()) { - new_expr.SetAlias(rename_entry->second); + new_expr->SetAlias(rename_entry->second); } + return true; } void BindContext::GenerateAllColumnExpressions(StarExpression &expr, @@ -507,26 +513,32 @@ void BindContext::GenerateAllColumnExpressions(StarExpression &expr, // we have not! output the using column if (!using_binding.primary_binding.IsSet()) { // no primary binding: output a coalesce - auto coalesce = make_uniq(ExpressionType::OPERATOR_COALESCE); + auto coalesce = + make_uniq_base(ExpressionType::OPERATOR_COALESCE); for (auto &child_binding : using_binding.bindings) { - coalesce->children.push_back(make_uniq(column_name, child_binding)); + coalesce->Cast().children.push_back( + make_uniq(column_name, child_binding)); } coalesce->SetAlias(column_name); - HandleRename(expr, qualified_column, *coalesce); - new_select_list.push_back(std::move(coalesce)); + if (HandleRename(expr, qualified_column, coalesce, exclusion_info)) { + new_select_list.push_back(std::move(coalesce)); + } } else { // primary binding: output the qualified column ref - auto new_expr = make_uniq(column_name, using_binding.primary_binding); - HandleRename(expr, qualified_column, *new_expr); - new_select_list.push_back(std::move(new_expr)); + auto new_expr = make_uniq_base( + column_name, using_binding.primary_binding); + if (HandleRename(expr, qualified_column, new_expr, exclusion_info)) { + new_select_list.push_back(std::move(new_expr)); + } } handled_using_columns.insert(using_binding); continue; } auto new_expr = CreateColumnReference(binding_alias, column_name, ColumnBindType::DO_NOT_EXPAND_GENERATED_COLUMNS); - HandleRename(expr, qualified_column, *new_expr); - new_select_list.push_back(std::move(new_expr)); + if (HandleRename(expr, qualified_column, new_expr, exclusion_info)) { + new_select_list.push_back(std::move(new_expr)); + } } } } else { @@ -563,9 +575,10 @@ void BindContext::GenerateAllColumnExpressions(StarExpression &expr, continue; } column_names[2] = child.first; - auto new_expr = make_uniq(column_names); - HandleRename(expr, qualified_name, *new_expr); - new_select_list.push_back(std::move(new_expr)); + unique_ptr new_expr = make_uniq(column_names); + if (HandleRename(expr, qualified_name, new_expr, exclusion_info)) { + new_select_list.push_back(std::move(new_expr)); + } } } else { for (auto &column_name : column_names) { @@ -575,8 +588,9 @@ void BindContext::GenerateAllColumnExpressions(StarExpression &expr, } auto new_expr = CreateColumnReference(binding_alias, column_name, ColumnBindType::DO_NOT_EXPAND_GENERATED_COLUMNS); - HandleRename(expr, qualified_name, *new_expr); - new_select_list.push_back(std::move(new_expr)); + if (HandleRename(expr, qualified_name, new_expr, exclusion_info)) { + new_select_list.push_back(std::move(new_expr)); + } } } } diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp index aaecfe71e..c7e3fee70 100644 --- a/src/duckdb/src/storage/table/row_group.cpp +++ b/src/duckdb/src/storage/table/row_group.cpp @@ -1418,19 +1418,19 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &deserializer) { // GetPartitionStats //===--------------------------------------------------------------------===// struct DuckDBPartitionRowGroup : public PartitionRowGroup { - explicit DuckDBPartitionRowGroup(const RowGroup &row_group_p, bool is_exact_p) - : row_group(row_group_p), is_exact(is_exact_p) { + explicit DuckDBPartitionRowGroup(shared_ptr row_group_p, bool is_exact_p) + : row_group(std::move(row_group_p)), is_exact(is_exact_p) { } - const RowGroup &row_group; + shared_ptr row_group; const bool is_exact; unique_ptr GetColumnStatistics(const StorageIndex &storage_index) override { - return row_group.GetStatistics(storage_index); + return row_group->GetStatistics(storage_index); } bool MinMaxIsExact(const BaseStatistics &stats, const StorageIndex &) override { - if (!is_exact || row_group.HasChanges()) { + if (!is_exact || row_group->HasChanges()) { return false; } if (stats.GetStatsType() == StatisticsType::STRING_STATS) { @@ -1444,17 +1444,19 @@ struct DuckDBPartitionRowGroup : public PartitionRowGroup { } }; -PartitionStatistics RowGroup::GetPartitionStats(idx_t row_group_start) { +PartitionStatistics RowGroup::GetPartitionStats(SegmentNode &row_group) { + auto &row_group_ref = row_group.GetNode(); + PartitionStatistics result; - result.row_start = row_group_start; - result.count = count; - if (HasUnloadedDeletes() || version_info.load().get()) { + result.row_start = row_group.GetRowStart(); + result.count = row_group_ref.count; + if (row_group_ref.HasUnloadedDeletes() || row_group_ref.GetVersionInfoIfLoaded()) { // we have version info - approx count result.count_type = CountType::COUNT_APPROXIMATE; - result.partition_row_group = make_shared_ptr(*this, false); + result.partition_row_group = make_shared_ptr(row_group.ReferenceNode(), false); } else { result.count_type = CountType::COUNT_EXACT; - result.partition_row_group = make_shared_ptr(*this, true); + result.partition_row_group = make_shared_ptr(row_group.ReferenceNode(), true); } return result; diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 4069e2a91..cf4af0034 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -3,7 +3,6 @@ #include "duckdb/common/serializer/binary_deserializer.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/execution/index/bound_index.hpp" -#include "duckdb/execution/task_error_manager.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/parallel/task_executor.hpp" #include "duckdb/planner/constraints/bound_not_null_constraint.hpp" @@ -1724,8 +1723,7 @@ vector RowGroupCollection::GetPartitionStats() const { vector result; auto row_groups = GetRowGroups(); for (auto &entry : row_groups->SegmentNodes()) { - auto &row_group = entry.GetNode(); - result.push_back(row_group.GetPartitionStats(entry.GetRowStart())); + result.push_back(RowGroup::GetPartitionStats(entry)); } return result; } diff --git a/src/duckdb/ub_src_function_table_system.cpp b/src/duckdb/ub_src_function_table_system.cpp index 5ca818791..567670d0c 100644 --- a/src/duckdb/ub_src_function_table_system.cpp +++ b/src/duckdb/ub_src_function_table_system.cpp @@ -52,6 +52,8 @@ #include "src/function/table/system/duckdb_views.cpp" +#include "src/function/table/system/enable_profiling.cpp" + #include "src/function/table/system/logging_utils.cpp" #include "src/function/table/system/pragma_collations.cpp"