diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp index 15b7dd18f..8694c4370 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp @@ -234,15 +234,12 @@ struct BaseModeFunction { } template - static void Combine(const STATE &source, STATE &target, AggregateInputData &) { + static void Combine(const STATE &source, STATE &target, AggregateInputData &aggr_input_data) { if (!source.frequency_map) { return; } if (!target.frequency_map) { - // Copy - don't destroy! Otherwise windowing will break. - target.frequency_map = new typename STATE::Counts(*source.frequency_map); - target.count = source.count; - return; + target.frequency_map = TYPE_OP::CreateEmpty(aggr_input_data.allocator); } for (auto &val : *source.frequency_map) { auto &i = (*target.frequency_map)[val.first]; diff --git a/src/duckdb/src/common/adbc/adbc.cpp b/src/duckdb/src/common/adbc/adbc.cpp index b461a88c2..63f4c0a29 100644 --- a/src/duckdb/src/common/adbc/adbc.cpp +++ b/src/duckdb/src/common/adbc/adbc.cpp @@ -1320,12 +1320,21 @@ AdbcStatusCode StatementSetOption(struct AdbcStatement *statement, const char *k return ADBC_STATUS_INVALID_ARGUMENT; } +std::string createFilter(const char *input) { + if (input) { + auto quoted = duckdb::KeywordHelper::WriteQuoted(input, '\''); + return quoted; + } + return "'%'"; +} + AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth, const char *catalog, const char *db_schema, const char *table_name, const char **table_type, const char *column_name, struct ArrowArrayStream *out, struct AdbcError *error) { - std::string catalog_filter = catalog ? catalog : "%"; - std::string db_schema_filter = db_schema ? db_schema : "%"; - std::string table_name_filter = table_name ? 
table_name : "%"; + std::string catalog_filter = createFilter(catalog); + std::string db_schema_filter = createFilter(db_schema); + std::string table_name_filter = createFilter(table_name); + std::string column_name_filter = createFilter(column_name); std::string table_type_condition = ""; if (table_type && table_type[0]) { table_type_condition = " AND table_type IN ("; @@ -1341,13 +1350,10 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth if (i > 0) { table_type_condition += ", "; } - table_type_condition += "'"; - table_type_condition += table_type[i]; - table_type_condition += "'"; + table_type_condition += createFilter(table_type[i]); } table_type_condition += ")"; } - std::string column_name_filter = column_name ? column_name : "%"; std::string query; switch (depth) { @@ -1392,7 +1398,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth )[] catalog_db_schemas FROM information_schema.schemata - WHERE catalog_name LIKE '%s' + WHERE catalog_name LIKE %s GROUP BY catalog_name )", catalog_filter); @@ -1405,7 +1411,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth catalog_name, schema_name, FROM information_schema.schemata - WHERE schema_name LIKE '%s' + WHERE schema_name LIKE %s ) SELECT @@ -1448,7 +1454,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth information_schema.schemata LEFT JOIN db_schemas dbs USING (catalog_name, schema_name) - WHERE catalog_name LIKE '%s' + WHERE catalog_name LIKE %s GROUP BY catalog_name )", db_schema_filter, catalog_filter); @@ -1492,7 +1498,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth )[], }) db_schema_tables FROM information_schema.tables - WHERE table_name LIKE '%s'%s + WHERE table_name LIKE %s%s GROUP BY table_catalog, table_schema ), db_schemas AS ( @@ -1503,7 +1509,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth 
FROM information_schema.schemata LEFT JOIN tables USING (catalog_name, schema_name) - WHERE schema_name LIKE '%s' + WHERE schema_name LIKE %s ) SELECT @@ -1516,7 +1522,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth information_schema.schemata LEFT JOIN db_schemas dbs USING (catalog_name, schema_name) - WHERE catalog_name LIKE '%s' + WHERE catalog_name LIKE %s GROUP BY catalog_name )", table_name_filter, table_type_condition, db_schema_filter, catalog_filter); @@ -1551,7 +1557,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth xdbc_is_generatedcolumn: NULL::BOOLEAN, }) table_columns FROM information_schema.columns - WHERE column_name LIKE '%s' + WHERE column_name LIKE %s GROUP BY table_catalog, table_schema, table_name ), constraints AS ( @@ -1580,7 +1586,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth constraint_column_names, list_filter( constraint_column_names, - lambda name: name LIKE '%s' + lambda name: name LIKE %s ) ) GROUP BY database_name, schema_name, table_name @@ -1600,7 +1606,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth USING (table_catalog, table_schema, table_name) LEFT JOIN constraints USING (table_catalog, table_schema, table_name) - WHERE table_name LIKE '%s'%s + WHERE table_name LIKE %s%s GROUP BY table_catalog, table_schema ), db_schemas AS ( @@ -1611,7 +1617,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth FROM information_schema.schemata LEFT JOIN tables USING (catalog_name, schema_name) - WHERE schema_name LIKE '%s' + WHERE schema_name LIKE %s ) SELECT @@ -1624,7 +1630,7 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth information_schema.schemata LEFT JOIN db_schemas dbs USING (catalog_name, schema_name) - WHERE catalog_name LIKE '%s' + WHERE catalog_name LIKE %s GROUP BY catalog_name )", column_name_filter, 
column_name_filter, table_name_filter, diff --git a/src/duckdb/src/common/arrow/schema_metadata.cpp b/src/duckdb/src/common/arrow/schema_metadata.cpp index 0728df9a1..d408d2bb3 100644 --- a/src/duckdb/src/common/arrow/schema_metadata.cpp +++ b/src/duckdb/src/common/arrow/schema_metadata.cpp @@ -97,13 +97,13 @@ unsafe_unique_array ArrowSchemaMetadata::SerializeMetadata() const { auto metadata_array_ptr = make_unsafe_uniq_array(total_size); auto metadata_ptr = metadata_array_ptr.get(); // 1. number of key-value pairs (int32) - const idx_t map_size = schema_metadata_map.size(); + const int32_t map_size = static_cast(schema_metadata_map.size()); memcpy(metadata_ptr, &map_size, sizeof(int32_t)); metadata_ptr += sizeof(int32_t); // Iterate through each key-value pair in the map for (const auto &pair : schema_metadata_map) { const std::string &key = pair.first; - idx_t key_size = key.size(); + int32_t key_size = static_cast(key.size()); // Length of the key (int32) memcpy(metadata_ptr, &key_size, sizeof(int32_t)); metadata_ptr += sizeof(int32_t); @@ -111,7 +111,7 @@ unsafe_unique_array ArrowSchemaMetadata::SerializeMetadata() const { memcpy(metadata_ptr, key.c_str(), key_size); metadata_ptr += key_size; const std::string &value = pair.second; - const idx_t value_size = value.size(); + const int32_t value_size = static_cast(value.size()); // Length of the value (int32) memcpy(metadata_ptr, &value_size, sizeof(int32_t)); metadata_ptr += sizeof(int32_t); diff --git a/src/duckdb/src/common/enum_util.cpp b/src/duckdb/src/common/enum_util.cpp index d2bb2f772..cfb6de9af 100644 --- a/src/duckdb/src/common/enum_util.cpp +++ b/src/duckdb/src/common/enum_util.cpp @@ -3029,6 +3029,7 @@ const StringUtil::EnumStringLiteral *GetMetricTypeValues() { { static_cast(MetricType::OPTIMIZER_CTE_INLINING), "OPTIMIZER_CTE_INLINING" }, { static_cast(MetricType::OPTIMIZER_COMMON_SUBPLAN), "OPTIMIZER_COMMON_SUBPLAN" }, { static_cast(MetricType::OPTIMIZER_JOIN_ELIMINATION), 
"OPTIMIZER_JOIN_ELIMINATION" }, + { static_cast(MetricType::OPTIMIZER_COUNT_WINDOW_ELIMINATION), "OPTIMIZER_COUNT_WINDOW_ELIMINATION" }, { static_cast(MetricType::ALL_OPTIMIZERS), "ALL_OPTIMIZERS" }, { static_cast(MetricType::CUMULATIVE_OPTIMIZER_TIMING), "CUMULATIVE_OPTIMIZER_TIMING" }, { static_cast(MetricType::PHYSICAL_PLANNER), "PHYSICAL_PLANNER" }, @@ -3043,12 +3044,12 @@ const StringUtil::EnumStringLiteral *GetMetricTypeValues() { template<> const char* EnumUtil::ToChars(MetricType value) { - return StringUtil::EnumToString(GetMetricTypeValues(), 66, "MetricType", static_cast(value)); + return StringUtil::EnumToString(GetMetricTypeValues(), 67, "MetricType", static_cast(value)); } template<> MetricType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetMetricTypeValues(), 66, "MetricType", value)); + return static_cast(StringUtil::StringToEnum(GetMetricTypeValues(), 67, "MetricType", value)); } const StringUtil::EnumStringLiteral *GetMultiFileColumnMappingModeValues() { @@ -3284,19 +3285,20 @@ const StringUtil::EnumStringLiteral *GetOptimizerTypeValues() { { static_cast(OptimizerType::LATE_MATERIALIZATION), "LATE_MATERIALIZATION" }, { static_cast(OptimizerType::CTE_INLINING), "CTE_INLINING" }, { static_cast(OptimizerType::COMMON_SUBPLAN), "COMMON_SUBPLAN" }, - { static_cast(OptimizerType::JOIN_ELIMINATION), "JOIN_ELIMINATION" } + { static_cast(OptimizerType::JOIN_ELIMINATION), "JOIN_ELIMINATION" }, + { static_cast(OptimizerType::COUNT_WINDOW_ELIMINATION), "COUNT_WINDOW_ELIMINATION" } }; return values; } template<> const char* EnumUtil::ToChars(OptimizerType value) { - return StringUtil::EnumToString(GetOptimizerTypeValues(), 33, "OptimizerType", static_cast(value)); + return StringUtil::EnumToString(GetOptimizerTypeValues(), 34, "OptimizerType", static_cast(value)); } template<> OptimizerType EnumUtil::FromString(const char *value) { - return static_cast(StringUtil::StringToEnum(GetOptimizerTypeValues(), 33, 
"OptimizerType", value)); + return static_cast(StringUtil::StringToEnum(GetOptimizerTypeValues(), 34, "OptimizerType", value)); } const StringUtil::EnumStringLiteral *GetOrderByNullTypeValues() { diff --git a/src/duckdb/src/common/enums/optimizer_type.cpp b/src/duckdb/src/common/enums/optimizer_type.cpp index f62af9626..353073f2a 100644 --- a/src/duckdb/src/common/enums/optimizer_type.cpp +++ b/src/duckdb/src/common/enums/optimizer_type.cpp @@ -45,6 +45,7 @@ static const DefaultOptimizerType internal_optimizer_types[] = { {"cte_inlining", OptimizerType::CTE_INLINING}, {"common_subplan", OptimizerType::COMMON_SUBPLAN}, {"join_elimination", OptimizerType::JOIN_ELIMINATION}, + {"count_window_elimination", OptimizerType::COUNT_WINDOW_ELIMINATION}, {nullptr, OptimizerType::INVALID}}; string OptimizerTypeToString(OptimizerType type) { diff --git a/src/duckdb/src/common/types/geometry.cpp b/src/duckdb/src/common/types/geometry.cpp index d565d36f8..cc9bacfda 100644 --- a/src/duckdb/src/common/types/geometry.cpp +++ b/src/duckdb/src/common/types/geometry.cpp @@ -16,7 +16,8 @@ class BlobWriter { public: template void Write(const T &value) { - auto ptr = reinterpret_cast(&value); + auto le_value = BSwapIfBE(value); + auto ptr = reinterpret_cast(&le_value); buffer.insert(buffer.end(), ptr, ptr + sizeof(T)); } @@ -38,16 +39,12 @@ class BlobWriter { if (reserved.offset + sizeof(T) > buffer.size()) { throw InternalException("Write out of bounds in BinaryWriter"); } - auto ptr = reinterpret_cast(&reserved.value); + auto le_value = BSwapIfBE(reserved.value); + auto ptr = reinterpret_cast(&le_value); // We've reserved 0 bytes, so we can safely memcpy memcpy(buffer.data() + reserved.offset, ptr, sizeof(T)); } - void Write(const char *data, size_t size) { - D_ASSERT(data != nullptr); - buffer.insert(buffer.end(), data, data + size); - } - const vector &GetBuffer() const { return buffer; } @@ -70,18 +67,11 @@ class FixedSizeBlobWriter { if (pos + sizeof(T) > end) { throw 
InvalidInputException("Writing beyond end of binary data at position %zu", pos - beg); } - memcpy(pos, &value, sizeof(T)); + auto le_value = BSwapIfBE(value); + memcpy(pos, &le_value, sizeof(T)); pos += sizeof(T); } - void Write(const char *data, size_t size) { - if (pos + size > end) { - throw InvalidInputException("Writing beyond end of binary data at position %zu", pos - beg); - } - memcpy(pos, data, size); - pos += size; - } - size_t GetPosition() const { return static_cast(pos - beg); } @@ -112,17 +102,9 @@ class BlobReader { throw InvalidInputException("Unexpected end of binary data at position %zu", pos - beg); } T value; - if (LE) { - memcpy(&value, pos, sizeof(T)); - pos += sizeof(T); - } else { - char temp[sizeof(T)]; - for (size_t i = 0; i < sizeof(T); ++i) { - temp[i] = pos[sizeof(T) - 1 - i]; - } - memcpy(&value, temp, sizeof(T)); - pos += sizeof(T); - } + memcpy(&value, pos, sizeof(T)); + value = LE ? BSwapIfBE(value) : BSwapIfLE(value); + pos += sizeof(T); return value; } @@ -1060,9 +1042,20 @@ static uint32_t ParseVerticesInternal(BlobReader &reader, GeometryExtent &extent // Issue a single .Reserve() for all vertices, to minimize bounds checking overhead const auto ptr = const_data_ptr_cast(reader.Reserve(vert_count * sizeof(VERTEX_TYPE))); - +#if DUCKDB_IS_BIG_ENDIAN + double be_buffer[sizeof(VERTEX_TYPE)]; + auto be_ptr = reinterpret_cast(be_buffer); +#endif for (uint32_t vert_idx = 0; vert_idx < vert_count; vert_idx++) { +#if DUCKDB_IS_BIG_ENDIAN + auto vert_ofs = vert_idx * sizeof(VERTEX_TYPE); + for (idx_t i = 0; i < sizeof(VERTEX_TYPE) / sizeof(double); ++i) { + be_buffer[i] = LoadLE(ptr + vert_ofs + i * sizeof(double)); + } + VERTEX_TYPE vertex = Load(be_ptr); +#else VERTEX_TYPE vertex = Load(ptr + vert_idx * sizeof(VERTEX_TYPE)); +#endif if (check_nan && vertex.AllNan()) { continue; } diff --git a/src/duckdb/src/common/types/hash.cpp b/src/duckdb/src/common/types/hash.cpp index b8453ac3d..17505081e 100644 --- 
a/src/duckdb/src/common/types/hash.cpp +++ b/src/duckdb/src/common/types/hash.cpp @@ -84,7 +84,7 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { // Hash/combine in blocks of 8 bytes const auto remainder = len & 7U; for (const auto end = ptr + len - remainder; ptr != end; ptr += 8U) { - h ^= Load(ptr); + h ^= LoadLE(ptr); h *= 0xd6e8feb86659fd93U; } @@ -93,7 +93,7 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { D_ASSERT(len >= 8); // Load remaining (<8) bytes (with a Load instead of a memcpy) const auto inv_rem = 8U - remainder; - const auto hr = Load(ptr - inv_rem) >> (inv_rem * 8U); + const auto hr = LoadLE(ptr - inv_rem) >> (inv_rem * 8U); h ^= hr; h *= 0xd6e8feb86659fd93U; @@ -101,6 +101,7 @@ hash_t HashBytes(const_data_ptr_t ptr, const idx_t len) noexcept { // Load remaining (<8) bytes (with a memcpy) hash_t hr = 0; memcpy(&hr, ptr, remainder); + hr = BSwapIfBE(hr); h ^= hr; h *= 0xd6e8feb86659fd93U; @@ -122,7 +123,7 @@ hash_t Hash(string_t val) { // Hash/combine the first 8-byte block if (!val.Empty()) { - h ^= Load(const_data_ptr_cast(val.GetPrefix())); + h ^= LoadLE(const_data_ptr_cast(val.GetPrefix())); h *= 0xd6e8feb86659fd93U; } @@ -130,6 +131,7 @@ hash_t Hash(string_t val) { if (val.GetSize() > sizeof(hash_t)) { hash_t hr = 0; memcpy(&hr, const_data_ptr_cast(val.GetPrefix()) + sizeof(hash_t), 4U); + hr = BSwapIfBE(hr); h ^= hr; h *= 0xd6e8feb86659fd93U; diff --git a/src/duckdb/src/common/types/vector.cpp b/src/duckdb/src/common/types/vector.cpp index 7363f952a..f8b2d23e3 100644 --- a/src/duckdb/src/common/types/vector.cpp +++ b/src/duckdb/src/common/types/vector.cpp @@ -1,11 +1,8 @@ #include "duckdb/common/types/vector.hpp" -#include "duckdb/common/algorithm.hpp" #include "duckdb/common/assert.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/common/fsst.hpp" -#include "duckdb/common/operator/comparison_operators.hpp" -#include "duckdb/common/pair.hpp" #include "duckdb/common/printer.hpp" #include 
"duckdb/common/serializer/deserializer.hpp" #include "duckdb/common/serializer/serializer.hpp" @@ -20,11 +17,8 @@ #include "duckdb/common/types/vector_cache.hpp" #include "duckdb/common/uhugeint.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" -#include "duckdb/function/scalar/nested_functions.hpp" #include "duckdb/storage/buffer/buffer_handle.hpp" -#include "duckdb/storage/string_uncompressed.hpp" #include "duckdb/common/types/uuid.hpp" -#include "fsst.h" #include // strlen() on Solaris namespace duckdb { @@ -746,6 +740,9 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { auto str = reinterpret_cast(data)[index]; return Value::BIT(const_data_ptr_cast(str.GetData()), str.GetSize()); } + case LogicalTypeId::SQLNULL: { + return Value(); + } case LogicalTypeId::MAP: { auto offlen = reinterpret_cast(data)[index]; auto &child_vec = ListVector::GetEntry(*vector); diff --git a/src/duckdb/src/execution/index/art/art.cpp b/src/duckdb/src/execution/index/art/art.cpp index 770db8818..800135f0d 100644 --- a/src/duckdb/src/execution/index/art/art.cpp +++ b/src/duckdb/src/execution/index/art/art.cpp @@ -231,6 +231,9 @@ unique_ptr ART::TryInitializeScan(const Expression &expr, const return InitializeScanSinglePredicate(high_value, high_comparison_type); } +unique_ptr ART::InitializeFullScan() { + return make_uniq(); +} //===--------------------------------------------------------------------===// // ART Keys //===--------------------------------------------------------------------===// @@ -466,11 +469,6 @@ ErrorData ART::Insert(IndexLock &l, DataChunk &chunk, Vector &row_ids, IndexAppe unsafe_vector row_id_keys(row_count); GenerateKeyVectors(arena, chunk, row_ids, keys, row_id_keys); - optional_ptr delete_art; - if (info.delete_index) { - delete_art = info.delete_index->Cast(); - } - auto conflict_type = ARTConflictType::NO_CONFLICT; optional_idx conflict_idx; auto was_empty = !tree.HasMetadata(); @@ -481,7 +479,7 @@ ErrorData 
ART::Insert(IndexLock &l, DataChunk &chunk, Vector &row_ids, IndexAppe continue; } conflict_type = ARTOperator::Insert(arena, *this, tree, keys[i], 0, row_id_keys[i], GateStatus::GATE_NOT_SET, - delete_art, info.append_mode); + DeleteIndexInfo(info.delete_indexes), info.append_mode); if (conflict_type != ARTConflictType::NO_CONFLICT) { conflict_idx = i; break; @@ -569,26 +567,37 @@ void ART::CommitDrop(IndexLock &index_lock) { tree.Clear(); } -void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) { +idx_t ART::TryDelete(IndexLock &state, DataChunk &entries, Vector &row_ids, optional_ptr deleted_sel, + optional_ptr non_deleted_sel) { // FIXME: We could pass a row_count in here, as we sometimes don't have to delete all row IDs in the chunk, // FIXME: but rather all row IDs up to the conflicting row. - auto row_count = input.size(); + auto row_count = entries.size(); DataChunk expr_chunk; expr_chunk.Initialize(Allocator::DefaultAllocator(), logical_types); - ExecuteExpressions(input, expr_chunk); + ExecuteExpressions(entries, expr_chunk); ArenaAllocator allocator(BufferAllocator::Get(db)); unsafe_vector keys(row_count); unsafe_vector row_id_keys(row_count); GenerateKeyVectors(allocator, expr_chunk, row_ids, keys, row_id_keys); + idx_t delete_count = 0; for (idx_t i = 0; i < row_count; i++) { - if (keys[i].Empty()) { - continue; + bool deleted = true; + if (!keys[i].Empty()) { + D_ASSERT(tree.GetGateStatus() == GateStatus::GATE_NOT_SET); + deleted = ARTOperator::Delete(*this, tree, keys[i], row_id_keys[i]); + } + if (deleted) { + if (deleted_sel) { + deleted_sel->set_index(delete_count, i); + } + delete_count++; + } else if (non_deleted_sel) { + idx_t non_delete_count = i - delete_count; + non_deleted_sel->set_index(non_delete_count, i); } - D_ASSERT(tree.GetGateStatus() == GateStatus::GATE_NOT_SET); - ARTOperator::Delete(*this, tree, keys[i], row_id_keys[i]); } if (!tree.HasMetadata()) { @@ -608,11 +617,21 @@ void ART::Delete(IndexLock &state, 
DataChunk &input, Vector &row_ids) { } } #endif + return delete_count; } //===--------------------------------------------------------------------===// // Point and range lookups //===--------------------------------------------------------------------===// +bool ART::FullScan(idx_t max_count, set &row_ids) { + if (!tree.HasMetadata()) { + return true; + } + Iterator it(*this); + it.FindMinimum(tree); + ARTKey empty_key = ARTKey(); + return it.Scan(empty_key, max_count, row_ids, false); +} bool ART::SearchEqual(ARTKey &key, idx_t max_count, set &row_ids) { auto leaf = ARTOperator::Lookup(*this, tree, key, 0); @@ -678,15 +697,20 @@ bool ART::SearchCloseRange(ARTKey &lower_bound, ARTKey &upper_bound, bool left_e bool ART::Scan(IndexScanState &state, const idx_t max_count, set &row_ids) { auto &scan_state = state.Cast(); + if (scan_state.values[0].IsNull()) { + // full scan + lock_guard l(lock); + return FullScan(max_count, row_ids); + } D_ASSERT(scan_state.values[0].type().InternalType() == types[0]); ArenaAllocator arena_allocator(Allocator::Get(db)); auto key = ARTKey::CreateKey(arena_allocator, types[0], scan_state.values[0]); auto max_len = MAX_KEY_LEN * prefix_count; key.VerifyKeyLength(max_len); + lock_guard l(lock); if (scan_state.values[1].IsNull()) { // Single predicate. - lock_guard l(lock); switch (scan_state.expressions[0]) { case ExpressionType::COMPARE_EQUAL: return SearchEqual(key, max_count, row_ids); @@ -704,7 +728,6 @@ bool ART::Scan(IndexScanState &state, const idx_t max_count, set &row_ids } // Two predicates. 
- lock_guard l(lock); D_ASSERT(scan_state.values[1].type().InternalType() == types[0]); auto upper_bound = ARTKey::CreateKey(arena_allocator, types[0], scan_state.values[1]); upper_bound.VerifyKeyLength(max_len); @@ -758,39 +781,36 @@ string ART::GenerateConstraintErrorMessage(VerifyExistenceType verify_type, cons } } -void ART::VerifyLeaf(const Node &leaf, const ARTKey &key, optional_ptr delete_art, ConflictManager &manager, +void ART::VerifyLeaf(const Node &leaf, const ARTKey &key, DeleteIndexInfo delete_index_info, ConflictManager &manager, optional_idx &conflict_idx, idx_t i) { - // Fast path, the leaf is inlined, and the delete ART does not exist. - if (leaf.GetType() == NType::LEAF_INLINED && !delete_art) { - if (manager.AddHit(i, leaf.GetRowId())) { - conflict_idx = i; - } - return; - } - - // Get the delete_leaf. - // All leaves in the delete ART are inlined. - unsafe_optional_ptr deleted_leaf; - if (delete_art) { - deleted_leaf = ARTOperator::Lookup(*delete_art, delete_art->tree, key, 0); - } - - // The leaf is inlined, and there is no deleted leaf with the same key. - if (leaf.GetType() == NType::LEAF_INLINED && !deleted_leaf) { - if (manager.AddHit(i, leaf.GetRowId())) { - conflict_idx = i; + // Get the set of deleted row ids for this value if we have any delete indexes + vector deleted_row_ids; + if (delete_index_info.delete_indexes) { + for (auto &index : *delete_index_info.delete_indexes) { + auto &delete_art = index.get().Cast(); + auto deleted_leaf = ARTOperator::Lookup(delete_art, delete_art.tree, key, 0); + if (!deleted_leaf) { + continue; + } + // All leaves in the delete ART are inlined. + if (deleted_leaf->GetType() != NType::LEAF_INLINED) { + throw InternalException("Non-inlined leaf?"); + } + auto deleted_row_id = deleted_leaf->GetRowId(); + deleted_row_ids.push_back(deleted_row_id); } - return; } - // The leaf is inlined, and the same key exists in the delete ART. 
- if (leaf.GetType() == NType::LEAF_INLINED && deleted_leaf) { - D_ASSERT(deleted_leaf->GetType() == NType::LEAF_INLINED); - auto deleted_row_id = deleted_leaf->GetRowId(); + if (leaf.GetType() == NType::LEAF_INLINED) { auto this_row_id = leaf.GetRowId(); - - if (deleted_row_id == this_row_id) { - return; + if (!deleted_row_ids.empty()) { + // The leaf is inlined, and the same key exists in the delete ART. + // check if the row-id matches - if it does there is no conflict + for (auto &deleted_row_id : deleted_row_ids) { + if (deleted_row_id == this_row_id) { + return; + } + } } if (manager.AddHit(i, this_row_id)) { @@ -803,7 +823,7 @@ void ART::VerifyLeaf(const Node &leaf, const ARTKey &key, optional_ptr dele // Up to here, the above code paths work implicitly for FKs, as the leaf is inlined. // FIXME: proper foreign key + delete ART support. if (index_constraint_type == IndexConstraintType::FOREIGN) { - D_ASSERT(!deleted_leaf); + D_ASSERT(deleted_row_ids.empty()); // We don't handle FK conflicts in UPSERT, so the row ID should not matter. 
if (manager.AddHit(i, MAX_ROW_ID)) { conflict_idx = i; @@ -821,11 +841,12 @@ void ART::VerifyLeaf(const Node &leaf, const ARTKey &key, optional_ptr dele throw InternalException("VerifyLeaf expects exactly two row IDs to be scanned"); } - if (deleted_leaf) { - auto deleted_row_id = deleted_leaf->GetRowId(); + if (!deleted_row_ids.empty()) { for (const auto row_id : row_ids) { - if (deleted_row_id == row_id) { - return; + for (auto deleted_row_id : deleted_row_ids) { + if (deleted_row_id == row_id) { + return; + } } } } @@ -850,11 +871,6 @@ void ART::VerifyConstraint(DataChunk &chunk, IndexAppendInfo &info, ConflictMana unsafe_vector keys(expr_chunk.size()); GenerateKeys<>(arena_allocator, expr_chunk, keys); - optional_ptr delete_art; - if (info.delete_index) { - delete_art = info.delete_index->Cast(); - } - optional_idx conflict_idx; for (idx_t i = 0; !conflict_idx.IsValid() && i < chunk.size(); i++) { if (keys[i].Empty()) { @@ -868,7 +884,7 @@ void ART::VerifyConstraint(DataChunk &chunk, IndexAppendInfo &info, ConflictMana if (!leaf) { continue; } - VerifyLeaf(*leaf, keys[i], delete_art, manager, conflict_idx, i); + VerifyLeaf(*leaf, keys[i], DeleteIndexInfo(info.delete_indexes), manager, conflict_idx, i); } manager.FinishLookup(); @@ -882,6 +898,7 @@ void ART::VerifyConstraint(DataChunk &chunk, IndexAppendInfo &info, ConflictMana } string ART::GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index, DataChunk &input) { + lock_guard l(lock); auto key_name = GenerateErrorKeyName(input, failed_index); auto exception_msg = GenerateConstraintErrorMessage(verify_type, key_name); return exception_msg; @@ -1053,13 +1070,19 @@ idx_t ART::GetInMemorySize(IndexLock &index_lock) { return in_memory_size; } -bool ART::RequiresTransactionality() const { +bool ART::SupportsDeltaIndexes() const { return true; } -unique_ptr ART::CreateEmptyCopy(const string &name_prefix, IndexConstraintType constraint_type) const { - return make_uniq(name_prefix + name, 
constraint_type, GetColumnIds(), table_io_manager, unbound_expressions, - db); +unique_ptr ART::CreateDeltaIndex(DeltaIndexType target_delta_index) const { + auto constraint_type = index_constraint_type; + if (target_delta_index == DeltaIndexType::DELETED_ROWS_IN_USE) { + // deleted_rows_in_use allows duplicates regardless of whether or not the main index is a unique index or not + constraint_type = IndexConstraintType::NONE; + } + auto result = make_uniq(name, constraint_type, GetColumnIds(), table_io_manager, unbound_expressions, db); + result->delta_index_type = target_delta_index; + return std::move(result); } //===-------------------------------------------------------------------===// @@ -1190,11 +1213,7 @@ bool ART::MergeIndexes(IndexLock &state, BoundIndex &other_index) { if (other_art.owns_data) { if (prefix_count != other_art.prefix_count) { - // this ART uses the deprecated form and the other one does not - transform the other one prior to merging - if (prefix_count != Prefix::DEPRECATED_COUNT) { - throw InternalException("Failed to merge ARTs - other ART is deprecated but this one is not"); - } - other_art.TransformToDeprecated(); + throw InternalException("Failed to merge ARTs - prefix count does not match"); } if (tree.HasMetadata()) { // Fully deserialize other_index, and traverse it to increment its buffer IDs. diff --git a/src/duckdb/src/execution/index/art/art_builder.cpp b/src/duckdb/src/execution/index/art/art_builder.cpp index 92b719ea8..f6721a943 100644 --- a/src/duckdb/src/execution/index/art/art_builder.cpp +++ b/src/duckdb/src/execution/index/art/art_builder.cpp @@ -49,7 +49,7 @@ ARTConflictType ARTBuilder::Build() { // We cannot iterate into the nested leaf with the builder // because row IDs are not sorted. 
for (idx_t i = entry.start; i < entry.start + row_id_count; i++) { - ARTOperator::Insert(arena, art, ref, row_ids[i], 0, row_ids[i], GateStatus::GATE_SET, nullptr, + ARTOperator::Insert(arena, art, ref, row_ids[i], 0, row_ids[i], GateStatus::GATE_SET, DeleteIndexInfo(), IndexAppendMode::DEFAULT); } ref.get().SetGateStatus(GateStatus::GATE_SET); diff --git a/src/duckdb/src/execution/index/art/art_index.cpp b/src/duckdb/src/execution/index/art/art_index.cpp index 05e97f847..c4ba2c504 100644 --- a/src/duckdb/src/execution/index/art/art_index.cpp +++ b/src/duckdb/src/execution/index/art/art_index.cpp @@ -100,8 +100,9 @@ void ARTBuildSinkUnsorted(IndexBuildSinkInput &input, DataChunk &key_chunk, Data // Insert each key and its corresponding row ID. for (idx_t i = 0; i < row_count; i++) { auto status = art.tree.GetGateStatus(); - auto conflict_type = ARTOperator::Insert(l_state.arena_allocator, art, art.tree, l_state.keys[i], 0, - l_state.row_ids[i], status, nullptr, IndexAppendMode::DEFAULT); + auto conflict_type = + ARTOperator::Insert(l_state.arena_allocator, art, art.tree, l_state.keys[i], 0, l_state.row_ids[i], status, + DeleteIndexInfo(), IndexAppendMode::DEFAULT); D_ASSERT(conflict_type != ARTConflictType::TRANSACTION); if (conflict_type == ARTConflictType::CONSTRAINT) { throw ConstraintException("Data contains duplicates on indexed column(s)"); diff --git a/src/duckdb/src/execution/index/art/art_merger.cpp b/src/duckdb/src/execution/index/art/art_merger.cpp index f2fe5a8d8..3b562374c 100644 --- a/src/duckdb/src/execution/index/art/art_merger.cpp +++ b/src/duckdb/src/execution/index/art/art_merger.cpp @@ -107,7 +107,7 @@ ARTConflictType ARTMerger::MergeNodeAndInlined(NodeEntry &entry) { // We fall back to the ART insertion code. 
auto row_id_key = ARTKey::CreateARTKey(arena, entry.right.GetRowId()); return ARTOperator::Insert(arena, art, entry.left, row_id_key, entry.depth, row_id_key, GateStatus::GATE_SET, - nullptr, IndexAppendMode::DEFAULT); + DeleteIndexInfo(), IndexAppendMode::DEFAULT); } array_ptr ARTMerger::GetBytes(Node &leaf) { diff --git a/src/duckdb/src/execution/index/art/leaf.cpp b/src/duckdb/src/execution/index/art/leaf.cpp index 3f1190216..4895f996b 100644 --- a/src/duckdb/src/execution/index/art/leaf.cpp +++ b/src/duckdb/src/execution/index/art/leaf.cpp @@ -88,8 +88,8 @@ void Leaf::TransformToNested(ART &art, Node &node) { auto &leaf = Node::Ref(art, leaf_ref, LEAF); for (uint8_t i = 0; i < leaf.count; i++) { auto row_id = ARTKey::CreateARTKey(arena, leaf.row_ids[i]); - auto conflict_type = ARTOperator::Insert(arena, art, root, row_id, 0, row_id, GateStatus::GATE_SET, nullptr, - IndexAppendMode::INSERT_DUPLICATES); + auto conflict_type = ARTOperator::Insert(arena, art, root, row_id, 0, row_id, GateStatus::GATE_SET, + DeleteIndexInfo(), IndexAppendMode::INSERT_DUPLICATES); if (conflict_type != ARTConflictType::NO_CONFLICT) { throw InternalException("invalid conflict type in Leaf::TransformToNested"); } diff --git a/src/duckdb/src/execution/index/bound_index.cpp b/src/duckdb/src/execution/index/bound_index.cpp index c60886c31..cb10bc1f1 100644 --- a/src/duckdb/src/execution/index/bound_index.cpp +++ b/src/duckdb/src/execution/index/bound_index.cpp @@ -64,12 +64,33 @@ void BoundIndex::CommitDrop() { CommitDrop(index_lock); } +idx_t BoundIndex::TryDelete(DataChunk &entries, Vector &row_identifiers, optional_ptr deleted_sel, + optional_ptr non_deleted_sel) { + IndexLock state; + InitializeLock(state); + return TryDelete(state, entries, row_identifiers, deleted_sel, non_deleted_sel); +} + +idx_t BoundIndex::TryDelete(IndexLock &state, DataChunk &entries, Vector &row_identifiers, + optional_ptr deleted_sel, optional_ptr non_deleted_sel) { + throw InternalException("TryDelete not 
implemented"); +} + void BoundIndex::Delete(DataChunk &entries, Vector &row_identifiers) { IndexLock state; InitializeLock(state); Delete(state, entries, row_identifiers); } +void BoundIndex::Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) { + TryDelete(state, entries, row_identifiers); + // FIXME: enable this + // if (deleted_rows != entries.size()) { + // throw InvalidInputException("Failed to delete all rows from index. Only deleted %d out of %d rows.\nChunk: %s", + // deleted_rows, entries.size(), entries.ToString()); + // } +} + ErrorData BoundIndex::Insert(IndexLock &l, DataChunk &chunk, Vector &row_ids, IndexAppendInfo &info) { throw NotImplementedException("this implementation of Insert does not exist."); } @@ -142,13 +163,12 @@ bool BoundIndex::IndexIsUpdated(const vector &column_ids_p) const return false; } -bool BoundIndex::RequiresTransactionality() const { +bool BoundIndex::SupportsDeltaIndexes() const { return false; } -unique_ptr BoundIndex::CreateEmptyCopy(const string &name_prefix, - IndexConstraintType constraint_type) const { - throw InternalException("BoundIndex::CreateEmptyCopy is not supported for this index type"); +unique_ptr BoundIndex::CreateDeltaIndex(DeltaIndexType delta_index_type) const { + throw InternalException("BoundIndex::CreateDeltaIndex is not supported for this index type"); } IndexStorageInfo BoundIndex::SerializeToDisk(QueryContext context, const case_insensitive_map_t &options) { diff --git a/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp b/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp index 0f465620c..097994855 100644 --- a/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +++ b/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp @@ -17,11 +17,12 @@ PhysicalStreamingWindow::PhysicalStreamingWindow(PhysicalPlan &physical_plan, ve class StreamingWindowGlobalState : public GlobalOperatorState { public: - 
StreamingWindowGlobalState() : row_number(1) { - } + explicit StreamingWindowGlobalState(ClientContext &client); //! The next row number. std::atomic row_number; + //! The single local state + unique_ptr local_state; }; class StreamingWindowState : public OperatorState { @@ -348,6 +349,10 @@ class StreamingWindowState : public OperatorState { SelectionVector sel; }; +StreamingWindowGlobalState::StreamingWindowGlobalState(ClientContext &client) : row_number(1) { + local_state = make_uniq(client); +} + bool PhysicalStreamingWindow::IsStreamingFunction(ClientContext &context, unique_ptr &expr) { auto &wexpr = expr->Cast(); if (!wexpr.partitions.empty() || !wexpr.orders.empty() || !wexpr.arg_orders.empty() || @@ -392,12 +397,8 @@ bool PhysicalStreamingWindow::IsStreamingFunction(ClientContext &context, unique } } -unique_ptr PhysicalStreamingWindow::GetGlobalOperatorState(ClientContext &context) const { - return make_uniq(); -} - -unique_ptr PhysicalStreamingWindow::GetOperatorState(ExecutionContext &context) const { - return make_uniq(context.client); +unique_ptr PhysicalStreamingWindow::GetGlobalOperatorState(ClientContext &client) const { + return make_uniq(client); } void StreamingWindowState::AggregateState::Execute(ExecutionContext &context, DataChunk &input, Vector &result) { @@ -505,9 +506,9 @@ void StreamingWindowState::AggregateState::Execute(ExecutionContext &context, Da } void PhysicalStreamingWindow::ExecuteFunctions(ExecutionContext &context, DataChunk &output, DataChunk &delayed, - GlobalOperatorState &gstate_p, OperatorState &state_p) const { + GlobalOperatorState &gstate_p) const { auto &gstate = gstate_p.Cast(); - auto &state = state_p.Cast(); + auto &state = gstate.local_state->Cast(); // Compute window functions const idx_t count = output.size(); @@ -624,9 +625,9 @@ void PhysicalStreamingWindow::ExecuteFunctions(ExecutionContext &context, DataCh } void PhysicalStreamingWindow::ExecuteInput(ExecutionContext &context, DataChunk &delayed, DataChunk 
&input, - DataChunk &output, GlobalOperatorState &gstate_p, - OperatorState &state_p) const { - auto &state = state_p.Cast(); + DataChunk &output, GlobalOperatorState &gstate_p) const { + auto &gstate = gstate_p.Cast(); + auto &state = gstate.local_state->Cast(); // Put payload columns in place for (idx_t col_idx = 0; col_idx < input.data.size(); col_idx++) { @@ -642,13 +643,13 @@ void PhysicalStreamingWindow::ExecuteInput(ExecutionContext &context, DataChunk } output.SetCardinality(count); - ExecuteFunctions(context, output, state.delayed, gstate_p, state_p); + ExecuteFunctions(context, output, state.delayed, gstate_p); } void PhysicalStreamingWindow::ExecuteShifted(ExecutionContext &context, DataChunk &delayed, DataChunk &input, - DataChunk &output, GlobalOperatorState &gstate_p, - OperatorState &state_p) const { - auto &state = state_p.Cast(); + DataChunk &output, GlobalOperatorState &gstate_p) const { + auto &gstate = gstate_p.Cast(); + auto &state = gstate.local_state->Cast(); auto &shifted = state.shifted; idx_t out = output.size(); @@ -670,12 +671,11 @@ void PhysicalStreamingWindow::ExecuteShifted(ExecutionContext &context, DataChun } delayed.SetCardinality(delay - out + in); - ExecuteFunctions(context, output, delayed, gstate_p, state_p); + ExecuteFunctions(context, output, delayed, gstate_p); } void PhysicalStreamingWindow::ExecuteDelayed(ExecutionContext &context, DataChunk &delayed, DataChunk &input, - DataChunk &output, GlobalOperatorState &gstate_p, - OperatorState &state_p) const { + DataChunk &output, GlobalOperatorState &gstate_p) const { // Put payload columns in place for (idx_t col_idx = 0; col_idx < delayed.data.size(); col_idx++) { output.data[col_idx].Reference(delayed.data[col_idx]); @@ -683,12 +683,13 @@ void PhysicalStreamingWindow::ExecuteDelayed(ExecutionContext &context, DataChun idx_t count = delayed.size(); output.SetCardinality(count); - ExecuteFunctions(context, output, input, gstate_p, state_p); + ExecuteFunctions(context, output, 
input, gstate_p); } OperatorResultType PhysicalStreamingWindow::Execute(ExecutionContext &context, DataChunk &input, DataChunk &output, - GlobalOperatorState &gstate_p, OperatorState &state_p) const { - auto &state = state_p.Cast(); + GlobalOperatorState &gstate_p, OperatorState &) const { + auto &gstate = gstate_p.Cast(); + auto &state = gstate.local_state->Cast(); if (!state.initialized) { state.Initialize(context.client, input, select_list); } @@ -709,27 +710,27 @@ OperatorResultType PhysicalStreamingWindow::Execute(ExecutionContext &context, D // If we can't consume all of the delayed values, // we need to split them instead of referencing them all output.SetCardinality(input.size()); - ExecuteShifted(context, delayed, input, output, gstate_p, state_p); + ExecuteShifted(context, delayed, input, output, gstate_p); // We delayed the unused input so ask for more return OperatorResultType::NEED_MORE_INPUT; } else if (delayed.size()) { // We have enough delayed rows so flush them - ExecuteDelayed(context, delayed, input, output, gstate_p, state_p); + ExecuteDelayed(context, delayed, input, output, gstate_p); // Defer resetting delayed as it may be referenced. delayed.SetCardinality(0); // Come back to process the input return OperatorResultType::HAVE_MORE_OUTPUT; } else { // No delayed rows, so emit what we can and delay the rest. 
- ExecuteInput(context, delayed, input, output, gstate_p, state_p); + ExecuteInput(context, delayed, input, output, gstate_p); return OperatorResultType::NEED_MORE_INPUT; } } OperatorFinalizeResultType PhysicalStreamingWindow::FinalExecute(ExecutionContext &context, DataChunk &output, - GlobalOperatorState &gstate_p, - OperatorState &state_p) const { - auto &state = state_p.Cast(); + GlobalOperatorState &gstate_p, OperatorState &) const { + auto &gstate = gstate_p.Cast(); + auto &state = gstate.local_state->Cast(); if (state.initialized && state.lead_count) { auto &delayed = state.delayed; @@ -740,10 +741,10 @@ OperatorFinalizeResultType PhysicalStreamingWindow::FinalExecute(ExecutionContex if (output.GetCapacity() < delayed.size()) { // More than one output buffer was delayed, so shift in what we can output.SetCardinality(output.GetCapacity()); - ExecuteShifted(context, delayed, input, output, gstate_p, state_p); + ExecuteShifted(context, delayed, input, output, gstate_p); return OperatorFinalizeResultType::HAVE_MORE_OUTPUT; } - ExecuteDelayed(context, delayed, input, output, gstate_p, state_p); + ExecuteDelayed(context, delayed, input, output, gstate_p); } return OperatorFinalizeResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/persistent/physical_merge_into.cpp b/src/duckdb/src/execution/operator/persistent/physical_merge_into.cpp index 672a9b861..796d3cec8 100644 --- a/src/duckdb/src/execution/operator/persistent/physical_merge_into.cpp +++ b/src/duckdb/src/execution/operator/persistent/physical_merge_into.cpp @@ -472,10 +472,17 @@ SourceResultType PhysicalMergeInto::GetDataInternal(ExecutionContext &context, D // no action to scan from continue; } + // found a good one + break; + } + if (lstate.index < actions.size()) { + auto &action = *actions[lstate.index]; + auto &child_gstate = *gstate.global_states[lstate.index]; auto &child_lstate = *lstate.local_states[lstate.index]; OperatorSourceInput source_input {child_gstate, child_lstate, 
input.interrupt_state}; + lstate.scan_chunk.Reset(); auto result = action.op->GetData(context, lstate.scan_chunk, source_input); if (lstate.scan_chunk.size() > 0) { // construct the result chunk @@ -504,9 +511,13 @@ SourceResultType PhysicalMergeInto::GetDataInternal(ExecutionContext &context, D if (result != SourceResultType::FINISHED) { return result; - } - if (chunk.size() != 0) { - return SourceResultType::HAVE_MORE_OUTPUT; + } else { + lstate.index++; + if (lstate.index < actions.size()) { + return SourceResultType::HAVE_MORE_OUTPUT; + } else { + return SourceResultType::FINISHED; + } } } return SourceResultType::FINISHED; diff --git a/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp b/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp index eee2d4a8d..c638f45b8 100644 --- a/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +++ b/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp @@ -91,8 +91,9 @@ SinkResultType PhysicalCreateARTIndex::SinkUnsorted(OperatorSinkInput &input) co // Insert each key and its corresponding row ID. 
for (idx_t i = 0; i < row_count; i++) { auto status = art.tree.GetGateStatus(); - auto conflict_type = ARTOperator::Insert(l_state.arena_allocator, art, art.tree, l_state.keys[i], 0, - l_state.row_ids[i], status, nullptr, IndexAppendMode::DEFAULT); + auto conflict_type = + ARTOperator::Insert(l_state.arena_allocator, art, art.tree, l_state.keys[i], 0, l_state.row_ids[i], status, + DeleteIndexInfo(), IndexAppendMode::DEFAULT); D_ASSERT(conflict_type != ARTConflictType::TRANSACTION); if (conflict_type == ARTConflictType::CONSTRAINT) { throw ConstraintException("Data contains duplicates on indexed column(s)"); diff --git a/src/duckdb/src/function/scalar/string/concat.cpp b/src/duckdb/src/function/scalar/string/concat.cpp index 97a74cebe..7d1bab8be 100644 --- a/src/duckdb/src/function/scalar/string/concat.cpp +++ b/src/duckdb/src/function/scalar/string/concat.cpp @@ -1,15 +1,10 @@ #include "duckdb/common/exception.hpp" -#include "duckdb/common/types/date.hpp" +#include "duckdb/common/types/vector.hpp" #include "duckdb/common/vector_operations/binary_executor.hpp" -#include "duckdb/common/vector_operations/vector_operations.hpp" -#include "duckdb/function/scalar/nested_functions.hpp" #include "duckdb/function/scalar/string_functions.hpp" -#include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" -#include - namespace duckdb { namespace { @@ -209,6 +204,7 @@ void ConcatFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); if (info.return_type.id() == LogicalTypeId::SQLNULL) { + result.SetVectorType(VectorType::CONSTANT_VECTOR); return; } if (info.return_type.id() == LogicalTypeId::LIST) { diff --git a/src/duckdb/src/function/scalar/string/md5.cpp b/src/duckdb/src/function/scalar/string/md5.cpp index 94a035ee7..8c9894977 100644 --- a/src/duckdb/src/function/scalar/string/md5.cpp +++ 
b/src/duckdb/src/function/scalar/string/md5.cpp @@ -28,7 +28,7 @@ struct MD5Number128Operator { MD5Context context; context.Add(input); context.Finish(digest); - return *reinterpret_cast(digest); + return BSwapIfBE(*reinterpret_cast(digest)); } }; diff --git a/src/duckdb/src/function/table/arrow.cpp b/src/duckdb/src/function/table/arrow.cpp index f2f932768..f7b5e3ff7 100644 --- a/src/duckdb/src/function/table/arrow.cpp +++ b/src/duckdb/src/function/table/arrow.cpp @@ -245,10 +245,10 @@ static bool CanPushdown(const ArrowType &type) { case LogicalTypeId::UBIGINT: case LogicalTypeId::FLOAT: case LogicalTypeId::DOUBLE: - case LogicalTypeId::VARCHAR: return true; + case LogicalTypeId::VARCHAR: case LogicalTypeId::BLOB: - // PyArrow doesn't support binary view filters yet + // PyArrow doesn't support binary and string view filters yet return type.GetTypeInfo().GetSizeType() != ArrowVariableSizeType::VIEW; case LogicalTypeId::DECIMAL: { switch (duck_type.InternalType()) { diff --git a/src/duckdb/src/function/table/arrow_conversion.cpp b/src/duckdb/src/function/table/arrow_conversion.cpp index 511a272dc..5eba8026b 100644 --- a/src/duckdb/src/function/table/arrow_conversion.cpp +++ b/src/duckdb/src/function/table/arrow_conversion.cpp @@ -55,7 +55,7 @@ static void GetValidityMask(ValidityMask &mask, ArrowArray &array, idx_t chunk_o if (array.null_count != 0 && array.n_buffers > 0 && array.buffers[0]) { auto bit_offset = GetEffectiveOffset(array, parent_offset, chunk_offset, nested_offset); mask.EnsureWritable(); -#if STANDARD_VECTOR_SIZE > 64 +#if STANDARD_VECTOR_SIZE > 64 && !DUCKDB_IS_BIG_ENDIAN auto n_bitmask_bytes = (size + 8 - 1) / 8; if (bit_offset % 8 == 0) { //! 
just memcpy nullmask diff --git a/src/duckdb/src/function/table/table_scan.cpp b/src/duckdb/src/function/table/table_scan.cpp index 563189942..596860bea 100644 --- a/src/duckdb/src/function/table/table_scan.cpp +++ b/src/duckdb/src/function/table/table_scan.cpp @@ -387,7 +387,7 @@ unique_ptr DuckTableScanInitGlobal(ClientContext &cont g_state->state.local_state.reorderer = make_uniq(*bind_data.order_options); } - storage.InitializeParallelScan(context, g_state->state); + storage.InitializeParallelScan(context, g_state->state, input.column_indexes); if (!input.CanRemoveFilterColumns()) { return std::move(g_state); } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 12500c8f1..ae94dca3b 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "0-dev4892" +#define DUCKDB_PATCH_VERSION "0-dev5016" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.0-dev4892" +#define DUCKDB_VERSION "v1.5.0-dev5016" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "c46a01b579" +#define DUCKDB_SOURCE_ID "b5761ca54c" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/function/window/window_value_function.cpp b/src/duckdb/src/function/window/window_value_function.cpp index adf60be11..276b99fad 100644 --- a/src/duckdb/src/function/window/window_value_function.cpp +++ b/src/duckdb/src/function/window/window_value_function.cpp @@ -465,7 +465,11 @@ void WindowFirstValueExecutor::EvaluateInternal(ExecutionContext &context, DataC if (frame_width) { const auto first_idx = gvstate.value_tree->SelectNth(frames, 0); D_ASSERT(first_idx.second == 0); - 
cursor.CopyCell(0, first_idx.first, result, i); + if (first_idx.first < cursor.Count()) { + cursor.CopyCell(0, first_idx.first, result, i); + } else { + FlatVector::SetNull(result, i, true); + } } else { FlatVector::SetNull(result, i, true); } @@ -519,7 +523,7 @@ void WindowLastValueExecutor::EvaluateInternal(ExecutionContext &context, DataCh n -= last_idx.second; last_idx = gvstate.value_tree->SelectNth(frames, n); } - if (last_idx.second) { + if (last_idx.second || last_idx.first >= cursor.Count()) { // No last value - give up. FlatVector::SetNull(result, i, true); } else { @@ -589,7 +593,7 @@ void WindowNthValueExecutor::EvaluateInternal(ExecutionContext &context, DataChu if (n < frame_width) { const auto nth_index = gvstate.value_tree->SelectNth(frames, n - 1); - if (nth_index.second) { + if (nth_index.second || nth_index.first >= cursor.Count()) { // Past end of frame FlatVector::SetNull(result, i, true); } else { diff --git a/src/duckdb/src/include/duckdb/common/bswap.hpp b/src/duckdb/src/include/duckdb/common/bswap.hpp index a1434da73..db82f237b 100644 --- a/src/duckdb/src/include/duckdb/common/bswap.hpp +++ b/src/duckdb/src/include/duckdb/common/bswap.hpp @@ -8,8 +8,8 @@ #pragma once -#include "duckdb/common/common.hpp" -#include "duckdb/common/numeric_utils.hpp" +#include "duckdb/common/hugeint.hpp" +#include "duckdb/common/uhugeint.hpp" #include diff --git a/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp b/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp index 82208c895..38c3b94c2 100644 --- a/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +++ b/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp @@ -93,6 +93,7 @@ enum class MetricType : uint8_t { OPTIMIZER_CTE_INLINING, OPTIMIZER_COMMON_SUBPLAN, OPTIMIZER_JOIN_ELIMINATION, + OPTIMIZER_COUNT_WINDOW_ELIMINATION, // PhaseTiming metrics ALL_OPTIMIZERS, CUMULATIVE_OPTIMIZER_TIMING, @@ -128,7 +129,7 @@ class MetricsUtils { static constexpr uint8_t END_OPERATOR = 
static_cast(MetricType::OPERATOR_TYPE); static constexpr uint8_t START_OPTIMIZER = static_cast(MetricType::OPTIMIZER_EXPRESSION_REWRITER); - static constexpr uint8_t END_OPTIMIZER = static_cast(MetricType::OPTIMIZER_JOIN_ELIMINATION); + static constexpr uint8_t END_OPTIMIZER = static_cast(MetricType::OPTIMIZER_COUNT_WINDOW_ELIMINATION); static constexpr uint8_t START_PHASE_TIMING = static_cast(MetricType::ALL_OPTIMIZERS); static constexpr uint8_t END_PHASE_TIMING = static_cast(MetricType::PLANNER_BINDING); diff --git a/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp b/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp index 7f6864aac..8d2928af4 100644 --- a/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +++ b/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp @@ -46,7 +46,8 @@ enum class OptimizerType : uint32_t { LATE_MATERIALIZATION, CTE_INLINING, COMMON_SUBPLAN, - JOIN_ELIMINATION + JOIN_ELIMINATION, + COUNT_WINDOW_ELIMINATION }; string OptimizerTypeToString(OptimizerType type); diff --git a/src/duckdb/src/include/duckdb/common/helper.hpp b/src/duckdb/src/include/duckdb/common/helper.hpp index 118bada1e..13a66aa68 100644 --- a/src/duckdb/src/include/duckdb/common/helper.hpp +++ b/src/duckdb/src/include/duckdb/common/helper.hpp @@ -8,6 +8,7 @@ #pragma once +#include "duckdb/common/bswap.hpp" #include "duckdb/common/constants.hpp" #include "duckdb/common/shared_ptr.hpp" #include @@ -220,6 +221,11 @@ const T Load(const_data_ptr_t ptr) { return ret; } +template +const T LoadLE(const_data_ptr_t ptr) { + return BSwapIfBE(Load(ptr)); +} + template void Store(const T &val, data_ptr_t ptr) { memcpy(ptr, (void *)&val, sizeof(val)); // NOLINT diff --git a/src/duckdb/src/include/duckdb/common/http_util.hpp b/src/duckdb/src/include/duckdb/common/http_util.hpp index 11fc26c48..a493647b3 100644 --- a/src/duckdb/src/include/duckdb/common/http_util.hpp +++ b/src/duckdb/src/include/duckdb/common/http_util.hpp @@ -139,7 +139,7 @@ 
struct BaseRequest { const string &url; string path; string proto_host_port; - const HTTPHeaders &headers; + HTTPHeaders headers; HTTPParams ¶ms; //! Whether or not to return failed requests (instead of throwing) bool try_request = false; @@ -157,6 +157,14 @@ struct BaseRequest { const TARGET &Cast() const { return reinterpret_cast(*this); } + + static HTTPHeaders MergeHeaders(const HTTPHeaders &headers, HTTPParams ¶ms) { + HTTPHeaders result = headers; + for (const auto &header : params.extra_headers) { + result.Insert(header.first, header.second); + } + return result; + } }; struct GetRequestInfo : public BaseRequest { diff --git a/src/duckdb/src/include/duckdb/execution/index/art/art.hpp b/src/duckdb/src/include/duckdb/execution/index/art/art.hpp index 1a14b24c1..09e10aa80 100644 --- a/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/art.hpp @@ -25,6 +25,15 @@ class FixedSizeAllocator; struct ARTIndexScanState; +struct DeleteIndexInfo { + DeleteIndexInfo() : delete_indexes(nullptr) { + } + explicit DeleteIndexInfo(vector> &delete_indexes) : delete_indexes(delete_indexes) { + } + + optional_ptr>> delete_indexes; +}; + class ART : public BoundIndex { public: friend class Leaf; @@ -67,6 +76,7 @@ class ART : public BoundIndex { public: //! Try to initialize a scan on the ART with the given expression and filter. unique_ptr TryInitializeScan(const Expression &expr, const Expression &filter_expr); + unique_ptr InitializeFullScan(); //! Perform a lookup on the ART, fetching up to max_count row IDs. //! If all row IDs were fetched, it return true, else false. bool Scan(IndexScanState &state, idx_t max_count, set &row_ids); @@ -85,7 +95,8 @@ class ART : public BoundIndex { void VerifyAppend(DataChunk &chunk, IndexAppendInfo &info, optional_ptr manager) override; //! Delete a chunk from the ART. 
- void Delete(IndexLock &lock, DataChunk &entries, Vector &row_ids) override; + idx_t TryDelete(IndexLock &state, DataChunk &entries, Vector &row_identifiers, + optional_ptr deleted_sel, optional_ptr non_deleted_sel) override; //! Drop the ART. void CommitDrop(IndexLock &index_lock) override; @@ -107,9 +118,8 @@ class ART : public BoundIndex { //! Returns the in-memory usage of the ART. idx_t GetInMemorySize(IndexLock &index_lock) override; - bool RequiresTransactionality() const override; - unique_ptr CreateEmptyCopy(const string &name_prefix, - IndexConstraintType constraint_type) const override; + bool SupportsDeltaIndexes() const override; + unique_ptr CreateDeltaIndex(DeltaIndexType delta_index_type) const override; //! ART key generation. template @@ -136,6 +146,7 @@ class ART : public BoundIndex { //! The number of bytes fitting in the prefix. uint8_t prefix_count; + bool FullScan(idx_t max_count, set &row_ids); bool SearchEqual(ARTKey &key, idx_t max_count, set &row_ids); bool SearchGreater(ARTKey &key, bool equal, idx_t max_count, set &row_ids); bool SearchLess(ARTKey &upper_bound, bool equal, idx_t max_count, set &row_ids); @@ -144,7 +155,7 @@ class ART : public BoundIndex { string GenerateErrorKeyName(DataChunk &input, idx_t row); string GenerateConstraintErrorMessage(VerifyExistenceType verify_type, const string &key_name); - void VerifyLeaf(const Node &leaf, const ARTKey &key, optional_ptr delete_art, ConflictManager &manager, + void VerifyLeaf(const Node &leaf, const ARTKey &key, DeleteIndexInfo delete_index_info, ConflictManager &manager, optional_idx &conflict_idx, idx_t i); void VerifyConstraint(DataChunk &chunk, IndexAppendInfo &info, ConflictManager &manager) override; string GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index, diff --git a/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp b/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp index 0efadc991..7d66df318 100644 --- 
a/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp @@ -120,7 +120,7 @@ class ARTOperator { //! Starts at depth (in the key). //! status indicates if the insert happens inside a gate or not. static ARTConflictType Insert(ArenaAllocator &arena, ART &art, Node &node, const ARTKey &key, idx_t depth, - const ARTKey &row_id, GateStatus status, optional_ptr delete_art, + const ARTKey &row_id, GateStatus status, DeleteIndexInfo delete_index_info, const IndexAppendMode append_mode) { reference active_node_ref(node); reference active_key_ref(key); @@ -164,7 +164,8 @@ class ARTOperator { const auto type = active_node.GetType(); switch (type) { case NType::LEAF_INLINED: { - return InsertIntoInlined(arena, art, active_node, key, row_id, depth, status, delete_art, append_mode); + return InsertIntoInlined(arena, art, active_node, key, row_id, depth, status, delete_index_info, + append_mode); } case NType::LEAF: { Leaf::TransformToNested(art, active_node); @@ -217,7 +218,7 @@ class ARTOperator { //! Delete a key and its row ID. //! Assumes that deletion starts at the root of the tree. - static void Delete(ART &art, Node &node, const ARTKey &key, const ARTKey &row_id) { + static bool Delete(ART &art, Node &node, const ARTKey &key, const ARTKey &row_id) { // If we need to compress a Node4 into a one-way node, // then we need the previous prefix before the Node4. Node empty; @@ -246,12 +247,12 @@ class ARTOperator { switch (type) { case NType::LEAF_INLINED: { if (current.get().GetRowId() != row_id.GetRowId()) { - return; + return false; } if (!passed_node && parent.get().GetType() == NType::PREFIX) { // The tree contains exactly one element with a prefix. Node::FreeTree(art, parent); - return; + return true; } if (parent.get().GetType() == NType::PREFIX) { // We might have to compress: @@ -260,10 +261,10 @@ class ARTOperator { // Then, when we delete that child, we also free it. 
Node::DeleteChild(art, grandparent, greatgrandparent, current_key.get()[grandparent_depth], status, row_id); - return; + return true; } Node::DeleteChild(art, parent, grandparent, current_key.get()[parent_depth], status, row_id); - return; + return true; } case NType::LEAF: { D_ASSERT(status == GateStatus::GATE_NOT_SET); @@ -282,7 +283,7 @@ class ARTOperator { Prefix prefix(art, current, true); for (idx_t i = 0; i < prefix.data[art.PrefixCount()]; i++) { if (prefix.data[i] != current_key.get()[depth]) { - return; + return false; } depth++; } @@ -307,7 +308,7 @@ class ARTOperator { auto child = current.get().GetChildMutable(art, current_key.get()[depth]); if (!child) { // No child at the byte: nothing to erase. - return; + return false; } current = *child; @@ -321,16 +322,17 @@ class ARTOperator { if (current.get().HasByte(art, byte)) { Node::DeleteChild(art, current, parent, byte, status, row_id); } - return; + return true; } } } + return false; } private: static ARTConflictType InsertIntoInlined(ArenaAllocator &arena, ART &art, Node &node, const ARTKey &key, const ARTKey &row_id, const idx_t depth, const GateStatus status, - optional_ptr delete_art, const IndexAppendMode append_mode) { + DeleteIndexInfo delete_index_info, const IndexAppendMode append_mode) { Node row_id_node; Leaf::New(row_id_node, row_id.GetRowId()); @@ -339,31 +341,33 @@ class ARTOperator { return ARTConflictType::NO_CONFLICT; } - if (!delete_art) { - if (append_mode == IndexAppendMode::IGNORE_DUPLICATES) { + if (delete_index_info.delete_indexes) { + // Lookup in the delete_art. + for (auto &delete_index : *delete_index_info.delete_indexes) { + auto &delete_art = delete_index.get().Cast(); + auto delete_leaf = Lookup(delete_art, delete_art.tree, key, 0); + if (!delete_leaf) { + continue; + } + + // The row ID has changed. + // Thus, the local index has a newer (local) row ID, and this is a constraint violation. 
+ D_ASSERT(delete_leaf->GetType() == NType::LEAF_INLINED); + auto deleted_row_id = delete_leaf->GetRowId(); + auto this_row_id = node.GetRowId(); + if (deleted_row_id != this_row_id) { + continue; + } + + // The deleted key and its row ID match the current key and its row ID. + Leaf::MergeInlined(arena, art, node, row_id_node, status, depth); return ARTConflictType::NO_CONFLICT; } - return ARTConflictType::CONSTRAINT; - } - - // Lookup in the delete_art. - auto delete_leaf = Lookup(*delete_art, delete_art->tree, key, 0); - if (!delete_leaf) { - return ARTConflictType::CONSTRAINT; } - - // The row ID has changed. - // Thus, the local index has a newer (local) row ID, and this is a constraint violation. - D_ASSERT(delete_leaf->GetType() == NType::LEAF_INLINED); - auto deleted_row_id = delete_leaf->GetRowId(); - auto this_row_id = node.GetRowId(); - if (deleted_row_id != this_row_id) { - return ARTConflictType::CONSTRAINT; + if (append_mode == IndexAppendMode::IGNORE_DUPLICATES) { + return ARTConflictType::NO_CONFLICT; } - - // The deleted key and its row ID match the current key and its row ID. 
- Leaf::MergeInlined(arena, art, node, row_id_node, status, depth); - return ARTConflictType::NO_CONFLICT; + return ARTConflictType::CONSTRAINT; } static void InsertIntoNode(ART &art, Node &node, const ARTKey &key, const ARTKey &row_id, const idx_t depth, diff --git a/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp b/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp index ae6daa0cd..9bc40582c 100644 --- a/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +++ b/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp @@ -33,13 +33,27 @@ enum class IndexAppendMode : uint8_t { DEFAULT = 0, IGNORE_DUPLICATES = 1, INSER class IndexAppendInfo { public: - IndexAppendInfo() : append_mode(IndexAppendMode::DEFAULT), delete_index(nullptr) {}; - IndexAppendInfo(const IndexAppendMode append_mode, const optional_ptr delete_index) - : append_mode(append_mode), delete_index(delete_index) {}; + IndexAppendInfo() : append_mode(IndexAppendMode::DEFAULT) { + } + IndexAppendInfo(const IndexAppendMode append_mode, optional_ptr delete_index) + : append_mode(append_mode) { + if (delete_index) { + delete_indexes.push_back(*delete_index); + } + } public: IndexAppendMode append_mode; - optional_ptr delete_index; + vector> delete_indexes; +}; + +enum class DeltaIndexType { + NONE, + LOCAL_APPEND, + LOCAL_DELETE, + ADDED_DURING_CHECKPOINT, + REMOVED_DURING_CHECKPOINT, + DELETED_ROWS_IN_USE }; //! The index is an abstract base class that serves as the basis for indexes @@ -73,6 +87,9 @@ class BoundIndex : public Index { //! and we use them when binding the unbound expressions. vector> unbound_expressions; + //! Whether or not this is a delta index - and if it is, which type it is + DeltaIndexType delta_index_type = DeltaIndexType::NONE; + public: bool IsBound() const override { return true; @@ -108,8 +125,18 @@ class BoundIndex : public Index { virtual void CommitDrop(IndexLock &index_lock) = 0; //! 
Deletes all data from the index void CommitDrop() override; - //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held - virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0; + //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held. + //! Returns the amount of rows successfully deleted from the index. + //! If either deleted_sel or non_deleted_sel are provided the exact rows that were (not) deleted are written there + virtual idx_t TryDelete(IndexLock &state, DataChunk &entries, Vector &row_identifiers, + optional_ptr deleted_sel = nullptr, + optional_ptr non_deleted_sel = nullptr); + //! Obtains a lock and calls TryDelete while holding that lock + idx_t TryDelete(DataChunk &entries, Vector &row_identifiers, optional_ptr deleted_sel = nullptr, + optional_ptr non_deleted_sel = nullptr); + //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held. + //! Throws an error if not all rows are deleted + virtual void Delete(IndexLock &state, DataChunk &entries, Vector &row_identifiers); //! Obtains a lock and calls Delete while holding that lock void Delete(DataChunk &entries, Vector &row_identifiers); @@ -130,12 +157,11 @@ class BoundIndex : public Index { //! Obtains a lock and calls Vacuum while holding that lock. void Vacuum(); - //! Whether or not the index requires transactionality. If true we will create delta indexes - virtual bool RequiresTransactionality() const; - //! Creates an empty copy of the index with the same schema, etc, but a different constraint type - //! This will only be called if RequiresTransactionality returns true - virtual unique_ptr CreateEmptyCopy(const string &name_prefix, - IndexConstraintType constraint_type) const; + //! Whether or not the index supports the creation of delta indexes + virtual bool SupportsDeltaIndexes() const; + //! 
Creates a delta index - an empty copy of the index with the same schema, etc + //! This will only be called if SupportsDeltaIndexes returns true + virtual unique_ptr CreateDeltaIndex(DeltaIndexType delta_index_type) const; //! Returns the in-memory usage of the index. The lock obtained from InitializeLock must be held virtual idx_t GetInMemorySize(IndexLock &state) = 0; diff --git a/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp b/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp index d17e6944f..1ab568fb5 100644 --- a/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +++ b/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp @@ -86,6 +86,8 @@ struct MergeSortTree { using RunElements = array; using Games = array; + static constexpr ElementType INVALID = std::numeric_limits::max(); + struct CompareElements { explicit CompareElements(const CMP &cmp) : cmp(cmp) { } @@ -122,6 +124,9 @@ struct MergeSortTree { pair SelectNth(const SubFrames &frames, idx_t n) const; inline ElementType NthElement(idx_t i) const { + if (tree.empty() || tree.front().first.empty()) { + return INVALID; + } return tree.front().first[i]; } diff --git a/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp b/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp index 52e32d3fd..dfca3fa5e 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp @@ -30,7 +30,6 @@ class PhysicalStreamingWindow : public PhysicalOperator { public: unique_ptr GetGlobalOperatorState(ClientContext &context) const override; - unique_ptr GetOperatorState(ExecutionContext &context) const override; OperatorResultType Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state) const override; @@ -50,13 +49,13 @@ class 
PhysicalStreamingWindow : public PhysicalOperator { private: void ExecuteFunctions(ExecutionContext &context, DataChunk &chunk, DataChunk &delayed, - GlobalOperatorState &gstate_p, OperatorState &state_p) const; + GlobalOperatorState &gstate_p) const; void ExecuteInput(ExecutionContext &context, DataChunk &delayed, DataChunk &input, DataChunk &chunk, - GlobalOperatorState &gstate, OperatorState &state) const; + GlobalOperatorState &gstate) const; void ExecuteDelayed(ExecutionContext &context, DataChunk &delayed, DataChunk &input, DataChunk &chunk, - GlobalOperatorState &gstate, OperatorState &state) const; + GlobalOperatorState &gstate) const; void ExecuteShifted(ExecutionContext &context, DataChunk &delayed, DataChunk &input, DataChunk &chunk, - GlobalOperatorState &gstate, OperatorState &state) const; + GlobalOperatorState &gstate) const; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/window/window_collection.hpp b/src/duckdb/src/include/duckdb/function/window/window_collection.hpp index 2dae27c6a..7f828c1d9 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_collection.hpp @@ -86,6 +86,10 @@ class WindowCursor { WindowCursor(const WindowCollection &paged, column_t col_idx); WindowCursor(const WindowCollection &paged, vector column_ids); + //! The row count of the paged collection + idx_t Count() const { + return paged.size(); + } //! Is the scan in range? 
inline bool RowIsVisible(idx_t row_idx) const { return (row_idx < state.next_row_index && state.current_row_index <= row_idx); diff --git a/src/duckdb/src/include/duckdb/optimizer/count_window_elimination.hpp b/src/duckdb/src/include/duckdb/optimizer/count_window_elimination.hpp new file mode 100644 index 000000000..dc9c0a300 --- /dev/null +++ b/src/duckdb/src/include/duckdb/optimizer/count_window_elimination.hpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/optimizer/count_window_elimination.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/optimizer/optimizer.hpp" + +#include "duckdb/optimizer/column_binding_replacer.hpp" + +namespace duckdb { + +class WindowSelfJoinOptimizer { +public: + explicit WindowSelfJoinOptimizer(Optimizer &optimizer); + + unique_ptr Optimize(unique_ptr op); + +private: + unique_ptr OptimizeInternal(unique_ptr op, ColumnBindingReplacer &replacer); + + Optimizer &optimizer; +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/parser/transformer.hpp b/src/duckdb/src/include/duckdb/parser/transformer.hpp index 2afa96722..8a8ba2ecb 100644 --- a/src/duckdb/src/include/duckdb/parser/transformer.hpp +++ b/src/duckdb/src/include/duckdb/parser/transformer.hpp @@ -113,7 +113,7 @@ class Transformer { unique_ptr TransformSelectStmt(duckdb_libpgquery::PGSelectStmt &select, bool is_select = true); unique_ptr TransformSelectStmt(duckdb_libpgquery::PGNode &node, bool is_select = true); //! Transform a Postgres T_AlterStmt node into a AlterStatement - unique_ptr TransformAlter(duckdb_libpgquery::PGAlterTableStmt &stmt); + unique_ptr TransformAlter(duckdb_libpgquery::PGAlterTableStmt &stmt); //! Transform a Postgres T_AlterDatabaseStmt node into a AlterStatement unique_ptr TransformAlterDatabase(duckdb_libpgquery::PGAlterDatabaseStmt &stmt); //! 
Transform a Postgres duckdb_libpgquery::T_PGRenameStmt node into a RenameStatement diff --git a/src/duckdb/src/include/duckdb/storage/data_table.hpp b/src/duckdb/src/include/duckdb/storage/data_table.hpp index 3b37752cd..43ccf7350 100644 --- a/src/duckdb/src/include/duckdb/storage/data_table.hpp +++ b/src/duckdb/src/include/duckdb/storage/data_table.hpp @@ -85,7 +85,8 @@ class DataTable : public enable_shared_from_this { //! Returns the maximum amount of threads that should be assigned to scan this data table idx_t MaxThreads(ClientContext &context) const; - void InitializeParallelScan(ClientContext &context, ParallelTableScanState &state); + void InitializeParallelScan(ClientContext &context, ParallelTableScanState &state, + const vector &column_indexes); idx_t NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state); //! Scans up to STANDARD_VECTOR_SIZE elements from the table starting @@ -97,6 +98,8 @@ class DataTable : public enable_shared_from_this { //! Fetch data from the specific row identifiers from the base table void Fetch(DuckTransaction &transaction, DataChunk &result, const vector &column_ids, const Vector &row_ids, idx_t fetch_count, ColumnFetchState &state); + void FetchCommitted(DataChunk &result, const vector &column_ids, const Vector &row_identifiers, + idx_t fetch_count, ColumnFetchState &state); //! Returns true, if the transaction can fetch the row ID. bool CanFetch(DuckTransaction &transaction, const row_t row_id); @@ -197,7 +200,7 @@ class DataTable : public enable_shared_from_this { void RevertIndexAppend(TableAppendState &state, DataChunk &chunk, Vector &row_identifiers); //! 
Remove the row identifiers from all the indexes of the table void RemoveFromIndexes(const QueryContext &context, Vector &row_identifiers, idx_t count, - IndexRemovalType removal_type); + IndexRemovalType removal_type, optional_idx checkpoint_id = optional_idx()); void SetAsMainTable() { this->version = DataTableVersion::MAIN_TABLE; diff --git a/src/duckdb/src/include/duckdb/storage/storage_manager.hpp b/src/duckdb/src/include/duckdb/storage/storage_manager.hpp index b7fa7ccec..261c4fb99 100644 --- a/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/storage_manager.hpp @@ -77,7 +77,7 @@ class StorageManager { //! Write that we started a checkpoint to the WAL if there is one - returns whether or not there is a WAL bool WALStartCheckpoint(MetaBlockPointer meta_block, CheckpointOptions &options); //! Finishes a checkpoint - void WALFinishCheckpoint(); + void WALFinishCheckpoint(lock_guard &wal_lock); // Get the WAL lock unique_ptr> GetWALLock(); diff --git a/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp b/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp index db959f4cd..45e8c33c2 100644 --- a/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp @@ -51,9 +51,9 @@ class ChunkInfo { virtual bool Cleanup(transaction_t lowest_transaction) const; virtual string ToString(idx_t max_count) const = 0; - virtual bool HasDeletes() const = 0; + virtual bool HasDeletes(transaction_t transaction_id = MAX_TRANSACTION_ID) const = 0; - virtual void Write(WriteStream &writer) const; + virtual void Write(WriteStream &writer, transaction_t transaction_id) const; static unique_ptr Read(FixedSizeAllocator &allocator, ReadStream &reader); public: @@ -95,9 +95,9 @@ class ChunkConstantInfo : public ChunkInfo { bool Cleanup(transaction_t lowest_transaction) const override; string ToString(idx_t max_count) const override; - bool HasDeletes() const 
override; + bool HasDeletes(transaction_t transaction_id = MAX_TRANSACTION_ID) const override; - void Write(WriteStream &writer) const override; + void Write(WriteStream &writer, transaction_t transaction_id) const override; static unique_ptr Read(ReadStream &reader); private: @@ -137,12 +137,12 @@ class ChunkVectorInfo : public ChunkInfo { idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count); void CommitDelete(transaction_t commit_id, const DeleteInfo &info); - bool HasDeletes() const override; + bool HasDeletes(transaction_t transaction_id = MAX_TRANSACTION_ID) const override; bool AnyDeleted() const; bool HasConstantInsertionId() const; transaction_t ConstantInsertId() const; - void Write(WriteStream &writer) const override; + void Write(WriteStream &writer, transaction_t transaction_id) const override; static unique_ptr Read(FixedSizeAllocator &allocator, ReadStream &reader); private: diff --git a/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp b/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp index d3a05eeeb..662a26ebd 100644 --- a/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp @@ -18,6 +18,7 @@ struct TableDeleteState { bool has_delete_constraints = false; DataChunk verify_chunk; vector col_ids; + shared_ptr checkpoint_lock; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp index 836759f0a..2d1f2424d 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp @@ -219,7 +219,7 @@ class RowGroup : public SegmentBase { static FilterPropagateResult CheckRowIdFilter(const TableFilter &filter, idx_t beg_row, idx_t end_row); idx_t GetColumnCount() const; - vector CheckpointDeletes(MetadataManager &manager); + vector CheckpointDeletes(RowGroupWriter &writer); private: 
optional_ptr GetVersionInfo(); diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp index 28756838d..cd944ff1e 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp @@ -82,6 +82,7 @@ class RowGroupCollection { void Fetch(TransactionData transaction, DataChunk &result, const vector &column_ids, const Vector &row_identifiers, idx_t fetch_count, ColumnFetchState &state); + //! Returns true, if the row group can fetch the row id for the transaction. bool CanFetch(TransactionData, const row_t row_id); @@ -103,7 +104,7 @@ class RowGroupCollection { bool IsPersistent() const; void RemoveFromIndexes(const QueryContext &context, TableIndexList &indexes, Vector &row_identifiers, idx_t count, - IndexRemovalType removal_type); + IndexRemovalType removal_type, optional_idx active_checkpoint = optional_idx()); idx_t Delete(TransactionData transaction, DataTable &table, row_t *ids, idx_t count); void Update(TransactionData transaction, DataTable &table, row_t *ids, const vector &column_ids, diff --git a/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp b/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp index 8856ce57b..ab761179c 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp @@ -25,9 +25,6 @@ class RowVersionManager { public: explicit RowVersionManager(BufferManager &buffer_manager) noexcept; - FixedSizeAllocator &GetAllocator() { - return allocator; - } idx_t GetCommittedDeletedCount(idx_t count); bool ShouldCheckpointRowGroup(transaction_t checkpoint_id, idx_t count); @@ -44,7 +41,7 @@ class RowVersionManager { idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count); void CommitDelete(idx_t 
vector_idx, transaction_t commit_id, const DeleteInfo &info); - vector Checkpoint(MetadataManager &manager); + vector Checkpoint(RowGroupWriter &writer); static shared_ptr Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager); bool HasUnserializedChanges(); @@ -54,10 +51,13 @@ class RowVersionManager { mutex version_lock; FixedSizeAllocator allocator; vector> vector_info; - bool has_unserialized_changes; + optional_idx uncheckpointed_delete_commit; vector storage_pointers; private: + FixedSizeAllocator &GetAllocator() { + return allocator; + } optional_ptr GetChunkInfo(idx_t vector_idx); ChunkVectorInfo &GetVectorInfo(idx_t vector_idx); void FillVectorInfo(idx_t vector_idx); diff --git a/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp b/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp index 6308dcfa8..7f6228819 100644 --- a/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp @@ -144,7 +144,15 @@ struct ColumnScanState { idx_t GetPositionInSegment() const; }; +enum class FetchType { + //! Verify if each row is valid for the transaction prior to fetching + TRANSACTIONAL_FETCH, + // Force fetch the row, regardless of it if is valid for the transaction or not + FORCE_FETCH +}; + struct ColumnFetchState { + FetchType fetch_type = FetchType::TRANSACTIONAL_FETCH; //! The query context for this fetch QueryContext context; //! The set of pinned block handles for this set of fetches diff --git a/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp b/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp index bb26084df..b21e90c08 100644 --- a/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp @@ -34,6 +34,8 @@ struct IndexEntry { unique_ptr deleted_rows_in_use; //! 
Data that was added to the index during the last checkpoint unique_ptr added_data_during_checkpoint; + //! Data that was removed from the index during the last checkpoint + unique_ptr removed_data_during_checkpoint; //! The last checkpoint index that was written with this index optional_idx last_written_checkpoint; }; @@ -98,7 +100,7 @@ class TableIndexList { index_entries = std::move(other.index_entries); } //! Merge any changes added to deltas during a checkpoint back into the main indexes - void MergeCheckpointDeltas(transaction_t checkpoint_id); + void MergeCheckpointDeltas(DataTable &storage, transaction_t checkpoint_id); //! Returns true, if all indexes //! Find the foreign key matching the keys. optional_ptr FindForeignKeyIndex(const vector &fk_keys, const ForeignKeyType fk_type); diff --git a/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp b/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp index 0d91a9dc0..5998cbe54 100644 --- a/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +++ b/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp @@ -23,14 +23,13 @@ struct UpdateInfo; class CleanupState { public: - explicit CleanupState(const QueryContext &context, transaction_t lowest_active_transaction, + explicit CleanupState(DuckTransaction &transaction, transaction_t lowest_active_transaction, ActiveTransactionState transaction_state); public: void CleanupEntry(UndoFlags type, data_ptr_t data); private: - QueryContext context; //! 
Lowest active transaction transaction_t lowest_active_transaction; ActiveTransactionState transaction_state; diff --git a/src/duckdb/src/include/duckdb/transaction/commit_state.hpp b/src/duckdb/src/include/duckdb/transaction/commit_state.hpp index 3ad975925..97ea0be77 100644 --- a/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +++ b/src/duckdb/src/include/duckdb/transaction/commit_state.hpp @@ -29,7 +29,7 @@ enum class CommitMode { COMMIT, REVERT_COMMIT }; struct IndexDataRemover { public: - explicit IndexDataRemover(QueryContext context, IndexRemovalType removal_type); + explicit IndexDataRemover(DuckTransaction &transaction, QueryContext context, IndexRemovalType removal_type); void PushDelete(DeleteInfo &info); void Verify(); @@ -38,6 +38,7 @@ struct IndexDataRemover { void Flush(DataTable &table, row_t *row_numbers, idx_t count); private: + DuckTransaction &transaction; // data for index cleanup QueryContext context; //! While committing, we remove data from any indexes that was deleted diff --git a/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp b/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp index f0169b3b8..361424053 100644 --- a/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +++ b/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp @@ -89,9 +89,6 @@ class DuckTransaction : public Transaction { } unique_ptr TryGetCheckpointLock(); - bool HasWriteLock() const { - return write_lock.get(); - } //! Get a shared lock on a table shared_ptr SharedLockTable(DataTableInfo &info); @@ -105,8 +102,10 @@ class DuckTransaction : public Transaction { UndoBuffer undo_buffer; //! The set of uncommitted appends for the transaction unique_ptr storage; - //! Write lock - unique_ptr write_lock; + //! Lock that prevents checkpoints from starting + unique_ptr checkpoint_lock; + //! Lock that prevents vacuums from starting + unique_ptr vacuum_lock; //! Lock for accessing sequence_usage mutex sequence_lock; //! 
Map of all sequences that were used during the transaction and the value they had in this transaction diff --git a/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp b/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp index 7466fd254..66cac1943 100644 --- a/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +++ b/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp @@ -82,6 +82,8 @@ class DuckTransactionManager : public TransactionManager { //! Try to obtain an exclusive checkpoint lock unique_ptr TryGetCheckpointLock(); unique_ptr TryUpgradeCheckpointLock(StorageLockKey &lock); + unique_ptr SharedVacuumLock(); + unique_ptr TryGetVacuumLock(); //! Returns the current version of the catalog (incremented whenever anything changes, not stored between restarts) DUCKDB_API idx_t GetCatalogVersion(Transaction &transaction); @@ -135,6 +137,8 @@ class DuckTransactionManager : public TransactionManager { mutex transaction_lock; //! The checkpoint lock StorageLock checkpoint_lock; + //! The vacuum lock - necessary to start vacuum operations + StorageLock vacuum_lock; //! 
Lock necessary to start transactions only - used by FORCE CHECKPOINT to prevent new transactions from starting mutex start_transaction_lock; diff --git a/src/duckdb/src/main/http/http_util.cpp b/src/duckdb/src/main/http/http_util.cpp index fb5a9491f..f562dd8cc 100644 --- a/src/duckdb/src/main/http/http_util.cpp +++ b/src/duckdb/src/main/http/http_util.cpp @@ -123,7 +123,7 @@ unique_ptr HTTPUtil::Request(BaseRequest &request, unique_ptr table_map; + Optimizer &optimizer; + + void VisitOperator(LogicalOperator &op) override { + // Rebind definitions + if (op.type == LogicalOperatorType::LOGICAL_GET) { + auto &get = op.Cast(); + auto new_idx = optimizer.binder.GenerateTableIndex(); + table_map[get.table_index] = new_idx; + get.table_index = new_idx; + } + if (op.type == LogicalOperatorType::LOGICAL_PROJECTION) { + auto &proj = op.Cast(); + auto new_idx = optimizer.binder.GenerateTableIndex(); + table_map[proj.table_index] = new_idx; + proj.table_index = new_idx; + } + if (op.type == LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY) { + auto &agg = op.Cast(); + auto new_agg_idx = optimizer.binder.GenerateTableIndex(); + auto new_grp_idx = optimizer.binder.GenerateTableIndex(); + table_map[agg.aggregate_index] = new_agg_idx; + table_map[agg.group_index] = new_grp_idx; + agg.aggregate_index = new_agg_idx; + agg.group_index = new_grp_idx; + } + // TODO: Handle other operators defining tables if needed + // But Get/Projection/Aggregate are most common in subplans. 
+ + VisitOperatorChildren(op); + VisitOperatorExpressions(op); + } + + void VisitExpression(unique_ptr *expression) override { + auto &expr = *expression; + if (expr->GetExpressionClass() == ExpressionClass::BOUND_COLUMN_REF) { + auto &bound = expr->Cast(); + if (table_map.count(bound.binding.table_index)) { + bound.binding.table_index = table_map[bound.binding.table_index]; + } + } + VisitExpressionChildren(**expression); + } +}; + +WindowSelfJoinOptimizer::WindowSelfJoinOptimizer(Optimizer &optimizer) : optimizer(optimizer) { +} + +unique_ptr WindowSelfJoinOptimizer::Optimize(unique_ptr op) { + ColumnBindingReplacer replacer; + op = OptimizeInternal(std::move(op), replacer); + if (!replacer.replacement_bindings.empty()) { + replacer.VisitOperator(*op); + } + return op; +} + +unique_ptr WindowSelfJoinOptimizer::OptimizeInternal(unique_ptr op, + ColumnBindingReplacer &replacer) { + if (op->type == LogicalOperatorType::LOGICAL_FILTER) { + auto &filter = op->Cast(); + if (filter.expressions.size() == 1 && filter.children.size() == 1 && + filter.children[0]->type == LogicalOperatorType::LOGICAL_WINDOW) { + auto &window = filter.children[0]->Cast(); + + // Check recursively + window.children[0] = OptimizeInternal(std::move(window.children[0]), replacer); + + if (window.expressions.size() != 1) { + return op; + } + if (window.expressions[0]->type != ExpressionType::WINDOW_AGGREGATE) { + return op; + } + + // We can only optimize if there is a single window function equality comparison + // Check matches + if (filter.expressions[0]->type != ExpressionType::COMPARE_EQUAL) { + return op; + } + auto &comp = filter.expressions[0]->Cast(); + if (comp.left->type != ExpressionType::BOUND_COLUMN_REF) { + return op; + } + auto &col_ref = comp.left->Cast(); + auto t_idx = col_ref.binding.table_index; + auto c_idx = col_ref.binding.column_index; + auto w_idx = window.window_index; + + if (t_idx != w_idx || c_idx != 0) { + return op; + } + + // Check right side is constant 1 + if 
(comp.right->type != ExpressionType::VALUE_CONSTANT) { + return op; + } + auto &const_expr = comp.right->Cast(); + if (!const_expr.value.type().IsIntegral()) { + return op; + } + if (const_expr.value.GetValue() != 1) { + return op; + } + + auto &w_expr = window.expressions[0]->Cast(); + if (w_expr.aggregate->name != "count" && w_expr.aggregate->name != "count_star") { + return op; + } + if (!w_expr.orders.empty()) { + return op; + } + if (w_expr.partitions.empty()) { + return op; + } + + // --- Transformation --- + + auto original_child = std::move(window.children[0]); + auto copy_child = original_child->Copy(optimizer.context); + + // Rebind copy_child to avoid duplicate table indices + CountWindowTableRebinder rebinder(optimizer); + rebinder.VisitOperator(*copy_child); + + auto aggregate_index = optimizer.binder.GenerateTableIndex(); + auto group_index = optimizer.binder.GenerateTableIndex(); + + vector> groups; + vector> aggregates; + + // Create Aggregate Operator + for (auto &part : w_expr.partitions) { + auto part_copy = part->Copy(); + rebinder.VisitExpression(&part_copy); // Update bindings + groups.push_back(std::move(part_copy)); + } + + auto count_func = *w_expr.aggregate; + unique_ptr bind_info; + if (w_expr.bind_info) { + bind_info = w_expr.bind_info->Copy(); + } else { + bind_info = nullptr; + } + + vector> children; + for (auto &child : w_expr.children) { + auto child_copy = child->Copy(); + rebinder.VisitExpression(&child_copy); // Update bindings + children.push_back(std::move(child_copy)); + } + + auto aggr_type = w_expr.distinct ? AggregateType::DISTINCT : AggregateType::NON_DISTINCT; + + auto agg_expr = make_uniq(std::move(count_func), std::move(children), nullptr, + std::move(bind_info), aggr_type); + + aggregates.push_back(std::move(agg_expr)); + + // args: group_index, aggregate_index, ... 
+ auto agg_op = make_uniq(group_index, aggregate_index, std::move(aggregates)); + + agg_op->groups = std::move(groups); + agg_op->children.push_back(std::move(copy_child)); + agg_op->ResolveOperatorTypes(); + + if (agg_op->types.size() <= agg_op->groups.size()) { + throw InternalException("LogicalAggregate types size mismatch"); + } + + // Filter on aggregate: count = 1 + // Count is the first aggregate, so it's at agg_op->groups.size() in the types list + // Bindings: Aggregates are at aggregate_index + auto cnt_ref = make_uniq(agg_op->types[agg_op->groups.size()], + ColumnBinding(aggregate_index, 0)); + + auto filter_expr = + make_uniq(ExpressionType::COMPARE_EQUAL, std::move(cnt_ref), + make_uniq(Value::BIGINT(1))); + + auto rhs_filter = make_uniq(); + rhs_filter->expressions.push_back(std::move(filter_expr)); + rhs_filter->children.push_back(std::move(agg_op)); + rhs_filter->ResolveOperatorTypes(); + + // Semi Join + auto join = make_uniq(JoinType::SEMI); + + for (size_t i = 0; i < w_expr.partitions.size(); ++i) { + JoinCondition cond; + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + cond.left = w_expr.partitions[i]->Copy(); + cond.right = make_uniq(w_expr.partitions[i]->return_type, + ColumnBinding(group_index, i)); + join->conditions.push_back(std::move(cond)); + } + + join->children.push_back(std::move(original_child)); + join->children.push_back(std::move(rhs_filter)); + join->ResolveOperatorTypes(); + + // Create Constant 1 + auto dummy_index = optimizer.binder.GenerateTableIndex(); + auto dummy = make_uniq(dummy_index); + dummy->ResolveOperatorTypes(); + + auto const_one = make_uniq(Value::BIGINT(1)); + const_one->alias = "count_window_result"; + + auto proj_index = optimizer.binder.GenerateTableIndex(); + vector> proj_expressions; + proj_expressions.push_back(std::move(const_one)); + + auto projection = make_uniq(proj_index, std::move(proj_expressions)); + projection->children.push_back(std::move(dummy)); + 
projection->ResolveOperatorTypes(); + + // Cross Product + auto cross = make_uniq(std::move(join), std::move(projection)); + cross->ResolveOperatorTypes(); + + // Replace Count binding + // Old window column: (window.window_index, 0) + // New constant column: (proj_index, 0) + ColumnBinding old_binding(window.window_index, 0); + ColumnBinding new_binding(proj_index, 0); + + replacer.replacement_bindings.emplace_back(old_binding, new_binding); + + // We do NOT need to replace other bindings because CrossProduct preserves left child bindings, + // and Window (presumably) passed through input bindings without re-binding. + + return std::move(cross); + } + } else if (!op->children.empty()) { + for (auto &child : op->children) { + child = OptimizeInternal(std::move(child), replacer); + } + } + return op; +} + +} // namespace duckdb diff --git a/src/duckdb/src/optimizer/filter_combiner.cpp b/src/duckdb/src/optimizer/filter_combiner.cpp index f7099c9a1..e2480b963 100644 --- a/src/duckdb/src/optimizer/filter_combiner.cpp +++ b/src/duckdb/src/optimizer/filter_combiner.cpp @@ -367,7 +367,7 @@ FilterPushdownResult FilterCombiner::TryPushdownConstantFilter(TableFilterSet &t void ReplaceWithBoundReference(unique_ptr &root_expr) { ExpressionIterator::VisitExpressionMutable( root_expr, [&](BoundColumnRefExpression &col_ref, unique_ptr &expr) { - expr = make_uniq(col_ref.return_type, 0ULL); + expr = make_uniq(col_ref.alias, col_ref.return_type, 0ULL); }); } diff --git a/src/duckdb/src/optimizer/optimizer.cpp b/src/duckdb/src/optimizer/optimizer.cpp index 9bf4fcf8d..4f811bd84 100644 --- a/src/duckdb/src/optimizer/optimizer.cpp +++ b/src/duckdb/src/optimizer/optimizer.cpp @@ -38,6 +38,7 @@ #include "duckdb/optimizer/unnest_rewriter.hpp" #include "duckdb/optimizer/late_materialization.hpp" #include "duckdb/optimizer/common_subplan_optimizer.hpp" +#include "duckdb/optimizer/count_window_elimination.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/planner/planner.hpp" @@ 
-186,6 +187,11 @@ void Optimizer::RunBuiltInOptimizers() { plan = empty_result_pullup.Optimize(std::move(plan)); }); + RunOptimizer(OptimizerType::COUNT_WINDOW_ELIMINATION, [&]() { + WindowSelfJoinOptimizer window_self_join_optimizer(*this); + plan = window_self_join_optimizer.Optimize(std::move(plan)); + }); + // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { diff --git a/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp b/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp index 105953fe4..708221883 100644 --- a/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +++ b/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp @@ -15,13 +15,15 @@ namespace duckdb { -static void GetColumnIndex(unique_ptr &expr, idx_t &index) { +static void GetColumnIndex(unique_ptr &expr, idx_t &index, string &alias) { if (expr->type == ExpressionType::BOUND_REF) { auto &bound_ref = expr->Cast(); index = bound_ref.index; + alias = bound_ref.alias; return; } - ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr &child) { GetColumnIndex(child, index); }); + ExpressionIterator::EnumerateChildren(*expr, + [&](unique_ptr &child) { GetColumnIndex(child, index, alias); }); } FilterPropagateResult StatisticsPropagator::PropagateTableFilter(ColumnBinding stats_binding, BaseStatistics &stats, @@ -32,15 +34,16 @@ FilterPropagateResult StatisticsPropagator::PropagateTableFilter(ColumnBinding s // get physical storage index of the filter // since it is a table filter, every storage index is the same idx_t physical_index = DConstants::INVALID_INDEX; - GetColumnIndex(expr_filter.expr, physical_index); + string column_alias; + GetColumnIndex(expr_filter.expr, physical_index, column_alias); D_ASSERT(physical_index != DConstants::INVALID_INDEX); - auto column_ref = make_uniq(stats.GetType(), stats_binding); + auto 
column_ref = make_uniq(column_alias, stats.GetType(), stats_binding); auto filter_expr = expr_filter.ToExpression(*column_ref); // handle the filter before updating the statistics // otherwise the filter can be pruned by the updated statistics auto propagate_result = HandleFilter(filter_expr); - auto colref = make_uniq(stats.GetType(), physical_index); + auto colref = make_uniq(column_alias, stats.GetType(), physical_index); UpdateFilterStatistics(*filter_expr); // replace BoundColumnRefs with BoundRefs diff --git a/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp b/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp index 8e676f3c8..203d90fdc 100644 --- a/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +++ b/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp @@ -2,8 +2,13 @@ #include "duckdb/parser/expression/cast_expression.hpp" #include "duckdb/parser/expression/columnref_expression.hpp" #include "duckdb/parser/statement/alter_statement.hpp" +#include "duckdb/parser/sql_statement.hpp" #include "duckdb/parser/transformer.hpp" #include "duckdb/common/exception/parser_exception.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/statement/multi_statement.hpp" +#include "duckdb/parser/statement/update_statement.hpp" +#include "duckdb/parser/tableref/basetableref.hpp" namespace duckdb { @@ -19,7 +24,60 @@ vector Transformer::TransformNameList(duckdb_libpgquery::PGList &list) { return result; } -unique_ptr Transformer::TransformAlter(duckdb_libpgquery::PGAlterTableStmt &stmt) { +void AddToMultiStatement(const unique_ptr &multi_statement, unique_ptr alter_info) { + auto alter_statement = make_uniq(); + alter_statement->info = std::move(alter_info); + multi_statement->statements.push_back(std::move(alter_statement)); +} + +void AddUpdateToMultiStatement(const unique_ptr &multi_statement, const string &column_name, + const string &table_name, const unique_ptr 
&original_expression) { + auto update_statement = make_uniq(); + + auto table_ref = make_uniq(); + + table_ref->table_name = table_name; + update_statement->table = std::move(table_ref); + + auto set_info = make_uniq(); + set_info->columns.push_back(column_name); + set_info->expressions.push_back(original_expression->Copy()); + update_statement->set_info = std::move(set_info); + + multi_statement->statements.push_back(std::move(update_statement)); +} + +unique_ptr TransformAndMaterializeAlter(const duckdb_libpgquery::PGAlterTableStmt &stmt, + AlterEntryData &data, + unique_ptr info_with_null_placeholder, + const string &column_name, + unique_ptr expression) { + auto multi_statement = make_uniq(); + /* Here we do a workaround that consists of the following statements: + * 1. `ALTER TABLE t ADD COLUMN col DEFAULT NULL;` + * 2. `UPDATE t SET u = ;` + * 3. `ALTER TABLE t ALTER u SET DEFAULT ;` + * + * This workaround exists because, when statements like this were executed: + * `ALTER TABLE ... ADD COLUMN ... DEFAULT ` + * the WAL replay would re-run the default expression, and with expressions such as RANDOM or CURRENT_TIMESTAMP, the + * value would be different from that of the original run. By now doing an UPDATE, we force materialization of these + * values, which makes WAL replays consistent. + */ + + // 1. `ALTER TABLE t ADD COLUMN col DEFAULT NULL;` + AddToMultiStatement(multi_statement, std::move(info_with_null_placeholder)); + + // 2. `UPDATE t SET u = ;` + AddUpdateToMultiStatement(multi_statement, column_name, stmt.relation->relname, expression); + + // 3. `ALTER TABLE t ALTER u SET DEFAULT ;` + // Reinstate the original default expression. 
+ AddToMultiStatement(multi_statement, make_uniq(data, column_name, std::move(expression))); + return multi_statement; +} + +unique_ptr Transformer::TransformAlter(duckdb_libpgquery::PGAlterTableStmt &stmt) { D_ASSERT(stmt.relation); if (stmt.cmds->length != 1) { throw ParserException("Only one ALTER command per statement is supported"); @@ -62,7 +120,18 @@ unique_ptr Transformer::TransformAlter(duckdb_libpgquery::PGAlte column_entry.SetName(column_names.back()); if (column_names.size() == 1) { // ADD COLUMN - result->info = make_uniq(std::move(data), std::move(column_entry), command->missing_ok); + if (!column_entry.HasDefaultValue() || + column_entry.DefaultValue().GetExpressionClass() == ExpressionClass::CONSTANT) { + result->info = + make_uniq(std::move(data), std::move(column_entry), command->missing_ok); + break; + } + auto null_column = column_entry.Copy(); + null_column.SetDefaultValue(make_uniq(ConstantExpression(Value(nullptr)))); + return unique_ptr(std::move(TransformAndMaterializeAlter( + stmt, data, make_uniq(data, std::move(null_column), command->missing_ok), + column_entry.GetName(), column_entry.DefaultValue().Copy()))); + } else { // ADD FIELD column_names.pop_back(); @@ -158,7 +227,7 @@ unique_ptr Transformer::TransformAlter(duckdb_libpgquery::PGAlte throw NotImplementedException("No support for that ALTER TABLE option yet!"); } } - return result; + return unique_ptr(std::move(result)); } } // namespace duckdb diff --git a/src/duckdb/src/planner/planner.cpp b/src/duckdb/src/planner/planner.cpp index ca5e72d88..e6794dcfc 100644 --- a/src/duckdb/src/planner/planner.cpp +++ b/src/duckdb/src/planner/planner.cpp @@ -14,7 +14,7 @@ #include "duckdb/transaction/meta_transaction.hpp" #include "duckdb/execution/column_binding_resolver.hpp" #include "duckdb/main/attached_database.hpp" - +#include "duckdb/parser/statement/multi_statement.hpp" #include "duckdb/planner/subquery/flatten_dependent_join.hpp" namespace duckdb { diff --git 
a/src/duckdb/src/storage/checkpoint_manager.cpp b/src/duckdb/src/storage/checkpoint_manager.cpp index 854a1c11a..fb9859315 100644 --- a/src/duckdb/src/storage/checkpoint_manager.cpp +++ b/src/duckdb/src/storage/checkpoint_manager.cpp @@ -263,10 +263,19 @@ void SingleFileCheckpointWriter::CreateCheckpoint() { } // truncate the WAL + unique_ptr> wal_lock; if (has_wal) { - storage_manager.WALFinishCheckpoint(); + wal_lock = storage_manager.GetWALLock(); + storage_manager.WALFinishCheckpoint(*wal_lock); } + // FIXME: hold the WAL lock while we are merging checkpoint deltas + // this prevents any commits from happening while this is going on + // this is currently required because of the way that "deletes + inserts" of the same row are processed + // currently we FIRST append the new (duplicate) insert, THEN delete the old value + // if we append the duplicate value, then call MergeCheckpointDeltas, that will fail with a duplicate entry error + // we can fix this and stop holding the WAL lock once we fix / remove that order of operations in the commit + // for any indexes that were appended to while checkpointing, merge the delta back into the main index // FIXME: we only clean up appends made to tables that are part of this checkpoint // Currently, that is correct, since we don't allow creating tables DURING a checkpoint @@ -283,7 +292,7 @@ void SingleFileCheckpointWriter::CreateCheckpoint() { auto &storage = table.GetStorage(); auto &table_info = storage.GetDataTableInfo(); auto &index_list = table_info->GetIndexes(); - index_list.MergeCheckpointDeltas(options.transaction_id); + index_list.MergeCheckpointDeltas(storage, options.transaction_id); } } diff --git a/src/duckdb/src/storage/compression/numeric_constant.cpp b/src/duckdb/src/storage/compression/numeric_constant.cpp index f9cc79b47..e13a0748f 100644 --- a/src/duckdb/src/storage/compression/numeric_constant.cpp +++ b/src/duckdb/src/storage/compression/numeric_constant.cpp @@ -160,7 +160,8 @@ void 
ConstantFun::FiltersNullValues(const LogicalType &type, const TableFilter & auto &expr_filter = filter.Cast(); auto &state = filter_state.Cast(); Value val(type); - filters_nulls = expr_filter.EvaluateWithConstant(state.executor, val); + //! If the expression evaluates to true, containing only a NULL vector, it *must* be an IS NULL filter + filters_nulls = !expr_filter.EvaluateWithConstant(state.executor, val); filters_valid_values = false; break; } diff --git a/src/duckdb/src/storage/data_table.cpp b/src/duckdb/src/storage/data_table.cpp index aaae4de5b..330b2a882 100644 --- a/src/duckdb/src/storage/data_table.cpp +++ b/src/duckdb/src/storage/data_table.cpp @@ -142,11 +142,6 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, BoundConstraint : db(parent.db), info(parent.info), row_groups(parent.row_groups), version(DataTableVersion::MAIN_TABLE) { // ALTER COLUMN to add a new constraint. - // Clone the storage info vector or the table. - for (const auto &index_info : parent.info->index_storage_infos) { - info->index_storage_infos.push_back(IndexStorageInfo(index_info.name)); - } - // Bind all indexes. 
info->BindIndexes(context); @@ -268,7 +263,8 @@ idx_t DataTable::MaxThreads(ClientContext &context) const { return GetTotalRows() / parallel_scan_tuple_count + 1; } -void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScanState &state) { +void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScanState &state, + const vector &column_indexes) { auto &local_storage = LocalStorage::Get(context, db); row_groups->InitializeParallelScan(state.scan_state); @@ -426,6 +422,12 @@ void DataTable::Fetch(DuckTransaction &transaction, DataChunk &result, const vec row_groups->Fetch(transaction, result, column_ids, row_identifiers, fetch_count, state); } +void DataTable::FetchCommitted(DataChunk &result, const vector &column_ids, const Vector &row_identifiers, + idx_t fetch_count, ColumnFetchState &state) { + TransactionData commit_transaction(MAX_TRANSACTION_ID, TRANSACTION_ID_START - 1); + row_groups->Fetch(commit_transaction, result, column_ids, row_identifiers, fetch_count, state); +} + bool DataTable::CanFetch(DuckTransaction &transaction, const row_t row_id) { return row_groups->CanFetch(transaction, row_id); } @@ -683,21 +685,26 @@ void DataTable::VerifyUniqueIndexes(TableIndexList &indexes, optional_ptr manager) { // Verify the constraint without a conflict manager. 
if (!manager) { - return indexes.Scan([&](Index &index) { + return indexes.ScanEntries([&](IndexEntry &entry) { + auto &index = *entry.index; if (!index.IsUnique() || index.GetIndexType() != ART::TYPE_NAME) { return false; } D_ASSERT(index.IsBound()); auto &art = index.Cast(); + + lock_guard guard(entry.lock); + IndexAppendInfo index_append_info; if (storage) { auto delete_index = storage->delete_indexes.Find(art.GetIndexName()); - D_ASSERT(!delete_index || delete_index->IsBound()); - IndexAppendInfo index_append_info(IndexAppendMode::DEFAULT, delete_index); - art.VerifyAppend(chunk, index_append_info, nullptr); - } else { - IndexAppendInfo index_append_info; - art.VerifyAppend(chunk, index_append_info, nullptr); + if (delete_index) { + index_append_info.delete_indexes.push_back(*delete_index); + } + } + if (entry.removed_data_during_checkpoint) { + index_append_info.delete_indexes.push_back(*entry.removed_data_during_checkpoint); } + art.VerifyAppend(chunk, index_append_info, nullptr); return false; }); } @@ -729,9 +736,8 @@ void DataTable::VerifyUniqueIndexes(TableIndexList &indexes, optional_ptrSetMode(ConflictManagerMode::SCAN); auto &matching_indexes = manager->MatchingIndexes(); auto &matching_delete_indexes = manager->MatchingDeleteIndexes(); - IndexAppendInfo index_append_info(IndexAppendMode::DEFAULT, nullptr); for (idx_t i = 0; i < matching_indexes.size(); i++) { - index_append_info.delete_index = matching_delete_indexes[i]; + IndexAppendInfo index_append_info(IndexAppendMode::DEFAULT, matching_delete_indexes[i]); matching_indexes[i].get().VerifyAppend(chunk, index_append_info, *manager); } @@ -1243,22 +1249,24 @@ ErrorData DataTable::AppendToIndexes(TableIndexList &indexes, optional_ptr append_index = bound_index; - optional_ptr lookup_index; + optional_ptr lookup_index, lookup_delete_index; // check if there's an on-going checkpoint - if (active_checkpoint.IsValid() && bound_index.RequiresTransactionality()) { - // check if we've already written this 
index during the on-going checkpoint + if (active_checkpoint.IsValid() && bound_index.SupportsDeltaIndexes()) { + // there's an ongoing checkpoint - check if we need to use delta indexes or if we can write to the main + // index if (!entry.last_written_checkpoint.IsValid() || entry.last_written_checkpoint.GetIndex() != active_checkpoint.GetIndex()) { - // there's an on-going checkpoint and we haven't written the index to disk yet + // there's an on-going checkpoint and we haven't flushed the index yet // we need to append to the "added_data_during_checkpoint" instead // create it if it does not exist if (!entry.added_data_during_checkpoint) { entry.added_data_during_checkpoint = - bound_index.CreateEmptyCopy("added_during_checkpoint_", bound_index.index_constraint_type); + bound_index.CreateDeltaIndex(DeltaIndexType::ADDED_DURING_CHECKPOINT); } if (bound_index.IsUnique()) { // before appending we still need to look-up in the main index to verify there are no conflicts lookup_index = bound_index; + lookup_delete_index = delete_index; } append_index = entry.added_data_during_checkpoint; } @@ -1268,8 +1276,14 @@ ErrorData DataTable::AppendToIndexes(TableIndexList &indexes, optional_ptr
VerifyAppend(table_chunk, index_append_info, nullptr); + IndexAppendInfo lookup_append_info; + if (lookup_delete_index) { + lookup_append_info.delete_indexes.push_back(*lookup_delete_index); + } + if (entry.removed_data_during_checkpoint) { + lookup_append_info.delete_indexes.push_back(*entry.removed_data_during_checkpoint); + } + lookup_index->VerifyAppend(table_chunk, lookup_append_info, nullptr); } // Append the mock chunk containing empty columns for non-key columns. @@ -1330,9 +1344,9 @@ void DataTable::RevertIndexAppend(TableAppendState &state, DataChunk &chunk, Vec } void DataTable::RemoveFromIndexes(const QueryContext &context, Vector &row_identifiers, idx_t count, - IndexRemovalType removal_type) { + IndexRemovalType removal_type, optional_idx active_checkpoint) { D_ASSERT(IsMainTable()); - row_groups->RemoveFromIndexes(context, info->indexes, row_identifiers, count, removal_type); + row_groups->RemoveFromIndexes(context, info->indexes, row_identifiers, count, removal_type, active_checkpoint); } //===--------------------------------------------------------------------===// @@ -1382,6 +1396,7 @@ void DataTable::VerifyDeleteConstraints(optional_ptr storage, unique_ptr DataTable::InitializeDelete(TableCatalogEntry &table, ClientContext &context, const vector> &bound_constraints) { + auto &transaction = DuckTransaction::Get(context, db); // Bind all indexes. 
info->BindIndexes(context); @@ -1398,6 +1413,7 @@ unique_ptr DataTable::InitializeDelete(TableCatalogEntry &tabl result->verify_chunk.Initialize(Allocator::Get(context), types); result->constraint_state = make_uniq(table, bound_constraints); } + result->checkpoint_lock = transaction.SharedLockTable(*info); return result; } diff --git a/src/duckdb/src/storage/local_storage.cpp b/src/duckdb/src/storage/local_storage.cpp index 6cb4b82e1..d72374a50 100644 --- a/src/duckdb/src/storage/local_storage.cpp +++ b/src/duckdb/src/storage/local_storage.cpp @@ -30,31 +30,19 @@ LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &table) if (constraint == IndexConstraintType::NONE) { return false; } - if (index.GetIndexType() != ART::TYPE_NAME) { - return false; - } if (!index.IsBound()) { return false; } - auto &art = index.Cast(); - - // UNIQUE constraint. - vector> expressions; - vector> delete_expressions; - for (auto &expr : art.unbound_expressions) { - expressions.push_back(expr->Copy()); - delete_expressions.push_back(expr->Copy()); + auto &bound_index = index.Cast(); + if (!bound_index.SupportsDeltaIndexes()) { + return false; } // Create a delete index and a local index. 
- auto &name = art.GetIndexName(); - auto &io_manager = art.table_io_manager; - auto delete_index = - make_uniq(name, constraint, art.GetColumnIds(), io_manager, std::move(delete_expressions), art.db); + auto delete_index = bound_index.CreateDeltaIndex(DeltaIndexType::LOCAL_DELETE); delete_indexes.AddIndex(std::move(delete_index)); - auto append_index = - make_uniq(name, constraint, art.GetColumnIds(), io_manager, std::move(expressions), art.db); + auto append_index = bound_index.CreateDeltaIndex(DeltaIndexType::LOCAL_APPEND); append_indexes.AddIndex(std::move(append_index)); return false; }); diff --git a/src/duckdb/src/storage/storage_manager.cpp b/src/duckdb/src/storage/storage_manager.cpp index b4e9b521a..339bd6152 100644 --- a/src/duckdb/src/storage/storage_manager.cpp +++ b/src/duckdb/src/storage/storage_manager.cpp @@ -179,8 +179,7 @@ bool StorageManager::WALStartCheckpoint(MetaBlockPointer meta_block, CheckpointO return true; } -void StorageManager::WALFinishCheckpoint() { - lock_guard guard(wal_lock); +void StorageManager::WALFinishCheckpoint(lock_guard &) { D_ASSERT(wal.get()); // "wal" points to the checkpoint WAL @@ -616,6 +615,20 @@ void SingleFileStorageManager::CreateCheckpoint(QueryContext context, Checkpoint if (read_only || !load_complete) { return; } + unique_ptr vacuum_lock; + if (options.type != CheckpointType::CONCURRENT_CHECKPOINT) { + auto &transaction_manager = GetAttached().GetTransactionManager().Cast(); + vacuum_lock = transaction_manager.TryGetVacuumLock(); + if (!vacuum_lock) { + if (options.type == CheckpointType::FULL_CHECKPOINT) { + options.type = CheckpointType::CONCURRENT_CHECKPOINT; + } else { + // nothing to do + return; + } + } + } + if (db.GetStorageExtension()) { db.GetStorageExtension()->OnCheckpointStart(db, options); } diff --git a/src/duckdb/src/storage/table/chunk_info.cpp b/src/duckdb/src/storage/table/chunk_info.cpp index dfef0b4a1..3a1708663 100644 --- a/src/duckdb/src/storage/table/chunk_info.cpp +++ 
b/src/duckdb/src/storage/table/chunk_info.cpp @@ -38,7 +38,7 @@ bool ChunkInfo::Cleanup(transaction_t lowest_transaction) const { return false; } -void ChunkInfo::Write(WriteStream &writer) const { +void ChunkInfo::Write(WriteStream &writer, transaction_t checkpoint_id) const { writer.Write(type); } @@ -99,8 +99,11 @@ void ChunkConstantInfo::CommitAppend(transaction_t commit_id, idx_t start, idx_t insert_id = commit_id; } -bool ChunkConstantInfo::HasDeletes() const { - bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id < TRANSACTION_ID_START; +bool ChunkConstantInfo::HasDeletes(transaction_t transaction_id) const { + if (transaction_id == MAX_TRANSACTION_ID) { + transaction_id = TRANSACTION_ID_START - 1; + } + bool is_deleted = insert_id >= TRANSACTION_ID_START || delete_id <= transaction_id; return is_deleted; } @@ -120,9 +123,9 @@ bool ChunkConstantInfo::Cleanup(transaction_t lowest_transaction) const { return true; } -void ChunkConstantInfo::Write(WriteStream &writer) const { - D_ASSERT(HasDeletes()); - ChunkInfo::Write(writer); +void ChunkConstantInfo::Write(WriteStream &writer, transaction_t checkpoint_id) const { + D_ASSERT(HasDeletes(checkpoint_id)); + ChunkInfo::Write(writer, checkpoint_id); writer.Write(start); } @@ -418,8 +421,22 @@ bool ChunkVectorInfo::Cleanup(transaction_t lowest_transaction) const { return true; } -bool ChunkVectorInfo::HasDeletes() const { - return AnyDeleted(); +bool ChunkVectorInfo::HasDeletes(transaction_t transaction_id) const { + if (!AnyDeleted()) { + return false; + } + if (transaction_id == MAX_TRANSACTION_ID) { + return true; + } + auto segment = allocator.GetHandle(deleted_data); + auto deleted = segment.GetPtr(); + + for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; i++) { + if (deleted[i] <= transaction_id) { + return true; + } + } + return false; } bool ChunkVectorInfo::AnyDeleted() const { @@ -476,9 +493,9 @@ idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) const { return delete_count; } -void 
ChunkVectorInfo::Write(WriteStream &writer) const { +void ChunkVectorInfo::Write(WriteStream &writer, transaction_t checkpoint_id) const { SelectionVector sel(STANDARD_VECTOR_SIZE); - transaction_t start_time = TRANSACTION_ID_START - 1; + transaction_t start_time = checkpoint_id == MAX_TRANSACTION_ID ? TRANSACTION_ID_START - 1 : checkpoint_id + 1; transaction_t transaction_id = DConstants::INVALID_INDEX; idx_t count = GetSelVector(start_time, transaction_id, sel, STANDARD_VECTOR_SIZE); if (count == STANDARD_VECTOR_SIZE) { @@ -493,7 +510,7 @@ void ChunkVectorInfo::Write(WriteStream &writer) const { return; } // write a boolean vector - ChunkInfo::Write(writer); + ChunkInfo::Write(writer, checkpoint_id); writer.Write(start); ValidityMask mask(STANDARD_VECTOR_SIZE); mask.Initialize(STANDARD_VECTOR_SIZE); diff --git a/src/duckdb/src/storage/table/column_segment.cpp b/src/duckdb/src/storage/table/column_segment.cpp index e1739bc8a..88cccfe88 100644 --- a/src/duckdb/src/storage/table/column_segment.cpp +++ b/src/duckdb/src/storage/table/column_segment.cpp @@ -240,9 +240,7 @@ void ColumnSegment::ConvertToPersistent(QueryContext context, optional_ptr extra_metadata_block_pointers; extra_metadata_block_pointers.reserve(write_data.existing_extra_metadata_blocks.size()); @@ -1323,7 +1323,7 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriteData write_data, RowGroupWrite metadata_blocks.insert(column_pointer.block_pointer); } if (metadata_manager) { - row_group_pointer.deletes_pointers = CheckpointDeletes(*metadata_manager); + row_group_pointer.deletes_pointers = CheckpointDeletes(writer); } // set up the pointers correctly within this row group for future operations column_pointers = row_group_pointer.data_pointers; @@ -1376,10 +1376,11 @@ PersistentRowGroupData RowGroup::SerializeRowGroupInfo(idx_t row_group_start) co return result; } -vector RowGroup::CheckpointDeletes(MetadataManager &manager) { +vector RowGroup::CheckpointDeletes(RowGroupWriter &writer) { if 
(HasUnloadedDeletes()) { // deletes were not loaded so they cannot be changed // re-use them as-is + auto &manager = *writer.GetMetadataManager(); manager.ClearModifiedBlocks(deletes_pointers); return deletes_pointers; } @@ -1388,7 +1389,7 @@ vector RowGroup::CheckpointDeletes(MetadataManager &manager) { // no version information: write nothing return vector(); } - return vinfo->Checkpoint(manager); + return vinfo->Checkpoint(writer); } void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &serializer) { diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 061649166..4069e2a91 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -364,7 +364,8 @@ void RowGroupCollection::Fetch(TransactionData transaction, DataChunk &result, c } auto ¤t_row_group = row_group->GetNode(); auto offset_in_row_group = UnsafeNumericCast(row_id) - row_group->GetRowStart(); - if (!current_row_group.Fetch(transaction, offset_in_row_group)) { + if (state.fetch_type == FetchType::TRANSACTIONAL_FETCH && + !current_row_group.Fetch(transaction, offset_in_row_group)) { continue; } state.row_group = row_group; @@ -739,46 +740,114 @@ void RowGroupCollection::Update(TransactionData transaction, DataTable &data_tab } while (pos < updates.size()); } -void GetIndexRemovalTargets(IndexEntry &entry, IndexRemovalType removal_type, optional_ptr &append_target, - optional_ptr &remove_target) { +struct IndexRemovalTargets { + optional_ptr append_target; + optional_ptr remove_target; + optional_ptr conditional_remove_target; + optional_ptr conditional_append_target; +}; + +void GetIndexRemovalTargetsActiveCheckpoint(IndexEntry &entry, IndexRemovalType removal_type, + IndexRemovalTargets &targets) { + auto &main_index = entry.index->Cast(); + + // create "removed_data_during_checkpoint" if it does not exist + if (!entry.removed_data_during_checkpoint) { 
+ entry.removed_data_during_checkpoint = main_index.CreateDeltaIndex(DeltaIndexType::REMOVED_DURING_CHECKPOINT); + } + if (removal_type == IndexRemovalType::MAIN_INDEX_ONLY || removal_type == IndexRemovalType::MAIN_INDEX) { + // removing from main index - but we cannot remove directly due to the concurrent checkpoint + // add removal to delta index + if (entry.added_data_during_checkpoint) { + // if we have also added data during this checkpoint - we might need to remove from there instead + // we FIRST try to remove from "added_data_during_checkpoint" + // any rows that are not there we add to "removed_data_during_checkpoint" + targets.conditional_remove_target = entry.added_data_during_checkpoint.get(); + targets.conditional_append_target = entry.removed_data_during_checkpoint.get(); + } else { + // add removed rows to "removed_data_during_checkpoint" + targets.conditional_append_target = entry.removed_data_during_checkpoint.get(); + } + if (removal_type == IndexRemovalType::MAIN_INDEX) { + // we also need to append to "deleted_rows_in_use" + if (!entry.deleted_rows_in_use) { + // create "deleted_rows_in_use" if it does not exist yet + entry.deleted_rows_in_use = main_index.CreateDeltaIndex(DeltaIndexType::DELETED_ROWS_IN_USE); + } + targets.append_target = entry.deleted_rows_in_use; + } + return; + } + if (removal_type == IndexRemovalType::REVERT_MAIN_INDEX_ONLY || + removal_type == IndexRemovalType::REVERT_MAIN_INDEX) { + // revert adding to main index + if (entry.added_data_during_checkpoint) { + // we have added data during this checkpoint as well, remove might have EITHER: + // (1) added to "removed_data_during_checkpoint" + // (2) removed data from "added_data_during_checkpoint" + // revert by first trying to remove from "removed_data_during_checkpoint" + // any rows that were not removed are re-added back to "added_data_during_checkpoint" + targets.conditional_remove_target = entry.removed_data_during_checkpoint.get(); + targets.conditional_append_target = 
entry.added_data_during_checkpoint.get(); + } else { + targets.conditional_remove_target = entry.removed_data_during_checkpoint.get(); + } + if (removal_type == IndexRemovalType::REVERT_MAIN_INDEX) { + // we also need to remove from "deleted_rows_in_use" + targets.remove_target = entry.deleted_rows_in_use.get(); + } + } +} +void GetIndexRemovalTargets(IndexEntry &entry, IndexRemovalType removal_type, IndexRemovalTargets &targets, + optional_idx active_checkpoint) { auto &main_index = entry.index->Cast(); // not all indexes require delta indexes - this is tracked through BoundIndex::RequiresTransactionality // if an index does not require this we skip creating to and appending to "deleted_rows_in_use" - bool index_requires_delta = main_index.RequiresTransactionality(); + bool supports_delta_indexes = main_index.SupportsDeltaIndexes(); + if (removal_type != IndexRemovalType::DELETED_ROWS_IN_USE && active_checkpoint.IsValid() && + supports_delta_indexes) { + // there's an ongoing checkpoint - check if we need to use delta indexes or if we can write to the main index + if (!entry.last_written_checkpoint.IsValid() || + entry.last_written_checkpoint.GetIndex() != active_checkpoint.GetIndex()) { + // there's an on-going checkpoint and we haven't flushed the index yet + // we can't modify the index in-place and need to modify the deltas - get the appropriate deltas to target + GetIndexRemovalTargetsActiveCheckpoint(entry, removal_type, targets); + return; + } + } switch (removal_type) { case IndexRemovalType::MAIN_INDEX_ONLY: // directly remove from main index without appending to delta indexes - remove_target = main_index; + targets.remove_target = main_index; break; case IndexRemovalType::REVERT_MAIN_INDEX_ONLY: // revert main index only append - just add back to index - append_target = main_index; + targets.append_target = main_index; break; case IndexRemovalType::MAIN_INDEX: // regular removal from main index - add rows to delta index if required - if 
(index_requires_delta) { + if (supports_delta_indexes) { if (!entry.deleted_rows_in_use) { // create "deleted_rows_in_use" if it does not exist yet - entry.deleted_rows_in_use = - main_index.CreateEmptyCopy("deleted_rows_in_use_", IndexConstraintType::NONE); + entry.deleted_rows_in_use = main_index.CreateDeltaIndex(DeltaIndexType::DELETED_ROWS_IN_USE); } - append_target = entry.deleted_rows_in_use; + targets.append_target = entry.deleted_rows_in_use; } - remove_target = main_index; + targets.remove_target = main_index; break; case IndexRemovalType::REVERT_MAIN_INDEX: // revert regular append to main index - remove from deleted_rows_in_use if we appended there before - append_target = main_index; - if (index_requires_delta) { - remove_target = entry.deleted_rows_in_use; + targets.append_target = main_index; + if (supports_delta_indexes) { + targets.remove_target = entry.deleted_rows_in_use; } break; case IndexRemovalType::DELETED_ROWS_IN_USE: // remove from removal index if we appended any rows - if (index_requires_delta) { - remove_target = entry.deleted_rows_in_use; + if (supports_delta_indexes) { + targets.remove_target = entry.deleted_rows_in_use; } break; default: @@ -787,9 +856,8 @@ void GetIndexRemovalTargets(IndexEntry &entry, IndexRemovalType removal_type, op } void RowGroupCollection::RemoveFromIndexes(const QueryContext &context, TableIndexList &indexes, - Vector &row_identifiers, idx_t count, IndexRemovalType removal_type) { - auto row_ids = FlatVector::GetData(row_identifiers); - + Vector &row_identifiers, idx_t count, IndexRemovalType removal_type, + optional_idx active_checkpoint) { // Collect all Indexed columns on the table. 
unordered_set indexed_column_id_set; indexes.Scan([&](Index &index) { @@ -810,115 +878,104 @@ void RowGroupCollection::RemoveFromIndexes(const QueryContext &context, TableInd for (auto &col : column_ids) { column_types.push_back(types[col.GetPrimaryIndex()]); } - auto row_groups = GetRowGroups(); - - // Initialize the fetch state. Only use indexed columns. - TableScanState state; - auto column_ids_copy = column_ids; - state.Initialize(std::move(column_ids_copy)); - state.table_state.max_row = row_groups->GetBaseRowId() + total_rows; DataChunk fetch_chunk; fetch_chunk.Initialize(GetAllocator(), column_types); + ColumnFetchState state; + state.fetch_type = FetchType::FORCE_FETCH; + TransactionData commit_transaction(MAX_TRANSACTION_ID, TRANSACTION_ID_START - 1); + Fetch(commit_transaction, fetch_chunk, column_ids, row_identifiers, count, state); + // Used for index value removal. // Contains all columns but only initializes indexed ones. DataChunk result_chunk; auto fetched_columns = vector(types.size(), false); result_chunk.Initialize(GetAllocator(), types, fetched_columns); - // Now set all to-be-fetched columns. for (auto &col : indexed_column_id_set) { fetched_columns[col] = true; } - // Iterate over the row ids. - SelectionVector sel(STANDARD_VECTOR_SIZE); - for (idx_t r = 0; r < count;) { - fetch_chunk.Reset(); - result_chunk.Reset(); - - // Figure out which row_group to fetch from. - auto row_id = row_ids[r]; - auto row_group = row_groups->GetSegment(UnsafeNumericCast(row_id)); - - auto ¤t_row_group = row_group->GetNode(); - auto row_start = row_group->GetRowStart(); - auto row_group_vector_idx = (UnsafeNumericCast(row_id) - row_start) / STANDARD_VECTOR_SIZE; - auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_start; - - // Fetch the current vector into fetch_chunk. 
- state.table_state.Initialize(context, GetTypes()); - current_row_group.InitializeScanWithOffset(state.table_state, *row_group, row_group_vector_idx); - current_row_group.ScanCommitted(state.table_state, fetch_chunk, TableScanType::TABLE_SCAN_COMMITTED_ROWS); - fetch_chunk.Verify(); - - // Check for any remaining row ids, if they also fall into this vector. - // We try to fetch as many rows as possible at the same time. - idx_t sel_count = 0; - for (; r < count; r++) { - idx_t current_row = idx_t(row_ids[r]); - if (current_row < base_row_id || current_row >= base_row_id + fetch_chunk.size()) { - // This row id does not fall into the current chunk. - break; - } - auto row_in_vector = current_row - base_row_id; - D_ASSERT(row_in_vector < fetch_chunk.size()); - sel.set_index(sel_count++, row_in_vector); - } - D_ASSERT(sel_count > 0); - - // Reference the necessary columns of the fetch_chunk. - idx_t fetch_idx = 0; - for (idx_t j = 0; j < types.size(); j++) { - if (fetched_columns[j]) { - result_chunk.data[j].Reference(fetch_chunk.data[fetch_idx++]); - continue; - } - result_chunk.data[j].Reference(Value(types[j])); + // Reference the necessary columns of the fetch_chunk. + idx_t fetch_idx = 0; + for (idx_t j = 0; j < types.size(); j++) { + if (fetched_columns[j]) { + result_chunk.data[j].Reference(fetch_chunk.data[fetch_idx++]); + continue; } - result_chunk.SetCardinality(fetch_chunk); - - // Slice the vector with all rows that are present in this vector. - // If the index is bound, delete the data. If unbound, buffer into unbound_index. 
- result_chunk.Slice(sel, sel_count); - indexes.ScanEntries([&](IndexEntry &entry) { - auto &index = *entry.index; - if (index.IsBound()) { - lock_guard guard(entry.lock); - // check which indexes we should append to or remove from - // note that this method might also involve appending to indexes - // the reason for that is that we have "delta" indexes that we must fill with data we are removing - // OR because we are actually reverting a previous removal - optional_ptr append_target, remove_target; - GetIndexRemovalTargets(entry, removal_type, append_target, remove_target); - - // perform the targeted append / removal - if (append_target) { - IndexAppendInfo append_info; - auto error = append_target->Append(result_chunk, row_identifiers, append_info); - if (error.HasError()) { - throw InternalException("Failed to append to %s: %s", append_target->name, error.Message()); + result_chunk.data[j].Reference(Value(types[j])); + } + result_chunk.SetCardinality(fetch_chunk); + + DataChunk remaining_result_chunk; + unique_ptr remaining_row_ids; + + indexes.ScanEntries([&](IndexEntry &entry) { + auto &index = *entry.index; + if (index.IsBound()) { + lock_guard guard(entry.lock); + + // check which indexes we should append to or remove from + // note that this method might also involve appending to indexes + // the reason for that is that we have "delta" indexes that we must fill with data we are removing + // OR because we are actually reverting a previous removal + IndexRemovalTargets targets; + GetIndexRemovalTargets(entry, removal_type, targets, active_checkpoint); + + bool removal_succeeded = false; + if (targets.conditional_remove_target) { + // if we have an conditional remove target, we first try to remove the chunk from there + idx_t delete_count = targets.conditional_remove_target->TryDelete(result_chunk, row_identifiers); + if (delete_count > 0) { + if (delete_count != result_chunk.size()) { + // it should not be possible to get here + // what this means is that 
we removed SOME rows from the "initial_remove_target" - but not all + // "initial_remove_target" contains rows that were INSERTED during the checkpoint + // the regular remove target contains rows that were ALREADY THERE during the checkpoint + // "RemoveFromIndexes" works on a per-row-group basis + // when appending during a checkpoint, we always insert new row groups for new data + // so the two groups of data should always be separate + throw InternalException("RowGroupCollection::RemoveFromIndexes - partially deleted from the " + "initial removal target"); } + removal_succeeded = true; } - if (remove_target) { - remove_target->Delete(result_chunk, row_identifiers); + } + if (targets.conditional_append_target && !removal_succeeded) { + // for any rows that were not removed - append them to the conditional append target instead + IndexAppendInfo append_info; + auto error = targets.conditional_append_target->Append(result_chunk, row_identifiers, append_info); + if (error.HasError()) { + throw InternalException("Failed to append to %s: %s", targets.conditional_append_target->name, + error.Message()); + } + } + // perform the targeted append / removal + if (targets.append_target) { + IndexAppendInfo append_info; + auto error = targets.append_target->Append(result_chunk, row_identifiers, append_info); + if (error.HasError()) { + throw InternalException("Failed to append to %s: %s", targets.append_target->name, error.Message()); } - return false; } - // Buffering takes only the indexed columns in ordering of the column_ids mapping. 
- DataChunk index_column_chunk; - index_column_chunk.InitializeEmpty(column_types); - for (idx_t i = 0; i < column_types.size(); i++) { - auto col_id = column_ids[i].GetPrimaryIndex(); - index_column_chunk.data[i].Reference(result_chunk.data[col_id]); + if (targets.remove_target) { + targets.remove_target->Delete(result_chunk, row_identifiers); } - index_column_chunk.SetCardinality(result_chunk.size()); - auto &unbound_index = index.Cast(); - unbound_index.BufferChunk(index_column_chunk, row_identifiers, column_ids, BufferedIndexReplay::DEL_ENTRY); return false; - }); - } + } + // Buffering takes only the indexed columns in ordering of the column_ids mapping. + DataChunk index_column_chunk; + index_column_chunk.InitializeEmpty(column_types); + for (idx_t i = 0; i < column_types.size(); i++) { + auto col_id = column_ids[i].GetPrimaryIndex(); + index_column_chunk.data[i].Reference(result_chunk.data[col_id]); + } + index_column_chunk.SetCardinality(result_chunk.size()); + auto &unbound_index = index.Cast(); + unbound_index.BufferChunk(index_column_chunk, row_identifiers, column_ids, BufferedIndexReplay::DEL_ENTRY); + return false; + }); } void RowGroupCollection::UpdateColumn(TransactionData transaction, DataTable &data_table, Vector &row_ids, @@ -1139,7 +1196,9 @@ class VacuumTask : public BaseCheckpointTask { total_append_count += append_counts[target_idx]; } if (total_append_count != merge_rows) { - throw InternalException("Mismatch in row group count vs verify count in RowGroupCollection::Checkpoint"); + throw InternalException( + "Mismatch in row group count %d vs verify count %d in RowGroupCollection::Checkpoint", merge_rows, + total_append_count); } // merging is complete - execute checkpoint tasks of the target row groups for (idx_t i = 0; i < target_count; i++) { @@ -1403,7 +1462,8 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl extra_metadata_block_pointers.emplace_back(block_pointer, 0); } 
metadata_manager.ClearModifiedBlocks(extra_metadata_block_pointers); - row_group.CheckpointDeletes(metadata_manager); + auto row_group_writer = checkpoint_state.writer.GetRowGroupWriter(row_group); + row_group.CheckpointDeletes(*row_group_writer); } writer.WriteUnchangedTable(metadata_pointer, total_rows.load()); diff --git a/src/duckdb/src/storage/table/row_version_manager.cpp b/src/duckdb/src/storage/table/row_version_manager.cpp index 20d0ebed4..8bf25b787 100644 --- a/src/duckdb/src/storage/table/row_version_manager.cpp +++ b/src/duckdb/src/storage/table/row_version_manager.cpp @@ -3,14 +3,13 @@ #include "duckdb/storage/metadata/metadata_manager.hpp" #include "duckdb/storage/metadata/metadata_reader.hpp" #include "duckdb/storage/metadata/metadata_writer.hpp" -#include "duckdb/common/pair.hpp" +#include "duckdb/storage/checkpoint/row_group_writer.hpp" namespace duckdb { RowVersionManager::RowVersionManager(BufferManager &buffer_manager_p) noexcept : allocator(STANDARD_VECTOR_SIZE * sizeof(transaction_t), buffer_manager_p.GetTemporaryBlockManager(), - MemoryTag::BASE_TABLE), - has_unserialized_changes(false) { + MemoryTag::BASE_TABLE) { } idx_t RowVersionManager::GetCommittedDeletedCount(idx_t count) { @@ -126,7 +125,6 @@ void RowVersionManager::FillVectorInfo(idx_t vector_idx) { void RowVersionManager::AppendVersionInfo(TransactionData transaction, idx_t count, idx_t row_group_start, idx_t row_group_end) { lock_guard lock(version_lock); - has_unserialized_changes = true; idx_t start_vector_idx = row_group_start / STANDARD_VECTOR_SIZE; idx_t end_vector_idx = (row_group_end - 1) / STANDARD_VECTOR_SIZE; @@ -179,7 +177,6 @@ void RowVersionManager::CommitAppend(transaction_t commit_id, idx_t row_group_st idx_t vend = vector_idx == end_vector_idx ? 
row_group_end - end_vector_idx * STANDARD_VECTOR_SIZE : STANDARD_VECTOR_SIZE; auto &info = *vector_info[vector_idx]; - D_ASSERT(has_unserialized_changes); info.CommitAppend(commit_id, vstart, vend); } } @@ -208,9 +205,6 @@ void RowVersionManager::CleanupAppend(transaction_t lowest_active_transaction, i // if we wrote the entire chunk info try to compress it auto cleanup = info.Cleanup(lowest_active_transaction); if (cleanup) { - if (info.HasDeletes()) { - has_unserialized_changes = true; - } vector_info[vector_idx].reset(); } } @@ -220,7 +214,6 @@ void RowVersionManager::RevertAppend(idx_t new_count) { lock_guard lock(version_lock); idx_t start_vector_idx = (new_count + (STANDARD_VECTOR_SIZE - 1)) / STANDARD_VECTOR_SIZE; for (idx_t vector_idx = start_vector_idx; vector_idx < vector_info.size(); vector_idx++) { - D_ASSERT(has_unserialized_changes); vector_info[vector_idx].reset(); } } @@ -243,19 +236,22 @@ ChunkVectorInfo &RowVersionManager::GetVectorInfo(idx_t vector_idx) { idx_t RowVersionManager::DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count) { lock_guard lock(version_lock); - has_unserialized_changes = true; return GetVectorInfo(vector_idx).Delete(transaction_id, rows, count); } void RowVersionManager::CommitDelete(idx_t vector_idx, transaction_t commit_id, const DeleteInfo &info) { lock_guard lock(version_lock); - has_unserialized_changes = true; + if (!uncheckpointed_delete_commit.IsValid() || commit_id > uncheckpointed_delete_commit.GetIndex()) { + uncheckpointed_delete_commit = commit_id; + } GetVectorInfo(vector_idx).CommitDelete(commit_id, info); } -vector RowVersionManager::Checkpoint(MetadataManager &manager) { +vector RowVersionManager::Checkpoint(RowGroupWriter &writer) { lock_guard lock(version_lock); - if (!has_unserialized_changes) { + auto &manager = *writer.GetMetadataManager(); + auto options = writer.GetCheckpointOptions(); + if (!uncheckpointed_delete_commit.IsValid()) { // we can write the current pointer 
as-is // ensure the blocks we are pointing to are not marked as free manager.ClearModifiedBlocks(storage_pointers); @@ -269,7 +265,7 @@ vector RowVersionManager::Checkpoint(MetadataManager &manager) if (!chunk_info) { continue; } - if (!chunk_info->HasDeletes()) { + if (!chunk_info->HasDeletes(options.transaction_id)) { continue; } to_serialize.emplace_back(vector_idx, *chunk_info); @@ -278,19 +274,23 @@ vector RowVersionManager::Checkpoint(MetadataManager &manager) storage_pointers.clear(); if (!to_serialize.empty()) { - MetadataWriter writer(manager, &storage_pointers); + MetadataWriter metadata_writer(manager, &storage_pointers); // now serialize the actual version information - writer.Write(to_serialize.size()); + metadata_writer.Write(to_serialize.size()); for (auto &entry : to_serialize) { auto &vector_idx = entry.first; auto &chunk_info = entry.second.get(); - writer.Write(vector_idx); - chunk_info.Write(writer); + metadata_writer.Write(vector_idx); + chunk_info.Write(metadata_writer, options.transaction_id); } - writer.Flush(); + metadata_writer.Flush(); } - has_unserialized_changes = false; + if (uncheckpointed_delete_commit.IsValid() && uncheckpointed_delete_commit.GetIndex() <= options.transaction_id) { + // the last checkpointed id was either before or on the transaction we are checkpointing + // nothing to checkpoint in future commits until more deletes appear + uncheckpointed_delete_commit = optional_idx(); + } return storage_pointers; } @@ -314,18 +314,18 @@ shared_ptr RowVersionManager::Deserialize(MetaBlockPointer de version_info->FillVectorInfo(vector_index); version_info->vector_info[vector_index] = ChunkInfo::Read(version_info->GetAllocator(), source); } - version_info->has_unserialized_changes = false; + version_info->uncheckpointed_delete_commit = optional_idx(); return version_info; } bool RowVersionManager::HasUnserializedChanges() { lock_guard lock(version_lock); - return has_unserialized_changes; + return 
uncheckpointed_delete_commit.IsValid(); } vector RowVersionManager::GetStoragePointers() { lock_guard lock(version_lock); - D_ASSERT(!has_unserialized_changes); + D_ASSERT(!uncheckpointed_delete_commit.IsValid()); return storage_pointers; } diff --git a/src/duckdb/src/storage/table_index_list.cpp b/src/duckdb/src/storage/table_index_list.cpp index 49d81398c..186db5143 100644 --- a/src/duckdb/src/storage/table_index_list.cpp +++ b/src/duckdb/src/storage/table_index_list.cpp @@ -9,6 +9,7 @@ #include "duckdb/planner/expression_binder/index_binder.hpp" #include "duckdb/storage/data_table.hpp" #include "duckdb/storage/table/data_table_info.hpp" +#include "duckdb/storage/table/scan_state.hpp" namespace duckdb { @@ -214,14 +215,19 @@ void TableIndexList::VerifyForeignKey(optional_ptr storage, c // Check whether the chunk can be inserted in or deleted from the referenced table storage. auto entry = FindForeignKeyIndex(fk_keys, fk_type); auto &index = *entry->index; + lock_guard guard(entry->lock); D_ASSERT(index.IsBound()); - optional_ptr delete_index; + IndexAppendInfo index_append_info; if (storage) { - delete_index = storage->delete_indexes.Find(index.GetIndexName()); + auto delete_index = storage->delete_indexes.Find(index.GetIndexName()); + if (delete_index) { + index_append_info.delete_indexes.push_back(*delete_index); + } + } + if (entry->removed_data_during_checkpoint) { + index_append_info.delete_indexes.push_back(*entry->removed_data_during_checkpoint); } - IndexAppendInfo index_append_info(IndexAppendMode::DEFAULT, delete_index); - lock_guard entry_lock(entry->lock); auto &main_index = index.Cast(); main_index.VerifyConstraint(chunk, index_append_info, conflict_manager); if (entry->added_data_during_checkpoint) { @@ -263,7 +269,7 @@ vector TableIndexList::SerializeToDisk(QueryContext context, c return infos; } -void TableIndexList::MergeCheckpointDeltas(transaction_t checkpoint_id) { +void TableIndexList::MergeCheckpointDeltas(DataTable &storage, transaction_t 
checkpoint_id) { lock_guard lock(index_entries_lock); for (auto &entry : index_entries) { // merge any data appended to the index while the checkpoint was running @@ -272,12 +278,100 @@ void TableIndexList::MergeCheckpointDeltas(transaction_t checkpoint_id) { continue; } lock_guard guard(entry->lock); + auto &bound_index = index.Cast(); + vector> delta_indexes; + vector delta_index_is_delete; + if (entry->removed_data_during_checkpoint) { + delta_indexes.push_back(*entry->removed_data_during_checkpoint); + delta_index_is_delete.push_back(true); + } if (entry->added_data_during_checkpoint) { - // we have written data here while checkpointing - merge it into the main index - auto &bound_index = index.Cast(); - bound_index.MergeIndexes(*entry->added_data_during_checkpoint); - entry->added_data_during_checkpoint.reset(); + delta_indexes.push_back(*entry->added_data_during_checkpoint); + delta_index_is_delete.push_back(false); + } + for (idx_t i = 0; i < delta_indexes.size(); i++) { + auto &delta_index = delta_indexes[i].get(); + auto is_delete = delta_index_is_delete[i]; + // FIXME: this should use an optimized (removal) merge instead of doing fetches in the base table + // fetch all row-ids to delete + auto &art = delta_index.Cast(); + auto scan_state = art.InitializeFullScan(); + set all_row_ids; + art.Scan(*scan_state, NumericLimits::Maximum(), all_row_ids); + + // FIXME: this is mostly copied over from RowGroupCollection::RemoveFromIndexes, but we shouldn't be doing + // this anyway... 
+ if (!all_row_ids.empty()) { + // in a loop fetch the + Vector row_identifiers(LogicalType::BIGINT); + auto row_ids = FlatVector::GetData(row_identifiers); + idx_t count = 0; + + auto indexed_column_id_set = bound_index.GetColumnIdSet(); + vector column_ids; + for (auto &col : indexed_column_id_set) { + column_ids.emplace_back(col); + } + sort(column_ids.begin(), column_ids.end()); + + auto types = storage.GetTypes(); + vector column_types; + for (auto &col : column_ids) { + column_types.push_back(types[col.GetPrimaryIndex()]); + } + + DataChunk fetch_chunk; + fetch_chunk.Initialize(Allocator::DefaultAllocator(), column_types); + + ColumnFetchState state; + state.fetch_type = FetchType::FORCE_FETCH; + + DataChunk result_chunk; + auto fetched_columns = vector(types.size(), false); + result_chunk.Initialize(Allocator::DefaultAllocator(), types, fetched_columns); + // Now set all to-be-fetched columns. + for (auto &col : indexed_column_id_set) { + fetched_columns[col] = true; + } + auto last_row_id = *all_row_ids.rbegin(); + for (auto &row_id : all_row_ids) { + row_ids[count++] = row_id; + if (row_id == last_row_id || count == STANDARD_VECTOR_SIZE) { + fetch_chunk.Reset(); + storage.FetchCommitted(fetch_chunk, column_ids, row_identifiers, count, state); + + // Reference the necessary columns of the fetch_chunk. 
+ idx_t fetch_idx = 0; + for (idx_t j = 0; j < types.size(); j++) { + if (fetched_columns[j]) { + result_chunk.data[j].Reference(fetch_chunk.data[fetch_idx++]); + continue; + } + result_chunk.data[j].Reference(Value(types[j])); + } + result_chunk.SetCardinality(fetch_chunk); + if (is_delete) { + auto delete_count = bound_index.TryDelete(result_chunk, row_identifiers); + if (delete_count != result_chunk.size()) { + throw InternalException("Failed to remove all rows while merging checkpoint deltas - " + "this signifies a bug or broken index\nChunk: %s", + result_chunk.ToString()); + } + } else { + auto error = bound_index.Append(result_chunk, row_identifiers); + if (error.HasError()) { + throw InternalException("Failed to append while merging checkpoint deltas - this " + "signifies a bug or broken index: %s", + error.Message()); + } + } + count = 0; + } + } + } } + entry->removed_data_during_checkpoint.reset(); + entry->added_data_during_checkpoint.reset(); entry->last_written_checkpoint = checkpoint_id; } } diff --git a/src/duckdb/src/transaction/cleanup_state.cpp b/src/duckdb/src/transaction/cleanup_state.cpp index 1a07bf6ee..e6abec9ab 100644 --- a/src/duckdb/src/transaction/cleanup_state.cpp +++ b/src/duckdb/src/transaction/cleanup_state.cpp @@ -14,10 +14,10 @@ namespace duckdb { -CleanupState::CleanupState(const QueryContext &context, transaction_t lowest_active_transaction, +CleanupState::CleanupState(DuckTransaction &transaction, transaction_t lowest_active_transaction, ActiveTransactionState transaction_state) : lowest_active_transaction(lowest_active_transaction), transaction_state(transaction_state), - index_data_remover(context, IndexRemovalType::DELETED_ROWS_IN_USE) { + index_data_remover(transaction, QueryContext(), IndexRemovalType::DELETED_ROWS_IN_USE) { } void CleanupState::CleanupEntry(UndoFlags type, data_ptr_t data) { diff --git a/src/duckdb/src/transaction/commit_state.cpp b/src/duckdb/src/transaction/commit_state.cpp index 1819e0c46..be4806985 
100644 --- a/src/duckdb/src/transaction/commit_state.cpp +++ b/src/duckdb/src/transaction/commit_state.cpp @@ -18,14 +18,15 @@ #include "duckdb/transaction/delete_info.hpp" #include "duckdb/transaction/update_info.hpp" #include "duckdb/transaction/duck_transaction.hpp" +#include "duckdb/transaction/duck_transaction_manager.hpp" namespace duckdb { //===--------------------------------------------------------------------===// // IndexDataRemover //===--------------------------------------------------------------------===// -IndexDataRemover::IndexDataRemover(QueryContext context, IndexRemovalType removal_type) - : context(context), removal_type(removal_type) { +IndexDataRemover::IndexDataRemover(DuckTransaction &transaction_p, QueryContext context, IndexRemovalType removal_type) + : transaction(transaction_p), context(context), removal_type(removal_type) { } void IndexDataRemover::PushDelete(DeleteInfo &info) { @@ -74,11 +75,13 @@ void IndexDataRemover::Flush(DataTable &table, row_t *row_numbers, idx_t count) // set up the row identifiers vector Vector row_identifiers(LogicalType::ROW_TYPE, data_ptr_cast(row_numbers)); + auto active_checkpoint = transaction.GetTransactionManager().Cast().GetActiveCheckpoint(); + auto checkpoint_id = active_checkpoint == MAX_TRANSACTION_ID ? optional_idx() : active_checkpoint; // delete the tuples from all the indexes. // If there is any issue with removal, a FatalException must be thrown since there may be a corruption of // data, hence the transaction cannot be guaranteed. try { - table.RemoveFromIndexes(context, row_identifiers, count, removal_type); + table.RemoveFromIndexes(context, row_identifiers, count, removal_type, checkpoint_id); } catch (std::exception &ex) { throw FatalException(ErrorData(ex).Message()); } catch (...) 
{ @@ -94,7 +97,8 @@ void IndexDataRemover::Flush(DataTable &table, row_t *row_numbers, idx_t count) CommitState::CommitState(DuckTransaction &transaction_p, transaction_t commit_id, ActiveTransactionState transaction_state, CommitMode commit_mode) : transaction(transaction_p), commit_id(commit_id), - index_data_remover(*transaction.context.lock(), GetIndexRemovalType(transaction_state, commit_mode)) { + index_data_remover(transaction, *transaction.context.lock(), + GetIndexRemovalType(transaction_state, commit_mode)) { } IndexRemovalType CommitState::GetIndexRemovalType(ActiveTransactionState transaction_state, CommitMode commit_mode) { diff --git a/src/duckdb/src/transaction/duck_transaction.cpp b/src/duckdb/src/transaction/duck_transaction.cpp index 53bf74d6c..d4c73c5cf 100644 --- a/src/duckdb/src/transaction/duck_transaction.cpp +++ b/src/duckdb/src/transaction/duck_transaction.cpp @@ -259,6 +259,9 @@ ErrorData DuckTransaction::Commit(AttachedDatabase &db, CommitInfo &commit_info, try { storage->Commit(commit_state.get()); undo_buffer.Commit(iterator_state, commit_info); + // if (DebugForceAbortCommit()) { + // throw InvalidInputException("Force revert"); + // } if (commit_state) { // if we have written to the WAL - flush after the commit has been successful commit_state->FlushCommit(); @@ -289,30 +292,36 @@ void DuckTransaction::Cleanup(transaction_t lowest_active_transaction) { } void DuckTransaction::SetModifications(DatabaseModificationType type) { - if (write_lock) { - // already have a write lock - return; + if (!checkpoint_lock) { + bool require_write_lock = false; + require_write_lock = require_write_lock || type.UpdateData(); + require_write_lock = require_write_lock || type.AlterTable(); + require_write_lock = require_write_lock || type.CreateCatalogEntry(); + require_write_lock = require_write_lock || type.DropCatalogEntry(); + require_write_lock = require_write_lock || type.Sequence(); + require_write_lock = require_write_lock || type.CreateIndex(); 
+ + if (require_write_lock) { + // obtain a shared checkpoint lock to prevent concurrent checkpoints while this transaction is running + checkpoint_lock = GetTransactionManager().SharedCheckpointLock(); + } } - bool require_write_lock = false; - require_write_lock = require_write_lock || type.DeleteData(); - require_write_lock = require_write_lock || type.UpdateData(); - require_write_lock = require_write_lock || type.AlterTable(); - require_write_lock = require_write_lock || type.CreateCatalogEntry(); - require_write_lock = require_write_lock || type.DropCatalogEntry(); - require_write_lock = require_write_lock || type.Sequence(); - require_write_lock = require_write_lock || type.CreateIndex(); - - if (require_write_lock) { - // obtain a shared checkpoint lock to prevent concurrent checkpoints while this transaction is running - write_lock = GetTransactionManager().SharedCheckpointLock(); + if (!vacuum_lock) { + bool require_vacuum_lock = false; + require_vacuum_lock = require_vacuum_lock || type.InsertData(); + require_vacuum_lock = require_vacuum_lock || type.DeleteData(); + + if (require_vacuum_lock) { + vacuum_lock = GetTransactionManager().SharedVacuumLock(); + } } } unique_ptr DuckTransaction::TryGetCheckpointLock() { - if (!write_lock) { + if (!checkpoint_lock) { return GetTransactionManager().TryGetCheckpointLock(); } else { - return GetTransactionManager().TryUpgradeCheckpointLock(*write_lock); + return GetTransactionManager().TryUpgradeCheckpointLock(*checkpoint_lock); } } diff --git a/src/duckdb/src/transaction/duck_transaction_manager.cpp b/src/duckdb/src/transaction/duck_transaction_manager.cpp index 49128adee..29d2fbde3 100644 --- a/src/duckdb/src/transaction/duck_transaction_manager.cpp +++ b/src/duckdb/src/transaction/duck_transaction_manager.cpp @@ -267,6 +267,14 @@ unique_ptr DuckTransactionManager::TryGetCheckpointLock() { return checkpoint_lock.TryGetExclusiveLock(); } +unique_ptr DuckTransactionManager::SharedVacuumLock() { + return 
vacuum_lock.GetSharedLock(); +} + +unique_ptr DuckTransactionManager::TryGetVacuumLock() { + return vacuum_lock.TryGetExclusiveLock(); +} + transaction_t DuckTransactionManager::GetCommitTimestamp() { return current_start_timestamp++; } diff --git a/src/duckdb/src/transaction/undo_buffer.cpp b/src/duckdb/src/transaction/undo_buffer.cpp index 8adb8e2de..29f036934 100644 --- a/src/duckdb/src/transaction/undo_buffer.cpp +++ b/src/duckdb/src/transaction/undo_buffer.cpp @@ -181,7 +181,7 @@ void UndoBuffer::Cleanup(transaction_t lowest_active_transaction) { // the chunks) // (2) there is no active transaction with start_id < commit_id of this // transaction - CleanupState state(QueryContext(), lowest_active_transaction, active_transaction_state); + CleanupState state(transaction, lowest_active_transaction, active_transaction_state); UndoBuffer::IteratorState iterator_state; IterateEntries(iterator_state, [&](UndoFlags type, data_ptr_t data) { state.CleanupEntry(type, data); }); } diff --git a/src/duckdb/third_party/fsst/libfsst.cpp b/src/duckdb/third_party/fsst/libfsst.cpp index b8394a446..62d89702f 100644 --- a/src/duckdb/third_party/fsst/libfsst.cpp +++ b/src/duckdb/third_party/fsst/libfsst.cpp @@ -18,21 +18,23 @@ #include "libfsst.hpp" #include "duckdb/common/unique_ptr.hpp" +namespace libfsst { Symbol concat(Symbol a, Symbol b) { Symbol s; u32 length = a.length()+b.length(); if (length > Symbol::maxLength) length = Symbol::maxLength; s.set_code_len(FSST_CODE_MASK, length); - s.val.num = (b.val.num << (8*a.length())) | a.val.num; + s.store_num((b.load_num() << (8*a.length())) | a.load_num()); return s; } +} // namespace libfsst namespace std { template <> -class hash { -public: - size_t operator()(const QSymbol& q) const { - uint64_t k = q.symbol.val.num; +class hash { + public: + size_t operator()(const libfsst::QSymbol& q) const { + uint64_t k = q.symbol.load_num(); const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; uint64_t h = 0x8445d61a4e774912 ^ (8*m); @@ 
-49,6 +51,7 @@ class hash { }; } +namespace libfsst { bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; } std::ostream& operator<<(std::ostream& out, const Symbol& s) { @@ -57,7 +60,7 @@ std::ostream& operator<<(std::ostream& out, const Symbol& s) { return out; } -SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[], bool zeroTerminated=false) { +SymbolTable *buildSymbolTable(Counters& counters, vector line, const size_t len[], bool zeroTerminated=false) { SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable(); int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception) size_t sampleFrac = 128; @@ -70,8 +73,8 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[] u16 byteHisto[256]; memset(byteHisto, 0, sizeof(byteHisto)); for(size_t i=0; iterminator = 256; @@ -91,15 +94,14 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[] int gain = 0; for(size_t i=0; i sampleFrac) continue; } if (cur < end) { - u8* start = cur; u16 code2 = 255, code1 = st->findLongestSymbol(cur, end); cur += st->symbols[code1].length(); gain += (int) (st->symbols[code1].length()-(1+isEscapeCode(code1))); @@ -124,7 +126,7 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[] Symbol s = st->hashTab[idx]; code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK; word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) { + if ((s.icl < FSST_ICL_FREE) & (s.load_num() == word)) { code2 = s.code(); cur += s.length(); } else if (code2 >= FSST_CODE_BASE) { @@ -188,10 +190,11 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[] addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1); if (sampleFrac >= 128 || // last round we do not create new (combined) symbols - s1.length() == Symbol::maxLength || // symbol cannot be extended - s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the 
terminator byte + s1.length() == Symbol::maxLength || // symbol cannot be extended + s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte continue; } + for (u32 pos2=0; pos2nSymbols; pos2++) { u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!! if (!cnt2) continue; @@ -205,7 +208,7 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[] } // insert candidates into priority queue (by gain) - auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); }; + auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.load_num() > q2.symbol.load_num()); }; priority_queue,decltype(cmpGn)> pq(cmpGn); for (auto& q : cands) pq.push(q); @@ -244,11 +247,11 @@ SymbolTable *buildSymbolTable(Counters& counters, vector line, size_t len[] // optimized adaptive *scalar* compression method static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_t lenIn[], u8* strIn[], size_t size, u8* out, size_t lenOut[], u8* strOut[], bool noSuffixOpt, bool avoidBranch) { - u8 *cur = NULL, *end = NULL, *lim = out + size; + const u8 *cur = NULL, *end = NULL, *lim = out + size; size_t curLine, suffixLim = symbolTable.suffixLim; u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0]; - u8 buf[512+7] = {}; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */ + u8 buf[512+8] = {}; /* +8 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */ // three variants are possible. 
dead code falls away since the bool arguments are constants auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) { @@ -264,7 +267,7 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_ Symbol s = symbolTable.hashTab[idx]; out[1] = (u8) word; // speculatively write out escaped byte word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { + if ((s.icl < FSST_ICL_FREE) && s.load_num() == word) { *out++ = (u8) s.code(); cur += s.length(); } else if (avoidBranch) { // could be a 2-byte or 1-byte code, or miss @@ -320,19 +323,20 @@ static inline size_t compressBulk(SymbolTable &symbolTable, size_t nlines, size_ #define FSST_SAMPLELINE ((size_t) 512) // quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes -vector makeSample(u8* sampleBuf, u8* strIn[], size_t *lenIn, size_t nlines, +vector makeSample(u8* sampleBuf, u8* strIn[], size_t *lenIn, size_t nlines, duckdb::unique_ptr>& sample_len_out) { size_t totSize = 0; - vector sample; + vector sample; for(size_t i=0; i>(new vector()); sample_len_out->reserve(nlines + FSST_SAMPLEMAXSZ/FSST_SAMPLELINE); @@ -365,9 +369,9 @@ vector makeSample(u8* sampleBuf, u8* strIn[], size_t *lenIn, size_t nlines, extern "C" duckdb_fsst_encoder_t* duckdb_fsst_create(size_t n, size_t lenIn[], u8 *strIn[], int zeroTerminated) { u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ]; duckdb::unique_ptr> sample_sizes; - vector sample = makeSample(sampleBuf, strIn, lenIn, n?n:1, sample_sizes); // careful handling of input to get a right-size and representative sample + vector sample = makeSample(sampleBuf, strIn, lenIn, n?n:1, sample_sizes); // careful handling of input to get a right-size and representative sample Encoder *encoder = new Encoder(); - size_t* sampleLen = sample_sizes ? sample_sizes->data() : &lenIn[0]; + const size_t* sampleLen = sample_sizes ? 
sample_sizes->data() : &lenIn[0]; encoder->symbolTable = shared_ptr(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated)); delete[] sampleBuf; return (duckdb_fsst_encoder_t*) encoder; @@ -403,6 +407,8 @@ extern "C" u32 duckdb_fsst_export(duckdb_fsst_encoder_t *encoder, u8 *buf) { (((u64) e->symbolTable->nSymbols) << 8) | FSST_ENDIAN_MARKER; // least significant byte is nonzero + version = swap64_if_be(version); // ensure version is little-endian encoded + /* do not assume unaligned reads here */ memcpy(buf, &version, 8); buf[8] = e->symbolTable->zeroTerminated; @@ -427,6 +433,8 @@ extern "C" u32 duckdb_fsst_import(duckdb_fsst_decoder_t *decoder, u8 *buf) { // version field (first 8 bytes) is now there just for future-proofness, unused still (skipped) memcpy(&version, buf, 8); + version = swap64_if_be(version); // version is always little-endian encoded + if ((version>>32) != FSST_VERSION) return 0; decoder->zeroTerminated = buf[8]&1; memcpy(lenHisto, buf+9, 8); @@ -481,7 +489,9 @@ inline size_t _compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn size_t compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) { return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd); } +} // namespace libfsst +using namespace libfsst; // the main compression function (everything automatic) extern "C" size_t duckdb_fsst_compress(duckdb_fsst_encoder_t *encoder, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) { // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB) diff --git a/src/duckdb/third_party/fsst/libfsst.hpp b/src/duckdb/third_party/fsst/libfsst.hpp index 0d556386a..fd33ce581 100644 --- a/src/duckdb/third_party/fsst/libfsst.hpp +++ b/src/duckdb/third_party/fsst/libfsst.hpp @@ -37,16 +37,17 @@ using namespace std; #include "fsst.h" 
// the official FSST API -- also usable by C mortals /* unsigned integers */ +namespace libfsst { typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; +} // namespace libfsst -inline uint64_t fsst_unaligned_load(u8 const* V) { - uint64_t Ret; - memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible) - return Ret; -} +#if UINTPTR_MAX == 0xffffffffU +// We're on a 32-bit platform +#define NONOPT_FSST +#endif #define FSST_ENDIAN_MARKER ((u64) 1) #define FSST_VERSION_20190218 20190218 @@ -63,6 +64,29 @@ inline uint64_t fsst_unaligned_load(u8 const* V) { #define FSST_CODE_MAX (1UL<<FSST_CODE_BITS) +static inline uint64_t swap64_if_be(uint64_t v) { +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + return __builtin_bswap64(v); + #else + return ((v&0xff00000000000000ull) >> 56) | ((v&0x00ff000000000000ull) >> 40) | \ + ((v&0x0000ff0000000000ull) >> 24) | ((v&0x000000ff00000000ull) >> 8) | \ + ((v&0x00000000ff000000ull) << 8) | ((v&0x0000000000ff0000ull) << 24) | \ + ((v&0x000000000000ff00ull) << 40) | ((v&0x00000000000000ffull) << 56); + + #endif +#else + return v; // little-endian (or unknown), so no swap needed +#endif +} + +inline uint64_t fsst_unaligned_load(u8 const* V) { + uint64_t Ret; + memcpy(&Ret, V, sizeof(uint64_t)); // compiler will generate efficient code (unaligned load, where possible) + return swap64_if_be(Ret); +} + struct Symbol { static const unsigned maxLength = 8; @@ -74,9 +98,9 @@ struct Symbol { Symbol() : icl(0) { val.num = 0; } - explicit Symbol(u8 c, u16 code) : icl((1<<28)|(code<<16)|56) { val.num = c; } // single-char symbol + explicit Symbol(u8 c, u16 code) : icl((1<<28)|(code<<16)|56) { store_num(c); } // single-char symbol explicit Symbol(const char* begin, const char* end) : Symbol(begin, (u32) (end-begin)) {} - explicit Symbol(u8* begin, u8* end) : Symbol((const char*)begin, (u32) (end-begin)) {} + explicit Symbol(const u8* begin, const u8* end) : Symbol((const char*)begin, (u32) (end-begin)) {} explicit Symbol(const char* input, u32 len)
{ val.num = 0; if (len>=8) { @@ -89,18 +113,21 @@ struct Symbol { } void set_code_len(u32 code, u32 len) { icl = (len<<28)|(code<<16)|((8-len)*8); } + u64 load_num() const { return swap64_if_be(val.num); } + void store_num(u64 v) { val.num = swap64_if_be(v); } + u32 length() const { return (u32) (icl >> 28); } u16 code() const { return (icl >> 16) & FSST_CODE_MASK; } u32 ignoredBits() const { return (u32) icl; } - u8 first() const { assert( length() >= 1); return 0xFF & val.num; } - u16 first2() const { assert( length() >= 2); return 0xFFFF & val.num; } + u8 first() const { assert( length() >= 1); return 0xFF & load_num(); } + u16 first2() const { assert( length() >= 2); return 0xFFFF & load_num(); } #define FSST_HASH_LOG2SIZE 10 #define FSST_HASH_PRIME 2971215073LL #define FSST_SHIFT 15 #define FSST_HASH(w) (((w)*FSST_HASH_PRIME)^(((w)*FSST_HASH_PRIME)>>FSST_SHIFT)) - size_t hash() const { size_t v = 0xFFFFFF & val.num; return FSST_HASH(v); } // hash on the next 3 bytes + size_t hash() const { size_t v = 0xFFFFFF & load_num(); return FSST_HASH(v); } // hash on the next 3 bytes }; // Symbol that can be put in a queue, ordered on gain @@ -117,7 +144,7 @@ struct QSymbol{ // two phases of compression, before and after optimize(): // // (1) to encode values we probe (and maintain) three datastructures: -// - u16 byteCodes[65536] array at the position of the next byte (s.length==1) +// - u16 byteCodes[256] array at the position of the next byte (s.length==1) // - u16 shortCodes[65536] array at the position of the next twobyte pattern (s.length==2) // - Symbol hashtable[1024] (keyed by the next three bytes, ie for s.length>2), // this search will yield a u16 code, it points into Symbol symbols[]. 
You always find a hit, because the first 256 codes are @@ -215,7 +242,7 @@ struct SymbolTable { bool taken = (hashTab[idx].icl < FSST_ICL_FREE); if (taken) return false; // collision in hash table hashTab[idx].icl = s.icl; - hashTab[idx].val.num = s.val.num & (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); + hashTab[idx].store_num(s.load_num() & (0xFFFFFFFFFFFFFFFF >> (u8) s.icl)); return true; } bool add(Symbol s) { @@ -236,8 +263,8 @@ struct SymbolTable { /// Find longest expansion, return code (= position in symbol table) u16 findLongestSymbol(Symbol s) const { size_t idx = s.hash() & (hashTabSize-1); - if (hashTab[idx].icl <= s.icl && hashTab[idx].val.num == (s.val.num & (0xFFFFFFFFFFFFFFFF >> ((u8) hashTab[idx].icl)))) { - return (hashTab[idx].icl>>16) & FSST_CODE_MASK; // matched a long symbol + if (hashTab[idx].icl <= s.icl && hashTab[idx].load_num() == (s.load_num() & (0xFFFFFFFFFFFFFFFF >> ((u8) hashTab[idx].icl)))) { + return (hashTab[idx].icl>>16) & FSST_CODE_MASK; // matched a long symbol } if (s.length() >= 2) { u16 code = shortCodes[s.first2()] & FSST_CODE_MASK; @@ -245,7 +272,7 @@ struct SymbolTable { } return byteCodes[s.first()] & FSST_CODE_MASK; } - u16 findLongestSymbol(u8* cur, u8* end) const { + u16 findLongestSymbol(const u8* cur, const u8* end) const { return findLongestSymbol(Symbol(cur,end)); // represent the string as a temporary symbol } @@ -380,7 +407,7 @@ struct Counters { } u32 count1GetNext(u32 &pos1) { // note: we will advance pos1 to the next nonzero counter in register range // read 16-bits single symbol counter, split into two 8-bits numbers (count1Low, count1High), while skipping over zeros - u64 high = fsst_unaligned_load(&count1High[pos1]); + u64 high = fsst_unaligned_load(&count1High[pos1]); // note: this reads 8 subsequent counters [pos1..pos1+7] u32 zero = high?(__builtin_ctzll(high)>>3):7UL; // number of zero bytes high = (high >> (zero << 3)) & 255; // advance to nonzero counter @@ -393,7 +420,7 @@ struct Counters { } u32 
count2GetNext(u32 pos1, u32 &pos2) { // note: we will advance pos2 to the next nonzero counter in register range // read 12-bits pairwise symbol counter, split into low 8-bits and high 4-bits number while skipping over zeros - u64 high = fsst_unaligned_load(&count2High[pos1][pos2>>1]); + u64 high = fsst_unaligned_load(&count2High[pos1][pos2>>1]); // note: this reads 16 subsequent counters [pos2..pos2+15] high >>= ((pos2&1) << 2); // odd pos2: ignore the lowest 4 bits & we see only 15 counters u32 zero = high?(__builtin_ctzll(high)>>2):(15UL-(pos2&1UL)); // number of zero 4-bits counters @@ -434,5 +461,6 @@ struct SIMDjob { }; // C++ fsst-compress function with some more control of how the compression happens (algorithm flavor, simd unroll degree) -size_t compressImpl(Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd); -size_t compressAuto(Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], int simd); +size_t compressImpl(Encoder *encoder, size_t n, const size_t lenIn[], const u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd); +size_t compressAuto(Encoder *encoder, size_t n, const size_t lenIn[], const u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], int simd); +} // namespace libfsst diff --git a/src/duckdb/ub_src_optimizer.cpp b/src/duckdb/ub_src_optimizer.cpp index 0cbee13d3..89c5e7a6c 100644 --- a/src/duckdb/ub_src_optimizer.cpp +++ b/src/duckdb/ub_src_optimizer.cpp @@ -12,6 +12,8 @@ #include "src/optimizer/compressed_materialization.cpp" +#include "src/optimizer/count_window_elimination.cpp" + #include "src/optimizer/cse_optimizer.cpp" #include "src/optimizer/cte_filter_pusher.cpp"