From c5c51af8319e2d6f9b981689d9abe7d5ad21fdf6 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru
Date: Mon, 29 Jun 2026 11:31:20 +0200
Subject: [PATCH] sort: merge lazily in UTF-8 locales instead of precomputing sort keys

In a UTF-8 locale `sort` precomputes a full ICU collation sort key for
every line while parsing chunks. The regular sort path needs this to
amortize O(n log n) comparisons, but merging only performs O(n log k)
comparisons (and none at all when merging a single already-sorted file),
so the keys are pure overhead there.

Add merge_compare(), which mirrors compare_by() but compares whole-line
locale keys lazily with the collator. The merge reader now runs with
fast_locale_collation disabled so Line::create no longer computes
per-line sort keys, and the merge comparator and dedup use merge_compare.

`sort -m /usr/share/dict/words` in a UTF-8 locale goes from ~70ms to ~9ms
(10.5x -> 1.32x vs GNU sort), with byte-identical output. Other sort
modes and the C locale are unaffected.
---
 src/uu/sort/src/merge.rs | 14 ++++++++++----
 src/uu/sort/src/sort.rs  | 31 +++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/src/uu/sort/src/merge.rs b/src/uu/sort/src/merge.rs
index 85276fce6d8..8900284e82f 100644
--- a/src/uu/sort/src/merge.rs
+++ b/src/uu/sort/src/merge.rs
@@ -30,7 +30,7 @@ use uucore::error::{FromIo, UResult};
 use crate::{
     GlobalSettings, Output, SortError,
     chunks::{self, Chunk, RecycledChunk},
-    compare_by, current_open_fd_count, fd_soft_limit, open,
+    current_open_fd_count, fd_soft_limit, merge_compare, open,
     tmp_dir::TmpDirWrapper,
 };
 
@@ -200,7 +200,13 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = UResult<M>>>(
     }
 
     let reader_join_handle = thread::spawn({
-        let settings = settings.clone();
+        // The merge comparator (`merge_compare`) compares whole-line locale keys lazily
+        // with the ICU collator, so the reader does not need to precompute per-line sort
+        // keys. Disabling `fast_locale_collation` here turns `Line::create` into a no-op
+        // for that (whole-line, default) mode and avoids the dominant cost of merging
+        // already-sorted input. Other modes are unaffected (the flag is already false).
+        let mut settings = settings.clone();
+        settings.precomputed.fast_locale_collation = false;
         move || {
             reader(
                 &request_receiver,
@@ -332,7 +338,7 @@ impl FileMerger<'_> {
                 let current_line = &contents.lines[file.line_idx];
                 if settings.unique {
                     if let Some(prev) = &prev {
-                        let cmp = compare_by(
+                        let cmp = merge_compare(
                             &prev.chunk.lines()[prev.line_idx],
                             current_line,
                             settings,
@@ -383,7 +389,7 @@ struct FileComparator<'a> {
 
 impl Compare<MergeableFile> for FileComparator<'_> {
     fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering {
-        let mut cmp = compare_by(
+        let mut cmp = merge_compare(
             &a.current_chunk.lines()[a.line_idx],
             &b.current_chunk.lines()[b.line_idx],
             self.settings,
diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs
index e745365591a..53690e0264d 100644
--- a/src/uu/sort/src/sort.rs
+++ b/src/uu/sort/src/sort.rs
@@ -2621,6 +2621,37 @@ fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_dat
     }
 }
 
+/// Comparison used by the merge path.
+///
+/// This is result-identical to [`compare_by`], but for the whole-line locale-collation
+/// case it compares the two lines lazily with the ICU collator instead of relying on
+/// precomputed collation keys. Merging only performs O(n log k) comparisons (and none at
+/// all when merging a single file), so computing a full sort key for every line — as the
+/// regular sort path does to amortize O(n log n) comparisons — is pure overhead here.
+/// Skipping that per-line work (see `merge_without_limit`) is what makes `sort -m` of
+/// already-sorted input fast.
+pub fn merge_compare<'a>(
+    a: &Line<'a>,
+    b: &Line<'a>,
+    settings: &GlobalSettings,
+    a_line_data: &LineData<'a>,
+    b_line_data: &LineData<'a>,
+) -> Ordering {
+    #[cfg(feature = "i18n-collator")]
+    if settings.precomputed.fast_locale_collation {
+        // Mirror the `fast_locale_collation` branch of `compare_by`, but compare the line
+        // bytes directly rather than precomputed keys: `locale_cmp` (ICU `compare_utf8`)
+        // and the sort-key comparison agree on ordering by construction.
+        let mut cmp = locale_cmp(a.line, b.line);
+        if cmp == Ordering::Equal {
+            // Equal keys for inputs like `01` and `0_1`; fall back to (reversed) byte order.
+            cmp = b.line.cmp(a.line);
+        }
+        return if settings.reverse { cmp.reverse() } else { cmp };
+    }
+    compare_by(a, b, settings, a_line_data, b_line_data)
+}
+
 fn compare_by<'a>(
     a: &Line<'a>,
     b: &Line<'a>,