Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 128 additions & 70 deletions src/uu/numfmt/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,59 +11,6 @@
use crate::options::{NumfmtOptions, RoundMethod, TransformOptions};
use crate::units::{DisplayableSuffix, IEC_BASES, RawSuffix, Result, SI_BASES, Suffix, Unit};

/// Splits a line into whitespace-delimited fields, yielding each one as a
/// `(prefix, field)` tuple: `prefix` is the run of whitespace that precedes
/// the field and `field` is the following run of non-whitespace characters.
///
/// # Examples
///
/// ```
/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some(" 1234 5") };
///
/// assert_eq!(Some((" ", "1234")), fields.next());
/// assert_eq!(Some((" ", "5")), fields.next());
/// assert_eq!(None, fields.next());
/// ```
///
/// The delimiting whitespace is never dropped; it is handed back as `prefix`.
/// Only the first field of a line can have an empty `prefix`, and an empty
/// input line still produces one (empty) field:
///
/// ```
/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some("first second") };
///
/// assert_eq!(Some(("", "first")), fields.next());
/// assert_eq!(Some((" ", "second")), fields.next());
///
/// let mut fields = uu_numfmt::format::WhitespaceSplitter { s: Some("") };
///
/// assert_eq!(Some(("", "")), fields.next());
/// ```
pub struct WhitespaceSplitter<'a> {
    /// Unconsumed remainder of the line; `None` once iteration has finished.
    pub s: Option<&'a str>,
}

impl<'a> Iterator for WhitespaceSplitter<'a> {
    type Item = (&'a str, &'a str);

    /// Consume and return the next `(prefix, field)` pair, or `None` when the
    /// whole line has already been handed out.
    fn next(&mut self) -> Option<Self::Item> {
        // Taking the remainder leaves `None` behind, which is already the
        // correct state if nothing is left after this field.
        let remaining = self.s.take()?;

        // Leading whitespace becomes the prefix.
        let field_start = remaining
            .find(|c: char| !c.is_whitespace())
            .unwrap_or(remaining.len());
        let (prefix, tail) = remaining.split_at(field_start);

        // The field runs up to the next whitespace character (or end of line).
        let field_end = tail.find(char::is_whitespace).unwrap_or(tail.len());
        let (field, rest) = tail.split_at(field_end);

        if !rest.is_empty() {
            self.s = Some(rest);
        }

        Some((prefix, field))
    }
}

fn find_numeric_beginning(s: &str) -> Option<&str> {
let mut decimal_point_seen = false;
if s.is_empty() {
Expand Down Expand Up @@ -130,6 +77,9 @@

if valid_part != s && valid_part.parse::<f64>().is_ok() {
return match s.chars().nth(valid_part.len()) {
Some('+' | '-') => {
Some(translate!("numfmt-error-invalid-number", "input" => s.quote()))
}
Some(v) if RawSuffix::try_from(&v).is_ok() => Some(
translate!("numfmt-error-rejecting-suffix", "number" => valid_part, "suffix" => s[valid_part.len()..]),
),
Expand All @@ -146,7 +96,12 @@
None
}

fn parse_suffix(s: &str, unit: Unit, max_whitespace: usize) -> Result<(f64, Option<Suffix>)> {
fn parse_suffix(
s: &str,
unit: Unit,
unit_separator: &str,
explicit_unit_separator: bool,
) -> Result<(f64, Option<Suffix>)> {
let trimmed = s.trim_end();
if trimmed.is_empty() {
return Err(translate!("numfmt-error-invalid-number-empty"));
Expand Down Expand Up @@ -185,23 +140,122 @@
};

let number_part = &trimmed[..trimmed.len() - suffix_len];
let number_trimmed = number_part.trim_end();

// Validate whitespace between number and suffix
if suffix.is_some() {
let whitespace = number_part.len() - number_trimmed.len();
if whitespace > max_whitespace {
return Err(translate!("numfmt-error-invalid-suffix", "input" => s.quote()));
}
let separator_len = if explicit_unit_separator {
if number_part.ends_with(unit_separator) {
unit_separator.len()
} else if unit_separator.is_empty() {
0
} else {
return Err(translate!("numfmt-error-invalid-suffix", "input" => s.quote()));
}
} else {
let number_trimmed = number_part.trim_end();
let whitespace = number_part.len() - number_trimmed.len();
if whitespace > 1 {
return Err(translate!("numfmt-error-invalid-suffix", "input" => s.quote()));
}
whitespace
};

let number = number_part[..number_part.len() - separator_len]
.parse::<f64>()
.map_err(|_| translate!("numfmt-error-invalid-number", "input" => s.quote()))?;

return Ok((number, suffix));
}

let number = number_trimmed
let number = number_part
.parse::<f64>()
.map_err(|_| translate!("numfmt-error-invalid-number", "input" => s.quote()))?;

Ok((number, suffix))
}

/// Returns the byte offset of the first whitespace character in `s`, or
/// `s.len()` when `s` contains no whitespace at all.
fn next_field_index(s: &str) -> usize {
    match s.char_indices().find(|&(_, c)| c.is_whitespace()) {
        Some((offset, _)) => offset,
        None => s.len(),
    }
}

fn split_next_field<'a>(s: &'a str) -> (&'a str, &'a str, &'a str) {

Check failure on line 180 in src/uu/numfmt/src/format.rs

View workflow job for this annotation

GitHub Actions / Style and Lint (ubuntu-24.04, unix)

ERROR: `cargo clippy`: the following explicit lifetimes could be elided: 'a (file:'src/uu/numfmt/src/format.rs', line:180)

Check failure on line 180 in src/uu/numfmt/src/format.rs

View workflow job for this annotation

GitHub Actions / Style and Lint (unix)

ERROR: `cargo clippy`: the following explicit lifetimes could be elided: 'a (file:'src/uu/numfmt/src/format.rs', line:180)
let prefix_len = s.find(|c: char| !c.is_whitespace()).unwrap_or(s.len());
let prefix = &s[..prefix_len];
let field_start = prefix_len;
let field_len = next_field_index(&s[field_start..]);
let field_end = field_start + field_len;
let field = &s[field_start..field_end];
let rest = &s[field_end..];
(prefix, field, rest)
}

/// Detects a unit suffix that was separated from its number by an explicit,
/// whitespace-only unit separator (e.g. `--unit-separator=" "`).
///
/// With such a separator the suffix (say `K` or `Ki`) shows up as its own
/// whitespace-delimited field. `s` is the text that follows a number field;
/// when it begins with exactly the configured separator followed by a
/// suffix-shaped field, `Some((separator, suffix))` is returned so the caller
/// can glue the suffix back onto the preceding number. In every other case
/// the result is `None`.
fn split_mergeable_suffix<'a>(s: &'a str, options: &NumfmtOptions) -> Option<(&'a str, &'a str)> {
    let separator = &options.unit_separator;

    // Merging only applies to a user-supplied, non-empty separator made up
    // entirely of whitespace, and only when `s` actually begins with it.
    let separator_applies = options.explicit_unit_separator
        && !separator.is_empty()
        && separator.chars().all(char::is_whitespace)
        && s.starts_with(separator);
    if !separator_applies {
        return None;
    }

    // The whole whitespace run in front of the candidate must be the
    // separator itself, not merely start with it.
    let (prefix, field, _) = split_next_field(s);
    if prefix != options.unit_separator {
        return None;
    }

    // A suffix is either one byte long ("K") or two bytes ending in 'i' ("Ki").
    let suffix_shaped = field.len() == 1 || (field.len() == 2 && field.ends_with('i'));
    if !suffix_shaped {
        return None;
    }

    // Its first character has to be a recognised suffix letter.
    let letter = field.chars().next()?;
    if RawSuffix::try_from(&letter).is_err() {
        return None;
    }

    Some((prefix, field))
}

struct WhitespaceSplitter<'a, 'b> {
s: Option<&'a str>,
options: &'b NumfmtOptions,
}

impl<'a> Iterator for WhitespaceSplitter<'a, '_> {
type Item = (&'a str, &'a str);

fn next(&mut self) -> Option<Self::Item> {
let haystack = self.s?;
let (prefix, field, rest) = split_next_field(haystack);

if field.is_empty() {
self.s = None;
return Some((prefix, field));
}

if let Some((suffix_prefix, suffix_field)) = split_mergeable_suffix(rest, self.options) {
let merged_len = prefix.len() + field.len() + suffix_prefix.len() + suffix_field.len();
let merged_field = &haystack[prefix.len()..merged_len];
self.s = Some(&haystack[merged_len..]).filter(|rest| !rest.is_empty());
return Some((prefix, merged_field));
}

self.s = Some(rest).filter(|rest| !rest.is_empty());
Some((prefix, field))
}
}

/// Returns the implicit precision of a number, which is the count of digits after the dot. For
/// example, 1.23 has an implicit precision of 2.
fn parse_implicit_precision(s: &str) -> usize {
Expand Down Expand Up @@ -252,9 +306,14 @@
}
}

fn transform_from(s: &str, opts: &TransformOptions, max_whitespace: usize) -> Result<f64> {
let (i, suffix) = parse_suffix(s, opts.from, max_whitespace)
.map_err(|original| detailed_error_message(s, opts.from).unwrap_or(original))?;
fn transform_from(s: &str, opts: &TransformOptions, options: &NumfmtOptions) -> Result<f64> {
let (i, suffix) = parse_suffix(
s,
opts.from,
&options.unit_separator,
options.explicit_unit_separator,
)
.map_err(|original| detailed_error_message(s, opts.from).unwrap_or(original))?;
let i = i * (opts.from_unit as f64);

remove_suffix(i, suffix, opts.from).map(|n| {
Expand Down Expand Up @@ -409,11 +468,7 @@
};

let number = transform_to(
transform_from(
source_without_suffix,
&options.transform,
options.max_whitespace,
)?,
transform_from(source_without_suffix, &options.transform, options)?,
&options.transform,
options.round,
precision,
Expand Down Expand Up @@ -531,7 +586,10 @@
options: &NumfmtOptions,
eol: Option<u8>,
) -> Result<()> {
for (n, (prefix, field)) in (1..).zip(WhitespaceSplitter { s: Some(s) }) {
for (n, (prefix, field)) in (1..).zip(WhitespaceSplitter {
s: Some(s),
options,
}) {
let field_selected = uucore::ranges::contain(&options.fields, n);

if field_selected {
Expand Down
14 changes: 4 additions & 10 deletions src/uu/numfmt/src/numfmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,14 +285,8 @@ fn parse_options(args: &ArgMatches) -> Result<NumfmtOptions> {
.cloned()
.unwrap_or_default();

// Max whitespace between number and suffix: length of separator if provided, default one
let max_whitespace = if args.contains_id(UNIT_SEPARATOR)
&& args.value_source(UNIT_SEPARATOR) == Some(ValueSource::CommandLine)
{
unit_separator.len()
} else {
1
};
let explicit_unit_separator = args.contains_id(UNIT_SEPARATOR)
&& args.value_source(UNIT_SEPARATOR) == Some(ValueSource::CommandLine);

let invalid = InvalidModes::from_str(args.get_one::<String>(INVALID).unwrap()).unwrap();

Expand All @@ -309,7 +303,7 @@ fn parse_options(args: &ArgMatches) -> Result<NumfmtOptions> {
round,
suffix,
unit_separator,
max_whitespace,
explicit_unit_separator,
format,
invalid,
zero_terminated,
Expand Down Expand Up @@ -530,7 +524,7 @@ mod tests {
round: RoundMethod::Nearest,
suffix: None,
unit_separator: String::new(),
max_whitespace: 1,
explicit_unit_separator: false,
format: FormatOptions::default(),
invalid: InvalidModes::Abort,
zero_terminated: false,
Expand Down
2 changes: 1 addition & 1 deletion src/uu/numfmt/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pub struct NumfmtOptions {
pub round: RoundMethod,
pub suffix: Option<String>,
pub unit_separator: String,
pub max_whitespace: usize,
pub explicit_unit_separator: bool,
pub format: FormatOptions,
pub invalid: InvalidModes,
pub zero_terminated: bool,
Expand Down
23 changes: 23 additions & 0 deletions tests/by-util/test_numfmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,14 @@ fn test_should_report_invalid_number_with_interior_junk() {
.stderr_is("numfmt: invalid suffix in input: '1x0K'\n");
}

/// A sign character right after the decimal point must be rejected with an
/// "invalid number" diagnostic and exit code 2.
#[test]
fn test_should_report_invalid_number_with_sign_after_decimal() {
    // NOTE(review): `--` presumably stops option parsing so that the leading
    // `-` of the operand is not read as a flag — confirm against the CLI setup.
    let argv = ["--", "-0.-1"];
    let expected_stderr = "numfmt: invalid number: '-0.-1'\n";

    new_ucmd!()
        .args(&argv)
        .fails_with_code(2)
        .stderr_is(expected_stderr);
}

#[test]
fn test_should_skip_leading_space_from_stdin() {
new_ucmd!()
Expand Down Expand Up @@ -1237,6 +1245,21 @@ fn test_empty_delimiter_multi_char_unit_separator() {
.stdout_only("1000\n2000000\n3000000000\n");
}

/// With the default (whitespace) field splitting, a custom `--unit-separator`
/// between number and suffix must be accepted on input: both a multi-character
/// ASCII separator and a non-breaking space are exercised.
#[test]
fn test_whitespace_mode_parses_custom_unit_separator_inputs() {
    // (separator option, stdin); every case must convert to 4096.
    let cases = [
        ("--unit-separator=::", "4::K\n"),
        ("--unit-separator=\u{a0}", "4\u{a0}K\n"),
    ];

    for (separator_arg, input) in cases {
        new_ucmd!()
            .args(&["--from=iec", separator_arg])
            .pipe_in(input)
            .succeeds()
            .stdout_only("4096\n");
    }
}

#[test]
fn test_empty_delimiter_whitespace_rejection() {
new_ucmd!()
Expand Down
Loading