From 0e3876d5e423988ad4752a4d4bfd5cb486bdc3b2 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Sat, 27 Jun 2026 20:21:05 +0200
Subject: [PATCH] buzhash64: add FastCDC-style normalized chunking

Normalized chunking switches between a stricter and a looser cut mask
around the target chunk size. This greatly tightens the chunk-size
distribution (coefficient of variation ~0.9 -> ~0.3 in tests) and removes
the dedup-hostile max-size-clamped chunks, without hurting deduplication.

chunker-params for buzhash64 gains a required 6th field, nc_level:

  buzhash64,chunk_min,chunk_max,chunk_mask,window_size,nc_level

Use nc_level=2 for the new default, nc_level=0 to disable (then behavior
is byte-identical to the previous single-mask chunker).

buzhash (32bit) is untouched and stays bit-compatible with borg 1.x.

The mask transition point (normal_size) defaults to a principled formula
(target minus the expected loose-phase tail) so the mean stays near the
target; it can be tuned via the normal_size constructor arg.

scripts/chunker_bench.py: evidence harness used to measure chunk-size
distribution, dedup ratio, throughput and shift-resilience.

Measurements (before = nc_level 0, after = nc_level 2; both at the default
params buzhash64,19,23,21,4095; measured with scripts/chunker_bench.py):

5 GiB of incompressible data (~2000-2700 chunks, statistically stable):

  before:  CV 0.739,  49 max-size-clamped (8 MiB) chunks,   953 MB/s
  after:   CV 0.311,   0 max-size-clamped chunks,          1024 MB/s

Re-backup of a 2.5 GiB file after a few scattered single-byte edits
(deduplication ratio; 0.5 = v2 fully deduplicated against v1, lower is
better):

   64 edits:  before 0.5424  ->  after 0.5235
  320 edits:  before 0.6791  ->  after 0.6142

Normalized chunking deduplicates better after edits: removing the
max-size-clamped chunks means a single-byte change invalidates much less
data (about 36% less dedup overhead at 320 edits). Throughput was also
consistently higher with nc_level=2 at this scale.

Also: fix a bug when computing the mask: one needs to use 1ULL instead of
1, so that the shift is computed in a uint64, not in a 32-bit int.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 docs/changes.rst                              |   8 +
 docs/usage/transfer.rst                       |   2 +-
 scripts/chunker_bench.py                      | 321 ++++++++++++++++++
 src/borg/archiver/benchmark_cmd.py            |  10 +-
 src/borg/archiver/completion_cmd.py           |   2 +-
 src/borg/chunkers/__init__.py                 |   4 +-
 src/borg/chunkers/buzhash64.pyx               |  59 +++-
 src/borg/constants.py                         |   6 +-
 src/borg/helpers/parseformat.py               |  15 +-
 src/borg/testsuite/archiver/list_cmd_test.py  |   2 +-
 src/borg/testsuite/chunkers/buzhash64_test.py |   6 +-
 11 files changed, 414 insertions(+), 21 deletions(-)
 create mode 100644 scripts/chunker_bench.py

diff --git a/docs/changes.rst b/docs/changes.rst
index 566c3d4610..8caa0d0e8b 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -168,6 +168,14 @@ above.
 
 New features:
 
+- buzhash64 chunker: add FastCDC-style normalized chunking and enable it by default
+  (``nc_level=2``). It switches between a stricter and a looser cut mask around the target
+  chunk size, which greatly tightens the chunk-size distribution (the coefficient of
+  variation of chunk sizes was cut by roughly 60% in tests) and removes the dedup-hostile
+  max-size-clamped chunks, at negligible throughput cost and without hurting deduplication.
+  ``chunker-params`` for buzhash64 gains a required 6th field ``nc_level``
+  (``buzhash64,chunk_min,chunk_max,chunk_mask,window_size,nc_level``).
+  buzhash (32bit) is unchanged and stays bit-compatible with borg 1.x.
 - repo-create: split ``--encryption`` into orthogonal options. ``--encryption`` now
   selects only the cipher / AE algorithm (``none``, ``authenticated``, ``aes256-ocb``
   or ``chacha20-poly1305``), the new ``--id-hash`` selects the id hash function
diff --git a/docs/usage/transfer.rst b/docs/usage/transfer.rst
index 22e729debe..86c5716bde 100644
--- a/docs/usage/transfer.rst
+++ b/docs/usage/transfer.rst
@@ -55,7 +55,7 @@ locations and passphrases first:
     # The AEAD cipher does not matter (everything must be re-encrypted and
     # re-authenticated anyway); you could also choose -e chacha20-poly1305 -i blake3.
     $ borg repo-create -e aes256-ocb -i blake3
-    $ export CHUNKER_PARAMS="buzhash64,19,23,21,4095"
+    $ export CHUNKER_PARAMS="buzhash64,19,23,21,4095,2"
 
     # 2. Check what and how much it would transfer:
     $ borg transfer --from-borg1 --chunker-params=$CHUNKER_PARAMS --dry-run
diff --git a/scripts/chunker_bench.py b/scripts/chunker_bench.py
new file mode 100644
index 0000000000..a90b66a04c
--- /dev/null
+++ b/scripts/chunker_bench.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""
+buzhash64 chunker evaluation harness.
+
+Purpose
+-------
+Establish an *evidence baseline* for the current buzhash64 chunker (and buzhash32
+for reference) so that any future change to buzhash64 can be judged against real
+numbers instead of intuition.
+
+It measures, for a given chunker config and corpus:
+
+  * chunk-size distribution: count, mean, stddev, coefficient of variation (CV),
+    and how many chunks were clamped at min_size / max_size,
+  * deduplication ratio: unique-chunk-bytes / total-bytes (lower is better dedup),
+  * throughput in MB/s,
+  * shift resilience: re-chunk a mutated copy (bytes inserted/deleted at random
+    offsets) and report what fraction of chunks (by content) survive. This is the
+    property content-defined chunking exists for; size-distribution changes can
+    help or hurt it, so we must watch it.
+
+Corpora
+-------
+  --path FILE_OR_DIR   use real data (a dir is concatenated, file order sorted)
+  --synthetic random:N        N bytes of os.urandom (incompressible, worst case)
+  --synthetic lcg:N           N bytes of a cheap LCG stream (deterministic)
+  --synthetic textish:N       N bytes of low-entropy, repetitive ascii-ish data
+
+Examples
+--------
+  python scripts/chunker_bench.py --synthetic lcg:67108864
+  python scripts/chunker_bench.py --path /usr/lib --max-bytes 268435456
+  python scripts/chunker_bench.py --path ./some.tar --algo buzhash64 buzhash
+
+This script imports the *compiled* borg chunkers, so build borg first.
+It does not modify borg in any way; it is a measurement tool only.
+"""
+
+import argparse
+import hashlib
+import os
+import random
+import statistics
+import sys
+import time
+from io import BytesIO
+
+from borg.chunkers import get_chunker
+from borg.constants import CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE
+
+
+def gen_synthetic(spec):
+    """Return a synthetic corpus (bytes) for a "kind:N" spec ("versioned" also takes ":E"); exit on a bad size."""
+    kind, _, rest = spec.partition(":")
+    try:
+        n = int(rest.partition(":")[0] if kind == "versioned" else rest)  # N = first numeric field
+    except ValueError:
+        raise SystemExit(f"invalid size in synthetic spec: {spec!r}") from None
+    if kind == "random":
+        return os.urandom(n)
+    if kind == "lcg":
+        a = bytearray(n)
+        x = 1
+        for i in range(n):
+            x = (x * 1103515245 + 12345) & 0x7FFFFFFF
+            a[i] = x & 0xFF
+        return bytes(a)
+    if kind == "versioned":
+        # "versioned:N[:E]" -> corpus = v1 ++ v2, where v2 is v1 with E scattered single-byte
+        # inserts/deletes (default E=64). Models backing up a slightly-changed large file: the
+        # dedup ratio shows how much of v2 is re-deduplicated against v1, which is exactly what
+        # shift-resilient chunk boundaries (and normalized chunking) affect.
+        # n was parsed above from the first numeric field; E is the optional second one.
+        parts = spec.split(":")
+        edits = int(parts[2]) if len(parts) > 2 else 64
+        v1 = os.urandom(n)
+        v2 = mutate(v1, edits, random.Random(42))
+        corpus = v1 + v2
+        del v1, v2
+        return corpus
+    if kind == "textish":
+        # low-entropy, repetitive: stresses buzhash window cancellation and
+        # tends to produce many min/max-clamped chunks.
+        words = [
+            b"the ",
+            b"quick ",
+            b"brown ",
+            b"fox ",
+            b"jumps ",
+            b"over ",
+            b"lazy ",
+            b"dog ",
+            b"lorem ",
+            b"ipsum ",
+            b"dolor ",
+            b"sit ",
+        ]
+        rng = random.Random(1234)
+        out = bytearray()
+        while len(out) < n:
+            out += rng.choice(words)
+        return bytes(out[:n])
+    raise SystemExit(f"unknown synthetic spec: {spec!r}")
+
+
+def load_path(path, max_bytes):
+    """Return the bytes of a file, or of all files below a directory, capped at max_bytes (0 = no cap)."""
+    if os.path.isfile(path):
+        with open(path, "rb") as f:
+            return f.read(max_bytes if max_bytes else -1)
+    buf = bytearray()
+    for root, _, files in sorted(os.walk(path)):  # sorted by dir path -> deterministic corpus order
+        for name in sorted(files):
+            try:
+                with open(os.path.join(root, name), "rb") as f:
+                    buf += f.read()
+            except OSError:
+                continue
+            if max_bytes and len(buf) >= max_bytes:
+                return bytes(buf[:max_bytes])
+    return bytes(buf)
+
+
+def chunk_stats(algo, data, min_exp, max_exp, mask_bits, win, nc_level=0, normal_size=0):
+    """Chunk data and return (sizes, hashes, chunking_time); chunk bytes are hashed, not retained.
+
+    Memory-lean: only a size (int) and a sha256 digest are kept per chunk, so very large corpora
+    can be processed. key=None -> zero key (deterministic). nc_level/normal_size: buzhash64 only."""
+    params = [min_exp, max_exp, mask_bits, win]
+    kw = dict(key=None, sparse=False)
+    if algo == "buzhash64":
+        params.append(nc_level)  # nc_level is a positional buzhash64 param
+        kw["normal_size"] = normal_size  # forwarded as a keyword argument
+    chunker = get_chunker(algo, *params, **kw)
+    sizes = []
+    hashes = []
+    for c in chunker.chunkify(BytesIO(data)):
+        if c.data is None:  # hole / all-zero alloc chunk
+            n = c.meta["size"]
+            sizes.append(n)
+            hashes.append(hashlib.sha256(b"\0" * n).digest())
+        else:
+            b = c.data
+            sizes.append(len(b))
+            hashes.append(hashlib.sha256(b).digest())
+    return sizes, hashes, getattr(chunker, "chunking_time", 0.0)  # 0.0: chunker exposes no timing
+
+
+def mutate(data, n_edits, rng):
+    """Insert/delete n_edits single bytes at random offsets (boundary shift test); returns new bytes."""
+    b = bytearray(data)
+    for _ in range(n_edits):
+        pos = rng.randrange(len(b)) if b else 0  # randrange(0) would raise ValueError
+        if not b or rng.random() < 0.5:  # an empty buffer can only grow
+            b.insert(pos, rng.randrange(256))
+        else:
+            del b[pos]
+    return bytes(b)
+
+
+def analyze(algo, data, params, shift_edits, rng, nc_level=0, normal_size=0):
+    """Chunk data once (plus a mutated copy if shift_edits) and return a dict of the measured stats."""
+    min_exp, max_exp, mask_bits, win = params
+    min_size, max_size = 1 << min_exp, 1 << max_exp
+    t0 = time.monotonic()
+    sizes, hashes, internal_t = chunk_stats(algo, data, *params, nc_level=nc_level, normal_size=normal_size)
+    wall = time.monotonic() - t0
+
+    # drop last chunk for distribution stats (it is a remainder, often < min)
+    dist_sizes = sizes[:-1] if len(sizes) > 1 else sizes
+    total = sum(sizes)
+
+    mean = statistics.fmean(dist_sizes) if dist_sizes else 0
+    stdev = statistics.pstdev(dist_sizes) if len(dist_sizes) > 1 else 0.0
+    cv = (stdev / mean) if mean else 0.0
+    min_clamped = sum(1 for s in dist_sizes if s == min_size)
+    max_clamped = sum(1 for s in dist_sizes if s == max_size)
+
+    # dedup ratio: unique chunk content / total (lower = more dedup)
+    seen = set()
+    unique_bytes = 0
+    for h, n in zip(hashes, sizes):
+        if h not in seen:
+            seen.add(h)
+            unique_bytes += n
+    dedup_ratio = unique_bytes / total if total else 0.0
+
+    # shift resilience: re-chunk a mutated copy, fraction of chunks (by content) that survive
+    shift_survival = None
+    if shift_edits:
+        mutated = mutate(data, shift_edits, rng)
+        _, mhashes, _ = chunk_stats(algo, mutated, *params, nc_level=nc_level, normal_size=normal_size)
+        del mutated
+        orig_set = set(hashes)
+        survived = sum(1 for h in mhashes if h in orig_set)
+        shift_survival = survived / len(mhashes) if mhashes else 0.0
+
+    mb = total / (1024 * 1024)
+    secs = internal_t or wall  # prefer the chunker's own timing; wall time also includes the sha256 hashing
+    label = algo if not nc_level else f"{algo}/nc{nc_level}"
+    return {
+        "algo": label,
+        "count": len(sizes),
+        "total_mb": mb,
+        "mean": mean,
+        "stdev": stdev,
+        "cv": cv,
+        "min_clamped": min_clamped,
+        "max_clamped": max_clamped,
+        "min_obs": min(dist_sizes) if dist_sizes else 0,
+        "max_obs": max(dist_sizes) if dist_sizes else 0,
+        "dedup_ratio": dedup_ratio,
+        "throughput_mbps": mb / secs if secs else float("inf"),
+        "shift_survival": shift_survival,
+    }
+
+
+def fmt(r):  # render one analyze() result dict as a single aligned report line
+    line = (
+        f"{r['algo']:>13}  "
+        f"n={r['count']:>6}  "
+        f"mean={r['mean']/1024:8.1f}K  "
+        f"stdev={r['stdev']/1024:8.1f}K  "
+        f"CV={r['cv']:5.3f}  "
+        f"min/max-clamp={r['min_clamped']:>4}/{r['max_clamped']:<4}  "
+        f"dedup={r['dedup_ratio']:6.4f}  "
+        f"{r['throughput_mbps']:7.1f} MB/s"
+    )
+    if r["shift_survival"] is not None:  # None means the shift test was skipped
+        line += f"  shift-survive={r['shift_survival']:6.4f}"
+    return line
+
+
+def main():
+    """Parse the CLI, build the corpus and print one result line per (algo, nc_level) variant."""
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    src = ap.add_mutually_exclusive_group(required=True)
+    src.add_argument("--path", help="file or directory to use as corpus")
+    src.add_argument("--synthetic", help="random:N | lcg:N | textish:N | versioned:N[:E]")
+    ap.add_argument("--max-bytes", type=int, default=0, help="cap corpus size (0 = no cap)")
+    ap.add_argument(
+        "--algo",
+        nargs="+",
+        default=["buzhash64", "buzhash"],
+        help="chunker algos to compare (default: buzhash64 buzhash)",
+    )
+    ap.add_argument("--min-exp", type=int, default=CHUNK_MIN_EXP)
+    ap.add_argument("--max-exp", type=int, default=CHUNK_MAX_EXP)
+    ap.add_argument("--mask-bits", type=int, default=HASH_MASK_BITS)
+    ap.add_argument("--window", type=int, default=HASH_WINDOW_SIZE)
+    ap.add_argument(
+        "--nc-level",
+        type=int,
+        default=2,
+        help="normalized chunking level for buzhash64; runs nc=0 AND this level (0 to disable)",
+    )
+    ap.add_argument(
+        "--normal-size",
+        type=int,
+        default=0,
+        help="explicit NC transition size in bytes (0 = auto = 2**mask_bits - 2**(mask_bits - nc_level))",
+    )
+    ap.add_argument(
+        "--shift-edits", type=int, default=8, help="number of random insert/delete edits for shift test (0 to skip)"
+    )
+    ap.add_argument("--repeat", type=int, default=1, help="repeat runs (throughput stability)")
+    ap.add_argument("--seed", type=int, default=0)
+    args = ap.parse_args()
+
+    if args.synthetic:
+        data = gen_synthetic(args.synthetic)
+        corpus_desc = args.synthetic
+    else:
+        data = load_path(args.path, args.max_bytes)
+        corpus_desc = args.path
+    if args.max_bytes:
+        data = data[: args.max_bytes]
+
+    params = (args.min_exp, args.max_exp, args.mask_bits, args.window)
+
+    print(f"corpus: {corpus_desc}  size: {len(data)/(1024*1024):.1f} MiB")
+    print(
+        f"params: min_exp={params[0]} max_exp={params[1]} mask_bits={params[2]} "
+        f"window={params[3]}  (target ~{(1<<params[2])/(1024*1024):.2f} MiB)"
+    )
+    print(f"shift test: {args.shift_edits} edits   repeats: {args.repeat}")
+    print("-" * 118)
+
+    # build (algo, nc_level) variants; for buzhash64 also run the requested NC level
+    variants = []
+    for algo in args.algo:
+        variants.append((algo, 0))
+        if algo == "buzhash64" and args.nc_level > 0:
+            variants.append((algo, args.nc_level))
+
+    for algo, nc in variants:
+        best_tput, last = 0.0, None
+        for _ in range(max(1, args.repeat)):  # always run at least once, so there is a result to print
+            r = analyze(
+                algo,
+                data,
+                params,
+                args.shift_edits,
+                random.Random(args.seed),
+                nc_level=nc,
+                normal_size=args.normal_size,
+            )
+            best_tput = max(best_tput, r["throughput_mbps"])
+            last = r
+        last["throughput_mbps"] = best_tput  # report best (least-noisy) throughput
+        print(fmt(last))
+
+    print("-" * 118)
+    print(
+        "notes: dedup<1.0 only if corpus has duplicate content; CV lower = tighter "
+        "size distribution; shift-survive higher = better."
+    )
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/borg/archiver/benchmark_cmd.py b/src/borg/archiver/benchmark_cmd.py
index e448bbef0a..099cac8b36 100644
--- a/src/borg/archiver/benchmark_cmd.py
+++ b/src/borg/archiver/benchmark_cmd.py
@@ -199,8 +199,8 @@ def chunkit(ch):
             ),
             # note: the buzhash64 chunker creation is rather slow, so we must keep it in setup
             (
-                "buzhash64,19,23,21,4095",
-                "ch = get_chunker('buzhash64', 19, 23, 21, 4095, sparse=False)",
+                "buzhash64,19,23,21,4095,2",
+                "ch = get_chunker('buzhash64', 19, 23, 21, 4095, 2, sparse=False)",
                 "chunkit(ch)",
                 locals(),
             ),
@@ -211,7 +211,7 @@ def chunkit(ch):
                 algo, _, algo_params = spec.partition(",")
                 result["chunkers"].append({"algo": algo, "algo_params": algo_params, "size": size, "time": dt})
             else:
-                print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s")
+                print(f"{spec:<26} {format_file_size(size):<10} {dt:.3f}s")
 
         from ..crypto.low_level import hmac_sha256, blake2b_256
         import blake3
@@ -232,7 +232,7 @@ def chunkit(ch):
             if args.json:
                 result["hashes"].append({"algo": spec, "size": size, "time": dt})
             else:
-                print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s")
+                print(f"{spec:<26} {format_file_size(size):<10} {dt:.3f}s")
 
         from ..crypto.low_level import AES256_CTR_BLAKE2b, AES256_CTR_HMAC_SHA256
         from ..crypto.low_level import AES256_OCB, CHACHA20_POLY1305
@@ -272,7 +272,7 @@ def chunkit(ch):
             if args.json:
                 result["encryption"].append({"algo": spec, "size": size, "time": dt})
             else:
-                print(f"{spec:<24} {format_file_size(size):<10} {dt:.3f}s")
+                print(f"{spec:<26} {format_file_size(size):<10} {dt:.3f}s")
 
         if not args.json:
             print("Compression ====================================================")
diff --git a/src/borg/archiver/completion_cmd.py b/src/borg/archiver/completion_cmd.py
index 4b973fafce..1bce0cb15d 100644
--- a/src/borg/archiver/completion_cmd.py
+++ b/src/borg/archiver/completion_cmd.py
@@ -708,7 +708,7 @@ def do_completion(self, args):
         comp_spec_choices_str = " ".join(comp_spec_choices)
 
         # Chunker params choices (static list)
-        chunker_params_choices = ["default", "fixed,4194304", "buzhash,19,23,21,4095", "buzhash64,19,23,21,4095"]
+        chunker_params_choices = ["default", "fixed,4194304", "buzhash,19,23,21,4095", "buzhash64,19,23,21,4095,2"]
         chunker_params_choices_str = " ".join(chunker_params_choices)
 
         # Relative time marker choices (static list)
diff --git a/src/borg/chunkers/__init__.py b/src/borg/chunkers/__init__.py
index dd3376985d..7282b5e8eb 100644
--- a/src/borg/chunkers/__init__.py
+++ b/src/borg/chunkers/__init__.py
@@ -17,7 +17,9 @@ def get_chunker(algo, *params, **kw):
     if algo == "buzhash":
         return Chunker(seed, *params, sparse=sparse)
     if algo == "buzhash64":
-        return ChunkerBuzHash64(bh64_key, *params, sparse=sparse)
+        # params is (chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size, nc_level);
+        # nc_level is passed positionally. normal_size is an optional tuning knob (0 = auto).
+        return ChunkerBuzHash64(bh64_key, *params, normal_size=kw.get("normal_size", 0), sparse=sparse)
     if algo == "fixed":
         return ChunkerFixed(*params, sparse=sparse)
     if algo == "fail":
diff --git a/src/borg/chunkers/buzhash64.pyx b/src/borg/chunkers/buzhash64.pyx
index a3c7bf1101..1a1ce8fc9c 100644
--- a/src/borg/chunkers/buzhash64.pyx
+++ b/src/borg/chunkers/buzhash64.pyx
@@ -109,6 +109,9 @@ cdef class ChunkerBuzHash64:
     It also uses a per-repo random seed to avoid some chunk length fingerprinting attacks.
     """
     cdef uint64_t chunk_mask
+    cdef uint64_t mask_s, mask_l  # normalized chunking: strict / loose masks
+    cdef size_t normal_size       # chunk length at which we switch mask_s -> mask_l
+    cdef int nc_level             # normalized chunking level (0 = disabled)
     cdef uint64_t* table
     cdef uint8_t* data
     cdef object _fd  # Python object for file descriptor
@@ -121,7 +124,7 @@ cdef class ChunkerBuzHash64:
     cdef size_t reader_block_size
     cdef bint sparse
 
-    def __cinit__(self, bytes key, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size, bint sparse=False):
+    def __cinit__(self, bytes key, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size, int nc_level=0, size_t normal_size=0, bint sparse=False):
         self.table = NULL
         self.data = NULL
         min_size = 1 << chunk_min_exp
@@ -131,8 +134,29 @@ cdef class ChunkerBuzHash64:
         assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
 
         self.window_size = hash_window_size
-        self.chunk_mask = (1 << hash_mask_bits) - 1
+        self.chunk_mask = (1ULL << hash_mask_bits) - 1
         self.min_size = min_size
+        # Normalized chunking (FastCDC-style): use a stricter mask (lower cut probability) until
+        # the chunk reaches its expected/normal size, then a looser mask (higher cut probability).
+        # This concentrates chunk sizes around the target and reduces chunk-size variance.
+        # nc_level == 0 disables it, keeping behavior byte-identical to the single-mask chunker.
+        assert nc_level >= 0
+        assert hash_mask_bits - nc_level >= 1, "nc_level too large for hash_mask_bits"
+        assert hash_mask_bits + nc_level <= 48, "nc_level too large for hash_mask_bits"
+        self.nc_level = nc_level
+        if nc_level:
+            self.mask_s = (1ULL << (hash_mask_bits + nc_level)) - 1
+            self.mask_l = (1ULL << (hash_mask_bits - nc_level)) - 1
+            # normal_size is the chunk length at which we switch from the strict to the loose
+            # mask; it dominates the mean chunk size. The default is the nominal target size
+            # (1ULL << hash_mask_bits) minus the expected loose-phase tail (1ULL << (bits - nc_level)),
+            # which lands the mean close to the target instead of overshooting it. Pass an
+            # explicit normal_size to tune it further.
+            self.normal_size = normal_size if normal_size else ((1ULL << hash_mask_bits) - (1ULL << (hash_mask_bits - nc_level)))
+        else:
+            self.mask_s = self.chunk_mask
+            self.mask_l = self.chunk_mask
+            self.normal_size = 0
         self.table = buzhash64_init_table(key)
         self.buf_size = max_size
         self.data = <uint8_t*>malloc(self.buf_size)
@@ -196,10 +220,14 @@ cdef class ChunkerBuzHash64:
 
     cdef object process(self) except *:
         """Process the chunker's buffer and return the next chunk."""
-        cdef uint64_t sum, chunk_mask = self.chunk_mask
+        cdef uint64_t sum, mask
+        cdef uint64_t mask_s = self.mask_s, mask_l = self.mask_l
+        cdef int nc_level = self.nc_level
         cdef size_t n, old_last, min_size = self.min_size, window_size = self.window_size
+        cdef size_t normal_size = self.normal_size, normal_pos
         cdef uint8_t* p
         cdef uint8_t* stop_at
+        cdef uint8_t* nc_stop
         cdef size_t did_bytes
 
         if self.done:
@@ -232,11 +260,32 @@ cdef class ChunkerBuzHash64:
         self.remaining -= min_size
         sum = _buzhash64(self.data + self.position, window_size, self.table)
 
-        while self.remaining > window_size and (sum & chunk_mask) and not (self.eof and self.remaining <= window_size):
+        # Normalized chunking: pick the mask based on how far we are into the current chunk.
+        # While below normal_size use the strict mask (lower cut probability), afterward the
+        # loose mask (higher cut probability). The mask is re-evaluated at the top of every
+        # iteration, so the transition is honored exactly at normal_pos. When nc is disabled,
+        # mask_s == mask_l == chunk_mask and the normal_pos cap is not applied, so this reduces
+        # to the original single-mask behavior.
+        mask = mask_s
+        normal_pos = 0
+        while True:
+            if nc_level:
+                normal_pos = self.last + normal_size
+                mask = mask_s if self.position < normal_pos else mask_l
+
+            if not (self.remaining > window_size and (sum & mask) and not (self.eof and self.remaining <= window_size)):
+                break
+
             p = self.data + self.position
             stop_at = p + self.remaining - window_size
 
-            while p < stop_at and (sum & chunk_mask):
+            if nc_level and self.position < normal_pos:
+                # do not scan past the strict->loose transition; re-evaluate the mask there
+                nc_stop = self.data + normal_pos
+                if nc_stop < stop_at:
+                    stop_at = nc_stop
+
+            while p < stop_at and (sum & mask):
                 sum = _buzhash64_update(sum, p[0], p[window_size], window_size, self.table)
                 p += 1
 
diff --git a/src/borg/constants.py b/src/borg/constants.py
index 5c88b6b89e..319393ec30 100644
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -111,9 +111,13 @@
 HASH_WINDOW_SIZE = 0xFFF  # 4095 B
 HASH_MASK_BITS = 21  # results in ~2 MiB chunks statistically
 
+# buzhash64-only: normalized chunking level (0 disables it). buzhash (32bit) does not support this
+# and must stay bit-compatible to borg 1.x, so it has no nc_level param.
+NC_LEVEL = 2  # FastCDC-style normalized chunking: tightens chunk-size distribution (much lower variance)
+
 # defaults, use --chunker-params to override
 CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)
-CHUNKER64_PARAMS = (CH_BUZHASH64, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)
+CHUNKER64_PARAMS = (CH_BUZHASH64, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE, NC_LEVEL)
 
 # chunker params for the items metadata stream, finer granularity
 ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE)
diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py
index b8d9f89a9a..72183885fd 100644
--- a/src/borg/helpers/parseformat.py
+++ b/src/borg/helpers/parseformat.py
@@ -304,8 +304,11 @@ def ChunkerParams(s):
         return algo, block_size, header_size
     if algo == "default" and count == 1:  # default
         return CHUNKER_PARAMS
-    if algo == CH_BUZHASH64 and count == 5:  # buzhash64, chunk_min, chunk_max, chunk_mask, window_size
-        chunk_min, chunk_max, chunk_mask, window_size = (int(p) for p in params[1:])
+    if algo == CH_BUZHASH64 and count == 6:
+        # buzhash64, chunk_min, chunk_max, chunk_mask, window_size, nc_level
+        # use nc_level 0 to disable normalized chunking.
+        chunk_min, chunk_max, chunk_mask, window_size = (int(p) for p in params[1:5])
+        nc_level = int(params[5])
         if not (chunk_min <= chunk_mask <= chunk_max):
             raise ArgumentTypeError("required: chunk_min <= chunk_mask <= chunk_max")
         if chunk_min < 6:
@@ -313,8 +316,14 @@ def ChunkerParams(s):
             raise ArgumentTypeError("min. chunk size exponent must not be less than 6 (2^6 = 64B min. chunk size)")
         if chunk_max > 23:
             raise ArgumentTypeError("max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)")
+        # normalized chunking switches the mask at the target size; it needs room below and above
+        # the base mask bits (chunk_mask). nc_level 0 disables it.
+        if not (0 <= nc_level and chunk_mask - nc_level >= 1 and chunk_mask + nc_level <= 48):
+            raise ArgumentTypeError(
+                "required: 0 <= nc_level and 1 <= chunk_mask - nc_level and chunk_mask + nc_level <= 48"
+            )
         # note that for buzhash64, there is no problem with even window_size.
-        return CH_BUZHASH64, chunk_min, chunk_max, chunk_mask, window_size
+        return CH_BUZHASH64, chunk_min, chunk_max, chunk_mask, window_size, nc_level
     # this must stay last as it deals with old-style compat mode (no algorithm, 4 params, buzhash):
     if algo == CH_BUZHASH and count == 5 or count == 4:  # [buzhash, ]chunk_min, chunk_max, chunk_mask, window_size
         chunk_min, chunk_max, chunk_mask, window_size = (int(p) for p in params[count - 4 :])
diff --git a/src/borg/testsuite/archiver/list_cmd_test.py b/src/borg/testsuite/archiver/list_cmd_test.py
index f9bb56e58a..d5ce605890 100644
--- a/src/borg/testsuite/archiver/list_cmd_test.py
+++ b/src/borg/testsuite/archiver/list_cmd_test.py
@@ -250,7 +250,7 @@ def test_fingerprint(archivers, request):
     assert fingerprints1["input/file2"] != fingerprints4["input/file2"]
 
     # Also try with buzhash64
-    cmd(archiver, "create", "--chunker-params=buzhash64,10,23,16,4095", "test5", "input")
+    cmd(archiver, "create", "--chunker-params=buzhash64,10,23,16,4095,2", "test5", "input")
     output = cmd(archiver, "list", "test5", "--format={fingerprint} {path}{NL}")
     fingerprints5 = {}
     for line in output.splitlines():
diff --git a/src/borg/testsuite/chunkers/buzhash64_test.py b/src/borg/testsuite/chunkers/buzhash64_test.py
index 0bbeb4d3d5..9b19448587 100644
--- a/src/borg/testsuite/chunkers/buzhash64_test.py
+++ b/src/borg/testsuite/chunkers/buzhash64_test.py
@@ -110,15 +110,15 @@ def test_fuzz_bh64(worker):
     def rnd_key():
         return os.urandom(32)
 
-    # decompose CHUNKER64_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
-    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER64_PARAMS
+    # decompose CHUNKER64_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size, nc_level)
+    algo, min_exp, max_exp, mask_bits, win_size, nc_level = CHUNKER64_PARAMS
     assert algo == CH_BUZHASH64  # default chunker must be buzhash64 here
 
     keys = [b"\0" * 32] + [rnd_key() for _ in range(10)]
     sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
 
     for key in keys:
-        chunker = ChunkerBuzHash64(key, min_exp, max_exp, mask_bits, win_size)
+        chunker = ChunkerBuzHash64(key, min_exp, max_exp, mask_bits, win_size, nc_level)
         for size in sizes:
             # Random data
             data = os.urandom(size)